# Library Importation

In [2]:
import pandas as pd
import warnings
from sklearn.preprocessing import normalize
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
import scipy.stats
from scipy.stats import norm
from tqdm import tqdm
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn import tree
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score

warnings.filterwarnings('ignore')

# Create Datasets Transformers

## Import Data

In [3]:
def raw_data_filter(data):
    columns = (['duration'
    ,'protocol_type'
    ,'service'
    ,'flag'
    ,'src_bytes'
    ,'dst_bytes'
    ,'land'
    ,'wrong_fragment'
    ,'urgent'
    ,'hot'
    ,'num_failed_logins'
    ,'logged_in'
    ,'num_compromised'
    ,'root_shell'
    ,'su_attempted'
    ,'num_root'
    ,'num_file_creations'
    ,'num_shells'
    ,'num_access_files'
    ,'num_outbound_cmds'
    ,'is_host_login'
    ,'is_guest_login'
    ,'count'
    ,'srv_count'
    ,'serror_rate'
    ,'srv_serror_rate'
    ,'rerror_rate'
    ,'srv_rerror_rate'
    ,'same_srv_rate'
    ,'diff_srv_rate'
    ,'srv_diff_host_rate'
    ,'dst_host_count'
    ,'dst_host_srv_count'
    ,'dst_host_same_srv_rate'
    ,'dst_host_diff_srv_rate'
    ,'dst_host_same_src_port_rate'
    ,'dst_host_srv_diff_host_rate'
    ,'dst_host_serror_rate'
    ,'dst_host_srv_serror_rate'
    ,'dst_host_rerror_rate'
    ,'dst_host_srv_rerror_rate'
    ,'attack'
    ,'level'])

    data.columns = columns
    selected_columns = ([
    'service', 
    'flag',
    'src_bytes'
    ,'dst_bytes'
    ,'urgent'
    ,'num_failed_logins'
    ,'num_compromised'
    ,'root_shell'
    ,'su_attempted'
    ,'num_root'
    ,'num_file_creations'
    ,'num_shells'
    ,'num_access_files'
    ,'is_host_login'
    ,'is_guest_login'
    ,'num_outbound_cmds'
    ,'wrong_fragment'
    ,'level',
    ])
    data.drop(columns=selected_columns, inplace=True)
    data = data[data['protocol_type'] == 'tcp']
    data.drop(columns=['protocol_type'], inplace=True, axis =1)
    

    data['attack'] = data.attack.map(lambda a: 0 if a == 'normal' else 1)
    return data

In [4]:
# import raw data
d_raw_train = pd.read_csv("datasets/KDDTrain+.txt", header=None)
d_raw_test = pd.read_csv("datasets/KDDTest+.txt", header=None)


## Normalization

In [5]:
def normalizing(data):
    for idx in range(len(data.columns)):
        if data.nunique()[idx] != 2:
            data.iloc[:,idx] = normalize([np.array(data.iloc[:,idx])]).reshape(-1)
    return data


In [6]:
# this cell is for test:
#  d_raw_train = normalizing(d_raw_train)
#  d_raw_train.head()

## Principal Component Analysis

In [7]:
def PCA_transformation(data, dim = None):
    pca = PCA()
    pca.fit(data)
    cum_vars = np.cumsum(pca.explained_variance_ratio_)
    for i in range(len(cum_vars)):
        if cum_vars[i] == 0.999:
            break
    if dim:
        pca = PCA(dim)
        data = pca.fit_transform(data)
        return pd.DataFrame(data)
    
    pca = PCA(i)
    data = pca.fit_transform(data)
    return pd.DataFrame(data), i

In [8]:
# this cell is for test:
#  d_raw_train = PCA_transformation(d_raw_train)
#  d_raw_train.shape

## Feature Gaussian Mixture Probability Mode l

In [9]:
def GMM_Row_Transform(data, values, threshold):
    probs = []
    for idx in range(len(data.columns)):
        mean = np.array(data.iloc[:,idx]).mean()
        std = np.array(data.iloc[:,idx]).std()

        z_score = (values[idx] - mean)/std
        prob = (1-norm.cdf(z_score))*100
        probs.append(prob)
    return probs

In [10]:
def GMM_Matrix_Transform(origin_data, data, threshold):
    matrix = []
    for i in tqdm(range(len(data))):
        row = GMM_Row_Transform(origin_data, data.iloc[i,:], threshold)
        
        matrix.append(row)
    return matrix

In [11]:
def GMM_vote(data, values, threshold):
    no = 0
    for idx in range(len(data.columns)):
        mean = np.array(data.iloc[:,idx]).mean()
        std = np.array(data.iloc[:,idx]).std()

        z_score = (values[idx] - mean)/std
        prob = (1-norm.cdf(z_score))*100
        if prob <= threshold:
            no += 1
    return no

In [12]:
# test GMM_vote with 70%:
# values = d_raw_train.iloc[0,:]
# GMM_vote(d_raw_train, values, 70)



# Create 8 Datasets

### d_raw

In [13]:
# filter raw data 
d_raw_train = raw_data_filter(d_raw_train)
d_raw_test = raw_data_filter(d_raw_test)

In [14]:
train_target = d_raw_train['attack']
d_raw_train.drop('attack',inplace=True,axis=1)

test_target = d_raw_test['attack']
d_raw_test.drop('attack',inplace=True,axis=1)

In [15]:
d_raw_train['attack'] = train_target
d_raw_train_normal = d_raw_train[d_raw_train['attack'] == 0]
d_raw_train_normal.drop(['attack'], inplace=True, axis = 1)
d_raw_train.drop(['attack'], inplace=True, axis = 1)
d_raw_train_normal.head()

Unnamed: 0,duration,land,hot,logged_in,count,srv_count,serror_rate,srv_serror_rate,rerror_rate,srv_rerror_rate,...,dst_host_count,dst_host_srv_count,dst_host_same_srv_rate,dst_host_diff_srv_rate,dst_host_same_src_port_rate,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate
0,0,0,0,0,2,2,0.0,0.0,0.0,0.0,...,150,25,0.17,0.03,0.17,0.0,0.0,0.0,0.05,0.0
3,0,0,0,1,5,5,0.2,0.2,0.0,0.0,...,30,255,1.0,0.0,0.03,0.04,0.03,0.01,0.0,0.01
4,0,0,0,1,30,32,0.0,0.0,0.0,0.0,...,255,255,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0,0,0,1,3,7,0.0,0.0,0.0,0.0,...,8,219,1.0,0.0,0.12,0.03,0.0,0.0,0.0,0.0
16,0,0,0,1,8,9,0.0,0.11,0.0,0.0,...,91,255,1.0,0.0,0.01,0.02,0.0,0.0,0.0,0.0


### d_raw_pca

In [16]:
d_raw_pca_train, dim = PCA_transformation(d_raw_train)
d_raw_pca_test = PCA_transformation(d_raw_test, dim)

### d_raw_norm

In [17]:
d_raw_norm_train = normalizing(d_raw_train)
d_raw_norm_test = normalizing(d_raw_test)

### d_raw_norm_pca

In [18]:
d_raw_norm_pca_train, dim = PCA_transformation(normalizing(d_raw_train))
d_raw_norm_pca_test = PCA_transformation(normalizing(d_raw_test), dim)

## d_raw_probs

In [19]:
d_raw_probs_train = pd.DataFrame(GMM_Matrix_Transform(d_raw_train, d_raw_train, 50))
d_raw_probs_test =  pd.DataFrame(GMM_Matrix_Transform(d_raw_train, d_raw_test, 50))


100%|██████████| 102689/102689 [1:02:39<00:00, 27.32it/s]
100%|██████████| 18880/18880 [10:20<00:00, 30.44it/s]


In [20]:
# normal
d_raw_probs_train_normal = pd.DataFrame(GMM_Matrix_Transform(d_raw_train_normal, d_raw_train_normal, 50))

100%|██████████| 53600/53600 [21:11<00:00, 42.16it/s]


## d_raw_pca_probs

In [21]:
d_raw_pca_probs_train, dim = PCA_transformation(d_raw_train)
d_raw_pca_probs_train = pd.DataFrame(GMM_Matrix_Transform(d_raw_pca_probs_train, d_raw_pca_probs_train, 50))

d_raw_pca_probs_test = PCA_transformation(d_raw_test, dim )
d_raw_pca_probs_test = pd.DataFrame(GMM_Matrix_Transform(d_raw_pca_probs_train, d_raw_pca_probs_test, 50))

100%|██████████| 102689/102689 [56:24<00:00, 30.34it/s] 
100%|██████████| 18880/18880 [08:46<00:00, 35.86it/s] 


In [22]:
# normal
d_raw_pca_probs_train_normal, dim = PCA_transformation(d_raw_train_normal)
d_raw_pca_probs_train_normal = pd.DataFrame(GMM_Matrix_Transform(d_raw_pca_probs_train_normal, d_raw_pca_probs_train_normal, 50))

100%|██████████| 53600/53600 [17:20<00:00, 51.51it/s]


## d_norm_probs

In [23]:
d_norm_probs_train = normalizing(d_raw_train)
d_norm_probs_train = pd.DataFrame(GMM_Matrix_Transform(d_norm_probs_train, d_norm_probs_train, 50))

d_norm_probs_test = normalizing(d_raw_test)
d_norm_probs_test = pd.DataFrame(GMM_Matrix_Transform(d_norm_probs_train, d_norm_probs_test, 50))

100%|██████████| 102689/102689 [43:24<00:00, 39.43it/s] 
100%|██████████| 18880/18880 [07:58<00:00, 39.48it/s]


In [24]:
# normal
d_norm_probs_train_normal = normalizing(d_raw_train_normal)
d_norm_probs_train_normal = pd.DataFrame(GMM_Matrix_Transform(d_norm_probs_train_normal, d_norm_probs_train_normal, 50))

100%|██████████| 53600/53600 [15:35<00:00, 57.28it/s]


## d_norm_pca_probs

In [25]:
d_norm_pca_probs_train = normalizing(d_raw_train)
d_norm_pca_probs_train, dim = PCA_transformation(d_norm_pca_probs_train)
d_norm_pca_probs_train = pd.DataFrame(GMM_Matrix_Transform(d_norm_pca_probs_train, d_norm_pca_probs_train, 50))

d_norm_pca_probs_test = normalizing(d_raw_test)
d_norm_pca_probs_test, dim = PCA_transformation(d_norm_pca_probs_test)
d_norm_pca_probs_test = pd.DataFrame(GMM_Matrix_Transform(d_norm_pca_probs_train, d_norm_pca_probs_test, 50))

100%|██████████| 102689/102689 [43:55<00:00, 38.97it/s] 
100%|██████████| 18880/18880 [10:09<00:00, 30.99it/s]


In [26]:
d_norm_pca_probs_train_noraml = normalizing(d_raw_train_normal)
d_norm_pca_probs_train_noraml, dim = PCA_transformation(d_norm_pca_probs_train_noraml)
d_norm_pca_probs_train_noraml = pd.DataFrame(GMM_Matrix_Transform(d_norm_pca_probs_train_noraml, d_norm_pca_probs_train_noraml, 50))

100%|██████████| 53600/53600 [17:19<00:00, 51.55it/s]


# **Models**

In [27]:
def voting(data_train, data_test, min_abnormal_features = 10, threshold = 50):
    preds = []
    for idx in tqdm(range(len(data_test))):
        values = list(data_test.iloc[idx,:])
        no = GMM_vote(data_train, values, threshold)
        if no > min_abnormal_features:
            preds.append(1)
        else:
            preds.append(0)
    return preds

In [28]:
def K_means_Distance(test_idx, test_data, model):
    c1 = np.array(test_data.iloc[test_idx,:])
    c2 = model.cluster_centers_[0]
    temp = c1 - c2
    euclid_dist = np.sqrt(np.dot(temp.T, temp))
    euclid_dist
    return euclid_dist

In [29]:
def kmd_model(test_data, train_data, model, threshold_dis):
    kmd_d_raw_preds  = [] 
    for idx in tqdm(range(len(test_data))):
        dis = K_means_Distance(idx, test_data, model)
        if dis > threshold_dis:
            kmd_d_raw_preds.append(1)
        else:
            kmd_d_raw_preds.append(0)
    return kmd_d_raw_preds

In [30]:
def SVM_model(train_data, test_data):
    model = SVC()
    model.fit(train_data, train_target)
    preds = model.predict(test_data)
    return preds

In [31]:
def kmean_C_model(train_data, test_data):
    kmeans = KMeans(n_clusters=2, n_init="auto").fit(train_data)
    preds = kmeans.predict(test_data)
    return preds

In [32]:
def dt_model(train_data, test_data):
    model = tree.DecisionTreeClassifier()
    model.fit(train_data, train_target)
    preds = model.predict(test_data)
    return preds

In [33]:
def mlp_model(train_data, test_data):
    model = MLPClassifier(max_iter=300).fit(train_data, train_target)
    preds = model.predict(test_data)
    return preds

# Voting

## Voting for d_raw

In [34]:
voting_d_raw_preds = voting(d_raw_train_normal, d_raw_test, min_abnormal_features = 10, threshold = 50)

100%|██████████| 18880/18880 [06:39<00:00, 47.30it/s]


## Voting for d_raw_pca

In [35]:
d_raw_pca_train_normal, _ = PCA_transformation(d_raw_train_normal)

In [36]:
voting_d_raw_pac_preds = voting(d_raw_pca_train_normal, d_raw_pca_test, min_abnormal_features = 10, threshold = 50)

100%|██████████| 18880/18880 [06:30<00:00, 48.31it/s]


## Voting for d_raw_norm

In [37]:
d_raw_norm_train_normal = normalizing(d_raw_train_normal)

In [38]:
voting_d_raw_norm_preds = voting(d_raw_norm_train_normal, d_raw_norm_test, min_abnormal_features = 10, threshold = 50)

100%|██████████| 18880/18880 [05:23<00:00, 58.45it/s]


## Voting for d_raw_norm_pca

In [39]:
d_raw_norm_pca_train_noraml, dim = PCA_transformation(normalizing(d_raw_train_normal))

In [40]:
voting_d_raw_norm_pac_preds = voting(d_raw_norm_pca_train, d_raw_norm_pca_test, min_abnormal_features = 10, threshold = 50)

100%|██████████| 18880/18880 [07:10<00:00, 43.82it/s]


# KM-D

## KM-D for d_raw

In [41]:
"""def K_means_Distance_test(test_idx, test_data, model):
    c1 = np.array(test_data.iloc[test_idx,:])
    c2 = model.cluster_centers_[0]
    temp = c1 - c2
    euclid_dist = np.sqrt(np.dot(temp.T, temp))
    euclid_dist
    return euclid_dist

def kmd_model_test(test_data, train_data, model):
    kmd_d_raw_preds  = [] 
    for idx in tqdm(range(len(test_data))):
        dis = K_means_Distance_test(idx, test_data, model)
        kmd_d_raw_preds.append(dis)
    return kmd_d_raw_preds

kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_norm_pca_probs_train_noraml)
res = kmd_model_test(d_norm_pca_probs_train_noraml, d_norm_pca_probs_train_noraml,kmeans)
np.mean(res)+2.5*np.std(res)"""

'def K_means_Distance_test(test_idx, test_data, model):\n    c1 = np.array(test_data.iloc[test_idx,:])\n    c2 = model.cluster_centers_[0]\n    temp = c1 - c2\n    euclid_dist = np.sqrt(np.dot(temp.T, temp))\n    euclid_dist\n    return euclid_dist\n\ndef kmd_model_test(test_data, train_data, model):\n    kmd_d_raw_preds  = [] \n    for idx in tqdm(range(len(test_data))):\n        dis = K_means_Distance_test(idx, test_data, model)\n        kmd_d_raw_preds.append(dis)\n    return kmd_d_raw_preds\n\nkmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_norm_pca_probs_train_noraml)\nres = kmd_model_test(d_norm_pca_probs_train_noraml, d_norm_pca_probs_train_noraml,kmeans)\nnp.mean(res)+2.5*np.std(res)'

In [42]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_raw_train_normal)
kmd_d_raw_preds = kmd_model(d_raw_test, d_raw_train_normal,kmeans, 0.8)

100%|██████████| 18880/18880 [00:03<00:00, 5892.81it/s]


## KM-D for d_raw_pca

In [43]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_raw_pca_train_normal)
kmd_d_raw_pca_preds = kmd_model(d_raw_pca_test, d_raw_pca_train_normal,kmeans,0.8)

100%|██████████| 18880/18880 [00:02<00:00, 8843.43it/s]


## KM-D for d_raw_norm

In [44]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_raw_norm_train_normal)
kmd_d_raw_norm_preds = kmd_model(d_raw_norm_test, d_raw_norm_train_normal,kmeans,0.5)
# np.unique(kmd_d_raw_norm_preds, return_counts=True)

100%|██████████| 18880/18880 [00:03<00:00, 5468.97it/s]


## KM-D for d_raw_norm_pca

In [45]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_raw_norm_pca_train_noraml)
kmd_d_raw_norm_pca_preds = kmd_model(d_raw_norm_pca_test, d_raw_norm_pca_train_noraml,kmeans,0.5)

100%|██████████| 18880/18880 [00:02<00:00, 8528.40it/s]


## KM-D for d_raw_probs

In [46]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_raw_probs_train_normal)
kmd_d_raw_probs_preds = kmd_model(d_raw_probs_test, d_raw_probs_train_normal,kmeans,150)

100%|██████████| 18880/18880 [00:02<00:00, 8676.77it/s]


## KM-D for d_raw_pca_probs

In [47]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_raw_pca_probs_train_normal)
kmd_d_raw_pca_probs_preds = kmd_model(d_raw_pca_probs_test, d_raw_pca_probs_train_normal,kmeans,165)

100%|██████████| 18880/18880 [00:02<00:00, 8062.90it/s]


## KM-D for d_norm_probs

In [48]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_norm_probs_train_normal)
kmd_d_raw_norm_probs_preds = kmd_model(d_norm_probs_test, d_norm_probs_train_normal,kmeans,150)

100%|██████████| 18880/18880 [00:02<00:00, 9409.75it/s]


## KM-D for d_norm_pca_probs

In [49]:
kmeans = KMeans(n_clusters=1, random_state=0, n_init="auto").fit(d_norm_pca_probs_train_noraml)
kmd_d_raw_norm_pca_probs_preds = kmd_model(d_norm_pca_probs_test, d_norm_pca_probs_train_noraml,kmeans,176)

100%|██████████| 18880/18880 [00:02<00:00, 7410.26it/s]


# SVM

## SVM for d_raw

In [50]:
svm_d_raw_preds = SVM_model(d_raw_train, d_raw_test)

## SVM for d_raw_pca

In [51]:
svm_d_raw_pca_preds = SVM_model(d_raw_pca_train, d_raw_pca_test)

## SVM for d_raw_norm

In [52]:
svm_d_raw_norm_preds = SVM_model(d_raw_norm_train, d_raw_norm_test)

## SVM for d_raw_norm_pca

In [53]:
svm_d_raw_norm_pca_preds = SVM_model(d_raw_norm_pca_train, d_raw_norm_pca_test)

## SVM for d_raw_probs

In [54]:
svm_d_raw_probs_preds = SVM_model(d_raw_probs_train, d_raw_probs_test)

## SVM for d_raw_pca_probs

In [55]:
svm_d_raw_pca_probs_preds = SVM_model(d_raw_pca_probs_train, d_raw_pca_probs_test)

## SVM for d_norm_probs

In [56]:
svm_d_norm_probs_preds = SVM_model(d_norm_probs_train, d_norm_probs_test)

## SVM for d_norm_pca_probs 

In [57]:
svm_d_norm_pca_probs_preds = SVM_model(d_norm_pca_probs_train, d_norm_pca_probs_test)

# KM-C

## KM-C for d_raw

In [58]:
kmc_d_raw_preds = kmean_C_model(d_raw_train, d_raw_test)

## KM-C for d_raw_pca

In [59]:
kmc_d_raw_pca_preds = kmean_C_model(d_raw_pca_train, d_raw_pca_test)

## KM-C for d_raw_norm

In [60]:
kmc_d_raw_norm_preds = kmean_C_model(d_raw_norm_train, d_raw_norm_test)

## KM-C for d_raw_norm_pca

In [61]:
kmc_d_raw_norm_pca_preds = kmean_C_model(d_raw_norm_pca_train, d_raw_norm_pca_test)

## KM-C for d_raw_probs

In [62]:
kmc_d_raw_probs_preds = kmean_C_model(d_raw_probs_train, d_raw_probs_test)

## KM-C for d_raw_pca_probs

In [63]:
kmc_d_raw_pca_probs_preds = kmean_C_model(d_raw_pca_probs_train, d_raw_pca_probs_test)

## KM-C for d_norm_probs

In [64]:
kmc_d_norm_probs_preds = kmean_C_model(d_norm_probs_train, d_norm_probs_test)

## KM-D for d_norm_pca_probs

In [65]:
kmc_d_norm_pca_probs_preds = kmean_C_model(d_norm_pca_probs_train, d_norm_pca_probs_test)

# DT

## DT for d_raw

In [66]:
dt_d_raw_preds = dt_model(d_raw_train, d_raw_test)

## DT for d_raw_pca

In [67]:
dt_d_raw_pca_preds = dt_model(d_raw_pca_train, d_raw_pca_test)

## DT for d_raw_norm

In [68]:
dt_d_raw_norm_preds = dt_model(d_raw_norm_train, d_raw_norm_test)

## DT for d_raw_norm_pca

In [69]:
dt_d_raw_norm_pca_preds = dt_model(d_raw_norm_pca_train, d_raw_norm_pca_test)

## DT for d_raw_probs 

In [70]:
dt_d_raw_probs_preds = dt_model(d_raw_probs_train, d_raw_probs_test)

## DT for d_raw_pca_probs

In [71]:
dt_d_raw_pca_probs_preds = dt_model(d_raw_pca_probs_train, d_raw_pca_probs_test)

## DT for d_norm_probs

In [72]:
dt_d_norm_probs_preds = dt_model(d_norm_probs_train, d_norm_probs_test)

## DT for d_norm_pca_probs

In [73]:
dt_d_norm_pca_probs_preds = dt_model(d_norm_pca_probs_train, d_norm_pca_probs_test)

## MLP

## MLP for d_raw

In [74]:
mlp_d_raw_preds = mlp_model(d_raw_train, d_raw_test)

## MLP for d_raw_pca

In [75]:
mlp_d_raw_pca_preds = mlp_model(d_raw_pca_train, d_raw_pca_test)

## MLP for d_raw_norm

In [76]:
mlp_d_raw_norm_preds = mlp_model(d_raw_norm_train, d_raw_norm_test)

## MLP for d_raw_norm_pca

In [77]:
mlp_d_raw_norm_pca_preds = mlp_model(d_raw_norm_pca_train, d_raw_norm_pca_test)

## MLP for d_raw_probs

In [78]:
mlp_d_raw_probs_preds = mlp_model(d_raw_probs_train, d_raw_probs_test)

## MLP for d_raw_pca_probs

In [79]:
mlp_d_raw_pca_probs_preds = mlp_model(d_raw_pca_probs_train, d_raw_pca_probs_test)

## MLP for d_norm_probs

In [80]:
mlp_d_norm_probs_preds = mlp_model(d_norm_probs_train, d_norm_probs_test)

## MLP for d_norm_pca_probs

In [81]:
mlp_d_norm_pca_probs_preds = mlp_model(d_norm_pca_probs_train, d_norm_pca_probs_test)

# Evaluation

In [81]:
def f1(y_true, y_pred):
    return f1_score(y_true, y_pred)

In [82]:
f1(test_target, mlp_d_raw_preds)

0.842760821452468

In [83]:
def sensitivity(y_true, y_pred):
    tp = sum((y_true == 1) & (y_pred == 1))
    fn = sum((y_true == 1) & (y_pred == 0))

    sensitivity_score = tp / (tp + fn) if (tp + fn) > 0 else 0.0

    return sensitivity_score

In [84]:
sensitivity(test_target, mlp_d_raw_preds)

0.7417104547925348