In [153]:
# Import the required libraries
import pandas as pd
import numpy as np
import csv
import sklearn.model_selection
import sklearn.ensemble
import rdkit.Chem.Descriptors
from rdkit import Chem

In [154]:
# Read the first batch of tested molecules. `DataFrame` keeps the frame exactly as
# parsed; `original_df` is an independent working copy that later cells modify.
DataFrame = pd.read_csv('tested_molecules_1.csv')
original_df = DataFrame.copy(deep=True)

In [155]:
# Both CSV files are parsed by pandas as ONE packed column ("SMILES<separator>label").
# Unpack that column into separate 'SMILES' and 'ALDH1_inhibition' columns for each
# file, then stack the two files into a single data set.

# File 1 (already loaded into original_df): comma separator, label wrapped in quotes.
packed_col_1 = 'SMILES,"ALDH1_inhibition"'
original_df[['SMILES', 'ALDH1_inhibition']] = original_df[packed_col_1].str.split(',', expand=True)
original_df = original_df.drop(columns=packed_col_1)
# Strip the literal double quotes so the labels are plain strings (e.g. '0' / '1').
original_df['ALDH1_inhibition'] = original_df['ALDH1_inhibition'].str.strip('"')

# File 2: semicolon separator; no quote stripping is applied to its labels.
# The file is read once here (the cell previously read it twice in a row).
original_df_test = pd.read_csv('tested_molecules_2.csv')
packed_col_2 = 'SMILES;ALDH1_inhibition'
original_df_test[['SMILES', 'ALDH1_inhibition']] = original_df_test[packed_col_2].str.split(';', expand=True)
original_df_test = original_df_test.drop(columns=packed_col_2)

# Stack both batches; ignore_index gives the result a fresh 0..n-1 index.
combined_df = pd.concat([original_df, original_df_test], ignore_index=True)

# From here on `DataFrame` refers to the combined, cleaned data set.
DataFrame = combined_df.copy()

In [156]:
# Accumulators for the per-iteration counts of true positives, true negatives,
# false positives and false negatives. Run this cell only once, before the first
# iteration: re-running it empties all four lists.
TP_list, TN_list, FP_list, FN_list = [], [], [], []

In [350]:
# Hold out a random 20% of the rows as the test set; the remaining 80% is the
# training set. No seed is passed, so every run of this cell draws a new split.
Train, Test = sklearn.model_selection.train_test_split(DataFrame, test_size=0.2)

# Divide the training set into the molecules (X) and the labels (y).
X, y = Train['SMILES'], Train['ALDH1_inhibition']
print(X, y, sep='\n')

1145                  c1ccc2c(Nc3ccncc3)nc(-c3ccoc3)nc2c1
205                 CCc1c(C)sc(NC(=O)c2cc(C(C)C)on2)c1C#N
721     CCn1c(=O)n(CC(=O)Nc2cc(OC)c(OC)c(OC)c2)c(=O)c2...
1330      FC(F)C(F)(F)C(F)(F)C(F)(F)c1cc(-c2ccccc2)[nH]n1
201     COC(=O)[C@H]1C[C@@H]1[C@H](NC(=O)c1ccccc1)c1cc...
                              ...                        
1667                           Nc1c(Cl)nc(-c2ccccc2)nc1Cl
994     CCc1c(C)nc2ncnn2c1N1CCCC(C(=O)NCc2ccc3c(c2)OCO...
1251                    Fc1ccc(CNn2cnnc2SCc2ccc(F)cc2)cc1
646                COC(=O)c1cc2n(n1)CCN(Cc1ccc(F)cc1)C2=O
467       CCc1ccc(C2c3[nH]c4ccccc4c3CCN2C(=O)CNC(N)=O)cc1
Name: SMILES, Length: 1600, dtype: object
1145    0
205     1
721     0
1330    0
201     1
       ..
1667    0
994     0
1251    0
646     0
467     0
Name: ALDH1_inhibition, Length: 1600, dtype: object


In [351]:
# Build the training feature matrix: one row per training molecule, one column per
# RDKit descriptor.
#
# The descriptors are listed once, in the column order the model is trained on, and
# are computed in a single pass instead of one copy-pasted loop per descriptor.
# 'MaxEStateIndex' is deliberately absent: it used to be computed first, but the
# feature matrix was re-created right afterwards, so it never reached the model.
DESCRIPTOR_NAMES = [
    'qed',
    'MolWt',
    'MaxPartialCharge',
    'MinPartialCharge',
    'BCUT2D_CHGHI',
    'BCUT2D_CHGLO',
    'BCUT2D_LOGPHI',
    'BCUT2D_MRHI',
    'BalabanJ',
    'AvgIpc',
    'BertzCT',
    'Kappa2',
    'PEOE_VSA7',
    'SMR_VSA1',
    'SMR_VSA10',
    'SlogP_VSA12',
    'NumHAcceptors',
    'NumHeteroatoms',
    'RingCount',
    'fr_C_S',
    'fr_furan',
    'SlogP_VSA5',
    'VSA_EState8',
    'MolLogP',
]

X_train = Train['SMILES']

# Parse every SMILES string exactly once and reuse the molecule for all descriptors.
train_mols = [Chem.MolFromSmiles(smiles) for smiles in X_train]

# MolFromSmiles returns None for a string it cannot parse; fail with a readable
# message here rather than with an opaque error inside a descriptor function.
unparsable_train = [smiles for smiles, mol in zip(X_train, train_mols) if mol is None]
if unparsable_train:
    raise ValueError(
        f'RDKit could not parse {len(unparsable_train)} training SMILES, '
        f'e.g. {unparsable_train[0]!r}'
    )

# Each name is looked up as a function in rdkit.Chem.Descriptors and applied to
# every molecule; the dict keeps the column order of DESCRIPTOR_NAMES.
feature_matrix_train = pd.DataFrame({
    name: [getattr(rdkit.Chem.Descriptors, name)(mol) for mol in train_mols]
    for name in DESCRIPTOR_NAMES
})
print(feature_matrix_train)

           qed    MolWt  MaxPartialCharge  MinPartialCharge  BCUT2D_CHGHI  \
0     0.616338  288.310          0.164893         -0.471841      2.097960   
1     0.929918  303.387          0.277989         -0.360331      2.192704   
2     0.634475  413.430          0.331449         -0.492634      2.170867   
3     0.782552  344.205          0.383402         -0.277352      2.761534   
4     0.863972  309.365          0.308539         -0.468783      2.490747   
...        ...      ...               ...               ...           ...   
1595  0.779102  240.093          0.162212         -0.393720      2.086315   
1596  0.673979  422.489          0.253803         -0.453595      2.289563   
1597  0.700648  332.379          0.209513         -0.318117      2.055172   
1598  0.805422  303.293          0.358086         -0.464240      2.276132   
1599  0.653330  376.460          0.312110         -0.356148      2.338417   

      BCUT2D_CHGLO  BCUT2D_LOGPHI  BCUT2D_MRHI  BalabanJ      BertzCT  \
0 

In [369]:
# Fit a random forest classifier (default hyper-parameters) on the training
# descriptors and their labels.
forest = sklearn.ensemble.RandomForestClassifier()
forest.fit(feature_matrix_train, y)
# fit() returns the estimator itself, so `predictor` is the same fitted model.
predictor = forest

In [370]:
# Store the correct labels for the test set in a plain list, in test-set row order.
# The list is built with tolist() rather than a `for y in ...` loop: such a loop
# rebinds the global `y`, which holds the training labels used to fit the forest.
y_test = Test['ALDH1_inhibition']
test_correct_values = y_test.tolist()
print(test_correct_values)

['0', '1', '0', '1', '1', '0', '0', '0', '0', '0', '0', '1', '0', '1', '1', '1', '0', '1', '0', '0', '1', '0', '1', '0', '0', '1', '1', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '1', '0', '0', '0', '1', '0', '1', '1', '0', '0', '1', '1', '1', '0', '1', '0', '1', '0', '0', '1', '0', '0', '0', '1', '1', '1', '0', '1', '0', '0', '0', '1', '0', '1', '0', '0', '0', '0', '1', '1', '1', '1', '0', '0', '1', '0', '0', '0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0', '1', '1', '0', '1', '0', '1', '0', '0', '0', '1', '1', '1', '0', '0', '1', '1', '0', '1', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '1', '0', '0', '0', '1', '0', '1', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '0',

In [371]:
# Build the test feature matrix: one row per test molecule, one column per RDKit
# descriptor.
#
# The descriptor names are taken from the columns of feature_matrix_train, so the
# test matrix always has exactly the columns (and column order) the forest was
# fitted on. 'MaxEStateIndex' used to be computed here first, but the matrix was
# re-created right afterwards, so it was never part of the features.
test_descriptor_names = list(feature_matrix_train.columns)

X_test = Test['SMILES']

# Parse every SMILES string exactly once and reuse the molecule for all descriptors.
test_mols = [Chem.MolFromSmiles(smiles) for smiles in X_test]

# MolFromSmiles returns None for a string it cannot parse; fail with a readable
# message here rather than with an opaque error inside a descriptor function.
unparsable_test = [smiles for smiles, mol in zip(X_test, test_mols) if mol is None]
if unparsable_test:
    raise ValueError(
        f'RDKit could not parse {len(unparsable_test)} test SMILES, '
        f'e.g. {unparsable_test[0]!r}'
    )

# Each name is looked up as a function in rdkit.Chem.Descriptors and applied to
# every molecule; the dict keeps the column order of test_descriptor_names.
feature_matrix_test = pd.DataFrame({
    name: [getattr(rdkit.Chem.Descriptors, name)(mol) for mol in test_mols]
    for name in test_descriptor_names
})
print(feature_matrix_test)

          qed    MolWt  MaxPartialCharge  MinPartialCharge  BCUT2D_CHGHI  \
0    0.780940  290.326          0.231922         -0.423472      2.126457   
1    0.615716  374.467          0.321220         -0.504254      2.373385   
2    0.638403  337.441          0.341930         -0.495813      2.184515   
3    0.631480  337.377          0.349807         -0.549826      2.302886   
4    0.636570  358.512          0.306092         -0.321739      2.339190   
..        ...      ...               ...               ...           ...   
395  0.511384  384.527          0.242623         -0.331110      2.292007   
396  0.772246  265.342          0.253127         -0.348094      2.106467   
397  0.828073  287.383          0.336197         -0.422683      2.193667   
398  0.619848  261.325          0.292176         -0.371467      2.236901   
399  0.807695  368.388          0.247923         -0.545464      2.171832   

     BCUT2D_CHGLO  BCUT2D_LOGPHI  BCUT2D_MRHI  BalabanJ     BertzCT    Kappa2  \
0     

In [389]:
# Apply the random forest to the test matrix and compare the predictions with the
# correct labels to obtain the confusion-matrix counts for this iteration.
predictions = forest.predict(feature_matrix_test)
print(predictions)

# Every prediction must have exactly one reference label to be compared with.
if len(predictions) != len(test_correct_values):
    raise ValueError(
        f'{len(predictions)} predictions but {len(test_correct_values)} correct labels'
    )

TP = TN = FP = FN = 0

# Labels are the strings '0' and '1'. Pairing with zip keeps each prediction
# aligned with its own label; a hand-maintained index that only advances inside
# the recognised branches would drift after the first unexpected value.
for prediction, actual in zip(predictions, test_correct_values):
    if prediction == '1' and actual == '1':
        TP += 1
    elif prediction == '0' and actual == '0':
        TN += 1
    elif prediction == '1' and actual == '0':
        FP += 1
    elif prediction == '0' and actual == '1':
        FN += 1
    else:
        # Surface malformed labels instead of silently producing wrong counts.
        raise ValueError(
            f'Unexpected label pair: predicted {prediction!r}, correct {actual!r}'
        )

print('True positive:', TP)
print('True negative:', TN)
print('False positive:', FP)
print('False negative:', FN)

['0' '1' '1' '1' '1' '0' '0' '0' '0' '0' '0' '1' '0' '0' '1' '1' '0' '0'
 '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '1' '0' '0'
 '0' '0' '0' '0' '0' '0' '1' '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '1' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0' '1' '1' '1' '0' '1' '0' '0'
 '0' '0' '0' '0' '1' '0' '1' '0' '0' '0' '1' '1' '1' '0' '1' '0' '0' '0'
 '1' '0' '1' '0' '1' '0' '0' '0' '1' '1' '1' '0' '0' '0' '0' '0' '0' '0'
 '0' '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '1' '0' '1'
 '0' '0' '1' '1' '0' '0' '0' '1' '1' '0' '1' '0' '1' '1' '1' '0' '1' '1'
 '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0' '0'
 '1' '1' '0' '0' '1' '0' '1' '0' '1' '0' '0' '0' '0' '0' '0' '0' '1' '0'
 '1' '1' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '1' '0' '1' '1' '0' '0'
 '0' '0' '0' '0' '1' '0' '1' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '1' '0' '1'
 '0' '0' '1' '0' '0' '0' '1' '1' '1' '1' '0' '0' '0

In [390]:
# Record this iteration's TP, TN, FP and FN counts in the lists created earlier.
# The whole split/train/evaluate sequence is meant to be repeated 5 times.
for history, count in zip((TP_list, TN_list, FP_list, FN_list), (TP, TN, FP, FN)):
    history.append(count)

In [391]:
# Show the collected counts (TP, TN, FP, FN in that order) to check that each
# list holds 5 numbers.
for counts in (TP_list, TN_list, FP_list, FN_list):
    print(counts)

[54, 68, 70, 71, 70]
[260, 236, 232, 247, 241]
[23, 38, 44, 27, 38]
[63, 58, 54, 55, 51]


In [392]:
# After repeating the cells above (5 times is intended), compute the average TP,
# TN, FP and FN values. The divisor is the number of recorded iterations, not a
# fixed 5, so the averages stay correct if the process was run more or fewer times.
n_iterations = len(TP_list)
if n_iterations == 0:
    raise ValueError('No iterations recorded yet: run the evaluation cells first.')
if not (len(TN_list) == len(FP_list) == len(FN_list) == n_iterations):
    raise ValueError('TP/TN/FP/FN lists have different lengths; reset them and re-run.')

average_TP = sum(TP_list) / n_iterations
average_TN = sum(TN_list) / n_iterations
average_FP = sum(FP_list) / n_iterations
average_FN = sum(FN_list) / n_iterations
print(average_TP, average_TN, average_FP, average_FN)

66.6 243.2 34.0 56.2


In [394]:
# Calculate the performance metrics from the averaged confusion-matrix counts.
# Note: these are metrics of the averaged counts, not averages of per-iteration metrics.

# Total number of evaluated molecules per iteration. Derived from the counts
# themselves rather than hard-coded, so it follows the actual test-set size.
total_predictions = average_TP + average_TN + average_FP + average_FN

recall_score = average_TP / (average_TP + average_FN)        # TP / all actual positives
precision_score = average_TP / (average_TP + average_FP)     # TP / all predicted positives
accuracy_score = (average_TP + average_TN) / total_predictions
print('average recall score over 5 iterations:', recall_score)
print('average precision score over 5 iterations:', precision_score)
print('average accuracy score over 5 iterations:', accuracy_score)

average recall score over 5 iterations: 0.5423452768729642
average precision score over 5 iterations: 0.6620278330019881
average accuracy score over 5 iterations: 0.7744999999999999
