In [183]:
# Import the required libraries
import pandas as pd
import numpy as np
import csv
import sklearn.model_selection
import sklearn.ensemble
import rdkit.Chem.Descriptors
from rdkit import Chem

In [184]:
# load the data into a dataframe
DataFrame = pd.read_csv('tested_molecules_1.csv')
original_df = DataFrame.copy()

In [185]:
# Split the column
original_df[['SMILES', 'ALDH1_inhibition']] = original_df['SMILES,"ALDH1_inhibition"'].str.split(',', expand=True)
original_df.drop('SMILES,"ALDH1_inhibition"', axis=1, inplace=True)

original_df['ALDH1_inhibition'] = original_df['ALDH1_inhibition'].str.strip('"')
# Read file for original_df_test
original_df_test = pd.read_csv('tested_molecules_2.csv')

original_df_test = pd.read_csv('tested_molecules_2.csv')
original_df_test[['SMILES', 'ALDH1_inhibition']] = original_df_test['SMILES;ALDH1_inhibition'].str.split(';', expand=True)
original_df_test.drop('SMILES;ALDH1_inhibition', axis=1, inplace=True)

combined_df = pd.concat([original_df, original_df_test], ignore_index=True)

DataFrame = combined_df.copy()

In [186]:
# create lists for your true positives, true negatives, false positives and false negatives. Only run before the first iteration.
TP_list = []
TN_list = []
FP_list = []
FN_list = []

In [187]:
# Randomly select 20% of the data to be the test set. The other 80% will be the training set.
# Devide the training set into the molecules (X) and the labels (y). 
Train, Test = sklearn.model_selection.train_test_split(DataFrame, test_size = 0.2)
X = Train['SMILES']
y = Train['ALDH1_inhibition']
print(X)
print(y)

1914        CC1CCCN(C(=O)c2sc3cc([N+](=O)[O-])ccc3c2Cl)C1
757     Cc1ccc2c(c1)N1C(=O)c3ccccc3C1C1=C(O)CC(C)(C)CC...
814                       COc1cc(S(=O)(=O)Nc2cccnc2)ccc1F
363             CC(C)NC(=O)CSc1nnc(-c2ccc(F)cc2)c2ccccc12
1504                         FC(c1nc2ccccc2[nH]1)C(F)(F)F
                              ...                        
354     COc1ccccc1NS(=O)(=O)c1ccc([O-])c(C(=O)OCc2cccc...
1308                         [S-]c1ncnc2sc(Nc3ccccc3)nc12
571                          CCNC(=O)CCSc1nnc(-c2ccco2)o1
324     COc1ccccc1N1CC[NH+](CCn2c(=O)[nH]c3c([nH]c4ccc...
391             Cc1ccc(C2=NN(CC(=O)NCc3cccs3)C(=O)CC2)cc1
Name: SMILES, Length: 1600, dtype: object
1914    1
757     0
814     0
363     0
1504    0
       ..
354     0
1308    0
571     0
324     0
391     0
Name: ALDH1_inhibition, Length: 1600, dtype: object


In [188]:
# Create a matrix and add the descriptors for the training set.

# descriptor ... : MaxEStateIndex

feature_matrix_train = pd.DataFrame()
X_train = Train['SMILES']
descripted_X_train = []
for molecule in X_train:
    mol = rdkit.Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MaxEStateIndex(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['MaxEStateIndex'] = descripted_X_train

In [189]:

# descriptor 1: qed
feature_matrix_train = pd.DataFrame()
X_train = Train['SMILES']
descripted_X_train = []
for molecule in X_train:
    mol = rdkit.Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.qed(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['qed'] = descripted_X_train

In [190]:
# descriptor 2: MolWt
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MolWt(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['MolWt'] = descripted_X_train

In [191]:
# descriptor 2: MaxPartialCharge
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MaxPartialCharge(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['MaxPartialCharge'] = descripted_X_train

In [192]:
# descriptor 2: MinPartialCharge
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MinPartialCharge(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['MinPartialCharge'] = descripted_X_train

In [193]:
# descriptor 3: BCUT2D_CHGHI
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_CHGHI(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['BCUT2D_CHGHI'] = descripted_X_train

In [194]:
# descriptor 3: BCUT2D_CHGLO
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_CHGLO(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['BCUT2D_CHGLO'] = descripted_X_train

In [195]:
# descriptor 4: BCUT2D_LOGPHI
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_LOGPHI(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['BCUT2D_LOGPHI'] = descripted_X_train

In [196]:
# descriptor 5: BCUT2D_MRHI
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_MRHI(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['BCUT2D_MRHI'] = descripted_X_train

In [197]:
# descriptor 5: BalabanJ
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BalabanJ(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['BalabanJ'] = descripted_X_train

# descriptor 6: AvgIpc
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.AvgIpc(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['AvgIpc'] = descripted_X_train

In [198]:
# descriptor 7: BertzCT
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BertzCT(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['BertzCT'] = descripted_X_train

In [199]:
# descriptor 7: Kappa2
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.Kappa2(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['Kappa2'] = descripted_X_train

In [200]:
# descriptor 7: PEOE_VSA7
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.PEOE_VSA7(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['PEOE_VSA7'] = descripted_X_train

In [201]:
# descriptor 8: SMR_VSA1
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SMR_VSA1(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['SMR_VSA1'] = descripted_X_train

In [202]:
# descriptor 8: SMR_VSA10
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SMR_VSA10(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['SMR_VSA10'] = descripted_X_train

# descriptor 9: SlogP_VSA12
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SlogP_VSA12(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['SlogP_VSA12'] = descripted_X_train

# descriptor 10: NumHAcceptors
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.NumHAcceptors(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['NumHAcceptors'] = descripted_X_train

# descriptor 11: NumHeteroatoms
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.NumHeteroatoms(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['NumHeteroatoms'] = descripted_X_train

# descriptor 12: RingCount
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.RingCount(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['RingCount'] = descripted_X_train

# descriptor 13: fr_C_S
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.fr_C_S(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['fr_C_S'] = descripted_X_train

# descriptor 14: fr_furan
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.fr_furan(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['fr_furan'] = descripted_X_train
print(feature_matrix_train)

In [203]:
# descriptor 14: SlogP_VSA5
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SlogP_VSA5(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['SlogP_VSA5'] = descripted_X_train

In [204]:
# descriptor 14: VSA_EState8
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.VSA_EState8(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['VSA_EState8'] = descripted_X_train

In [205]:
# descriptor 14: MolLogP
descripted_X_train = []
for molecule in X_train:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MolLogP(mol)
    descripted_X_train.append(desc_value)
feature_matrix_train['MolLogP'] = descripted_X_train
print(feature_matrix_train)

           qed    MolWt  MaxPartialCharge  MinPartialCharge  BCUT2D_CHGHI  \
0     0.604215  338.816          0.270383         -0.337639      2.261252   
1     0.683971  358.441          0.259066         -0.511850      2.465127   
2     0.931294  282.296          0.261583         -0.493640      2.163720   
3     0.701619  355.438          0.230117         -0.353202      2.127215   
4     0.731601  218.153          0.426518         -0.339206      2.429003   
...        ...      ...               ...               ...           ...   
1595  0.308365  457.440          0.337373         -0.871876      2.194345   
1596  0.566230  259.339          0.189299         -0.757928      2.044497   
1597  0.805523  267.310          0.283774         -0.459227      2.109591   
1598  0.440990  420.493          0.328647         -0.494586      2.223963   
1599  0.908778  341.436          0.243046         -0.349597      2.227686   

      BCUT2D_CHGLO  BCUT2D_LOGPHI  BCUT2D_MRHI  BalabanJ      BertzCT  \
0 

In [206]:
# Train a random forest using the feature matrix and the labels.
forest = sklearn.ensemble.RandomForestClassifier()
predictor = forest.fit(feature_matrix_train, y)

In [207]:
# Store the correct labels for the test set in an array.
y_test = Test['ALDH1_inhibition']
test_correct_values = []
for y in y_test:
    test_correct_values.append(y)
print(test_correct_values)

['0', '0', '0', '1', '0', '0', '0', '0', '1', '0', '1', '1', '0', '0', '0', '1', '0', '1', '1', '0', '0', '1', '1', '0', '0', '0', '0', '0', '0', '0', '1', '0', '1', '0', '1', '0', '0', '0', '0', '0', '1', '1', '0', '1', '1', '1', '0', '0', '1', '1', '1', '1', '0', '1', '1', '0', '0', '0', '0', '0', '1', '0', '0', '0', '1', '1', '0', '0', '0', '0', '0', '1', '0', '0', '0', '1', '1', '1', '0', '1', '1', '0', '0', '1', '1', '0', '1', '1', '0', '1', '1', '0', '0', '1', '1', '1', '0', '1', '0', '1', '0', '0', '0', '1', '0', '1', '0', '1', '0', '1', '0', '1', '1', '0', '0', '0', '0', '0', '0', '0', '1', '1', '0', '0', '0', '0', '1', '0', '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1', '1', '1', '1', '0', '0', '1', '0', '0', '0', '0', '1', '1', '1', '0', '1', '0', '0', '0', '1', '0', '0', '0', '1', '0', '0', '0', '0', '0', '0', '1', '0', '0', '0', '1', '0', '1', '0', '1', '1', '0', '1', '1', '0', '0', '0', '0', '0', '1', '0', '1', '0', '0', '0', '0', '1', '1', '0', '1', '0', '1', '0',

In [208]:
# Create a matrix and add the descriptors for the training set.

# descriptor ... : MaxEStateIndex

feature_matrix_test = pd.DataFrame()
X_train = Train['SMILES']
descripted_X_test = []
for molecule in X_test:
    mol = rdkit.Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MaxEStateIndex(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['MaxEStateIndex'] = descripted_X_test

In [209]:
# Create a matrix and add the descriptors for the test set.

# descriptor 1: qed
feature_matrix_test = pd.DataFrame()
X_test = Test['SMILES']
descripted_X_test = []
for molecule in X_test:
    mol = rdkit.Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.qed(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['qed'] = descripted_X_test

In [210]:
# descriptor 2: MolWt
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MolWt(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['MolWt'] = descripted_X_test

In [211]:
# descriptor 2: MaxPartialCharge
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MaxPartialCharge(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['MaxPartialCharge'] = descripted_X_test

In [212]:
# descriptor 2: MinPartialCharge
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MinPartialCharge(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['MinPartialCharge'] = descripted_X_test

In [213]:
# descriptor 3: BCUT2D_CHGHI
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_CHGHI(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['BCUT2D_CHGHI'] = descripted_X_test

In [214]:
# descriptor 3: BCUT2D_CHGLO
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_CHGLO(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['BCUT2D_CHGLO'] = descripted_X_test

In [215]:
# descriptor 4: BCUT2D_LOGPHI
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_LOGPHI(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['BCUT2D_LOGPHI'] = descripted_X_test

In [216]:
# descriptor 5: BCUT2D_MRHI
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BCUT2D_MRHI(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['BCUT2D_MRHI'] = descripted_X_test

In [217]:
# descriptor 5: BalabanJ
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BalabanJ(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['BalabanJ'] = descripted_X_test

# descriptor 6: AvgIpc
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.AvgIpc(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['AvgIpc'] = descripted_X_test

In [218]:
# descriptor 7: BertzCT
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.BertzCT(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['BertzCT'] = descripted_X_test

In [219]:
# descriptor 7: Kappa2
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.Kappa2(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['Kappa2'] = descripted_X_test

In [220]:
# descriptor 7: PEOE_VSA7
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.PEOE_VSA7(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['PEOE_VSA7'] = descripted_X_test

In [221]:
# descriptor 8: SMR_VSA1
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SMR_VSA1(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['SMR_VSA1'] = descripted_X_test

In [222]:
# descriptor 8: SMR_VSA10
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SMR_VSA10(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['SMR_VSA10'] = descripted_X_test

# descriptor 9: SlogP_VSA12
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SlogP_VSA12(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['SlogP_VSA12'] = descripted_X_test

# descriptor 10: NumHAcceptors
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.NumHAcceptors(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['NumHAcceptors'] = descripted_X_test

# descriptor 11: NumHeteroatoms
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.NumHeteroatoms(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['NumHeteroatoms'] = descripted_X_test

# descriptor 12: RingCount
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.RingCount(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['RingCount'] = descripted_X_test

# descriptor 13: fr_C_S
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.fr_C_S(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['fr_C_S'] = descripted_X_test

# descriptor 14: fr_furan
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.fr_furan(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['fr_furan'] = descripted_X_test
print(feature_matrix_test)

In [223]:
# descriptor 14: SlogP_VSA5
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.SlogP_VSA5(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['SlogP_VSA5'] = descripted_X_test

In [224]:
# descriptor 14: VSA_EState8
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.VSA_EState8(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['VSA_EState8'] = descripted_X_test

In [225]:
# descriptor 14: MolLogP
descripted_X_test = []
for molecule in X_test:
    mol = Chem.MolFromSmiles(molecule)
    desc_value = rdkit.Chem.Descriptors.MolLogP(mol)
    descripted_X_test.append(desc_value)
feature_matrix_test['MolLogP'] = descripted_X_test
print(feature_matrix_test)

          qed    MolWt  MaxPartialCharge  MinPartialCharge  BCUT2D_CHGHI  \
0    0.710201  212.174          0.417922         -0.305713      2.329193   
1    0.829010  368.455          0.242693         -0.339739      2.296022   
2    0.463175  309.350          0.329318         -0.369753      2.132511   
3    0.622913  287.365          0.270389         -0.464963      2.327730   
4    0.527106  309.350          0.256738         -0.431351      2.110325   
..        ...      ...               ...               ...           ...   
395  0.572371  335.363          0.252834         -0.435905      2.101796   
396  0.757570  244.334          0.132754         -0.496757      2.435159   
397  0.717381  326.372          0.166113         -0.544815      2.103491   
398  0.491515  444.536          0.235595         -0.377733      2.232405   
399  0.915551  273.376          0.286485         -0.455341      2.526505   

     BCUT2D_CHGLO  BCUT2D_LOGPHI  BCUT2D_MRHI  BalabanJ      BertzCT  \
0       -2.1194

In [232]:
# Apply the random forest to your test matrix and compare the predictions with the correct labels.
predictions = forest.predict(feature_matrix_test)

TP = 0
TN = 0
FP = 0
FN = 0
i = 0
for prediction in predictions:
    if prediction == 0:
        if test_correct_values[i] == 0:
            TN += 1
            i += 1
        elif test_correct_values[i] == 1:
            FN += 1
            i += 1
    elif prediction == 1:
        if test_correct_values[i] == 0:
            FP += 1
            i += 1
        elif test_correct_values[i] == 1:
            TP += 1
            i += 1
print('True positive:', TP)
print('True negative:', TN)
print('False positive:', FP)
print('False negative:', FN)

['0' '0' '0' '1' '0' '0' '0' '0' '1' '0' '1' '0' '0' '0' '1' '0' '0' '0'
 '1' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1' '0' '1' '0' '0' '0'
 '0' '1' '1' '0' '0' '1' '0' '0' '1' '1' '0' '0' '0' '1' '1' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0' '1' '0' '1' '1'
 '0' '1' '0' '0' '1' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '1' '0' '1'
 '1' '0' '0' '1' '0' '1' '0' '0' '0' '1' '0' '0' '1' '1' '1' '0' '0' '0'
 '1' '0' '0' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '1' '0' '0' '0' '1'
 '1' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '1' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '1' '0' '1' '0' '0' '0' '0' '0' '0' '1' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '1' '1' '0' '0' '1' '0' '0' '0' '0' '1' '0' '1'
 '1' '0' '0' '0' '1' '0' '1' '0' '0' '0' '0' '0' '0' '0' '1' '0' '1' '0'
 '1' '0' '0' '1' '0' '0' '0' '1' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '1' '0' '0' '0' '1' '0' '0' '1' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '1' '0' '0' '0' '1' '1' '1' '0' '0' '0

In [227]:
# Add your TP, TN, FP an FN values to the lists created earlier. Repeat the process 5 times.
TP_list.append(TP)
TN_list.append(TN)
FP_list.append(FP)
FN_list.append(FN)

In [228]:
# Check if there are 5 numbers in each list.
print(TP_list)
print(TN_list)
print(FP_list)
print(FN_list)

[0]
[0]
[0]
[0]


In [229]:
# After repeating the code above 5 times, compute the average TP, TN, FP and FN values
average_TP = sum(TP_list)/5
average_TN = sum(TN_list)/5
average_FP = sum(FP_list)/5
average_FN = sum(FN_list)/5
print(average_TP, average_TN, average_FP, average_FN)

0.0 0.0 0.0 0.0


In [230]:
# Calculate the performance metrics
recall_score = average_TP / (average_TP + average_FN)
precision_score = average_TP / (average_TP + average_FP)
accuracy_score = (average_TP + average_TN) / 200 # TP + TN + FP + FN is always 200.
print('average recall score over 5 iterations:', recall_score)
print('average precision score over 5 iterations:', precision_score)
print('average accuracy score over 5 iterations:', accuracy_score)

ZeroDivisionError: float division by zero