In [1]:
import pandas as pd

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit import DataStructs
from rdkit.Chem import AllChem
from rdkit import RDLogger

import numpy as np
import seaborn as sns

from typing import Optional, Union, Tuple

In [2]:
# Reaction-class tokens '<RX_1>' .. '<RX_10>' (numbering starts at 1).
template_codes = ['<RX_{}>'.format(class_no) for class_no in range(1, 11)]

In [3]:
# Human-readable reaction-class names (ten entries).
# NOTE(review): presumably index-aligned with `template_codes`, which also has
# ten entries -- confirm against the data source before relying on it.
template_names = [
    'Heteroatom alkylation and arylation',
    'Acylation and related processes',
    'C-C bond formation',
    'Heterocycle formation',
    'Protections',
    'Deprotections',
    'Reductions',
    'Oxidations',
    'Functional group conversions (FGI)',
    'Functional group additions (FGA)'
]

In [4]:
# Turn off every RDKit log channel matching 'rdApp.*' so parse warnings do not
# flood the cell outputs.
RDLogger.DisableLog('rdApp.*')

In [5]:
# with open(r'reactants.txt', 'w') as fp:
#     for smi in df[df['set']=='test']['products_mol'].tolist():
#         fp.write("%s\n" % smi[0])

In [6]:
def concat_molecules(mol_list: list) -> Union[Chem.Mol, None]:
    """Merge several RDKit molecules into one multi-fragment molecule.

    Each molecule is written back to SMILES, the strings are joined with '.'
    and the joined string is parsed again.

    Args:
        mol_list: RDKit molecules to merge.

    Returns:
        The molecule parsed from the joined SMILES, or None when building it
        raised an error (for example because an entry of `mol_list` is not a
        molecule). The parser's own return value is passed through unchanged,
        so it may also be None if RDKit rejects the joined string.
    """
    try:
        joined_smiles = '.'.join(Chem.MolToSmiles(mol) for mol in mol_list)
        return Chem.MolFromSmiles(joined_smiles)
    except Exception:
        # Best effort: any RDKit/conversion error maps to None. A bare
        # `except:` would also swallow KeyboardInterrupt/SystemExit.
        return None

def valid_molecules(smi_list: list) -> list:
    """Parse SMILES strings into RDKit molecules, keeping only parsed ones.

    Every string is canonicalised with `Chem.CanonSmiles` and the canonical
    form is parsed with `Chem.MolFromSmiles`. Strings whose parse result is
    None are reported on stdout and left out.

    NOTE(review): `Chem.CanonSmiles` appears to raise on an unparsable string
    before the None check is reached (callers in this notebook wrap it in
    try/except), so bad input probably surfaces as an exception instead of
    being skipped -- confirm against the installed RDKit version.

    Args:
        smi_list: SMILES strings.

    Returns:
        List of RDKit molecules in input order.
    """
    parsed = []
    for smi in smi_list:
        candidate = Chem.MolFromSmiles(Chem.CanonSmiles(smi))
        if candidate is None:
            print(f'\nProblematic SMILES string: {smi}\n')
            continue
        parsed.append(candidate)
    return parsed

def partial_correct(target_list: list, predicted_list: list) -> Tuple[float, list, list]:
    """Fraction of target molecules that are reproduced by the prediction.

    A predicted molecule reproduces a target molecule when each is a
    substructure of the other (RDKit `HasSubstructMatch` in both directions,
    default matching options). Matching is one-to-one: every target and every
    prediction is used at most once, so duplicated molecules cannot be counted
    several times and the score always lies in [0, 1].

    Args:
        target_list: SMILES strings of the ground-truth molecules.
        predicted_list: SMILES strings of the predicted molecules.

    Returns:
        Tuple `(score, matched_targets, matched_predictions)` where `score` is
        `matches / number of parsed targets` and the two lists hold the matched
        RDKit molecules pairwise in the order the matches were found. Returns
        `(0.0, [], [])` when either side has no parsable molecule.
    """
    target_mols = valid_molecules(target_list)
    predicted_mols = valid_molecules(predicted_list)

    if len(target_mols) == 0 or len(predicted_mols) == 0:
        print('\nThe molecule was not legit xD\n')
        print(f'Target SMILES: {target_list}')
        print(f'Predicted SMILES: {predicted_list}\n')
        return 0.0, [], []

    relevant_pred_mols = []
    relevant_target_mols = []
    # Targets still available for matching; a matched target is removed so a
    # second (duplicate) prediction cannot claim it again.
    unmatched_targets = list(target_mols)
    for p_mol in predicted_mols:
        for position, t_mol in enumerate(unmatched_targets):
            if t_mol.HasSubstructMatch(p_mol) and p_mol.HasSubstructMatch(t_mol):
                relevant_pred_mols.append(p_mol)
                relevant_target_mols.append(t_mol)
                del unmatched_targets[position]
                break  # this prediction is consumed as well

    return len(relevant_target_mols) / len(target_mols), relevant_target_mols, relevant_pred_mols

def halogen_correction(target_list: list, predicted_list: list) -> Tuple[float, list, list]:
    """Score a prediction with `partial_correct` after unifying all halogens.

    Every halogen atom (F, Cl, Br, I, At, Ts) in both SMILES lists is rewritten
    to iodine before scoring, so predictions that differ from the target only
    in which halogen they carry count as matches.

    The rewrite is done on SMILES tokens rather than raw substrings: a plain
    `str.replace('F', 'I')` would also rewrite the first letter of bracket
    atoms such as `[Fe]` or `[Fr]` and produce invalid SMILES.

    Args:
        target_list: SMILES strings of the ground-truth molecules.
        predicted_list: SMILES strings of the predicted molecules.

    Returns:
        Whatever `partial_correct` returns for the rewritten lists:
        `(score, matched_targets, matched_predictions)`.
    """
    import re

    halogen_rep = 'I'
    # Splits a SMILES into bracket atoms ("[...]", kept via the capture group)
    # and the text between them.
    bracket_atom = re.compile(r'(\[[^\]]*\])')
    # Inside brackets the element symbol follows '[' and an optional isotope
    # number; the look-ahead rejects two-letter symbols that merely start with
    # F or I (Fe, Fr, Fm, Fl, In, Ir).
    bracket_halogen = re.compile(r'(\[\d*)(?:Cl|Br|At|Ts|F|I)(?![a-z])')
    # Outside brackets only organic-subset atoms can occur, so F/Cl/Br/I are
    # unambiguous there.
    bare_halogen = re.compile(r'Cl|Br|F|I')

    def _unify(smi: str) -> str:
        # Odd positions of the split result are the captured bracket atoms.
        pieces = bracket_atom.split(smi)
        for position, piece in enumerate(pieces):
            if position % 2:
                pieces[position] = bracket_halogen.sub(r'\g<1>' + halogen_rep, piece)
            else:
                pieces[position] = bare_halogen.sub(halogen_rep, piece)
        return ''.join(pieces)

    target_list = [_unify(smi) for smi in target_list]
    predicted_list = [_unify(smi) for smi in predicted_list]

    return partial_correct(target_list, predicted_list)

def absolute_correct(target_list: list, predicted_list: list) -> bool:
    """Exact-match check between the target set and the predicted set.

    Both SMILES lists are parsed, each merged into a single multi-fragment
    molecule, and the canonical SMILES of the two merged molecules are
    compared.

    Args:
        target_list: SMILES strings of the ground-truth molecules.
        predicted_list: SMILES strings of the predicted molecules.

    Returns:
        True when the canonical SMILES are identical; False when they differ,
        when either side has no parsed molecule, or when merging returned None.
    """
    parsed_targets = valid_molecules(target_list)
    parsed_preds = valid_molecules(predicted_list)
    if not parsed_targets or not parsed_preds:
        return False

    merged_target = concat_molecules(parsed_targets)
    merged_pred = concat_molecules(parsed_preds)
    if merged_target is None or merged_pred is None:
        return False

    canonical_target = Chem.CanonSmiles(Chem.MolToSmiles(merged_target))
    canonical_pred = Chem.CanonSmiles(Chem.MolToSmiles(merged_pred))
    return canonical_target == canonical_pred

def tanimo_coeff(target_list: list, predicted_list: list) -> float:
    """Average best-match Tanimoto similarity between predictions and targets.

    Each molecule is turned into a Morgan bit-vector fingerprint (radius 3,
    2048 bits). For every predicted molecule the highest similarity to any
    target is taken; the best `len(targets)` of those values are summed and
    divided by the number of targets. Surplus predictions are therefore
    ignored and missing predictions count as similarity 0.

    Args:
        target_list: SMILES strings of the ground-truth molecules.
        predicted_list: SMILES strings of the predicted molecules.

    Returns:
        The averaged similarity, or 0.0 when either side has no parsed
        molecule (previously an empty target list raised IndexError or
        ZeroDivisionError).
    """
    target_mols = valid_molecules(target_list)
    predicted_mols = valid_molecules(predicted_list)
    target_fps = [AllChem.GetMorganFingerprintAsBitVect(x, 3, nBits=2048) for x in target_mols]
    predicted_fps = [AllChem.GetMorganFingerprintAsBitVect(x, 3, nBits=2048) for x in predicted_mols]

    t_len = len(target_fps)
    if t_len == 0 or len(predicted_fps) == 0:
        return 0.0

    # Best similarity of each prediction to any target, highest first.
    best_per_prediction = sorted(
        (max(DataStructs.BulkTanimotoSimilarity(p_fp, target_fps)) for p_fp in predicted_fps),
        reverse=True,
    )
    # Slicing handles both cases: with fewer predictions than targets the
    # slice is simply the whole list.
    return sum(best_per_prediction[:t_len]) / t_len

def tanimo_coeff_concat(target_list: list, predicted_list: list) -> float:
    """Tanimoto similarity between the merged target and merged prediction.

    Both SMILES lists are parsed and merged into one multi-fragment molecule
    each; the two merged molecules are fingerprinted (Morgan, radius 3,
    2048 bits) and compared.

    Args:
        target_list: SMILES strings of the ground-truth molecules.
        predicted_list: SMILES strings of the predicted molecules.

    Returns:
        The Tanimoto similarity of the two fingerprints, or 0.0 when merging
        either side returned None.
    """
    parsed_targets = valid_molecules(target_list)
    parsed_preds = valid_molecules(predicted_list)
    merged_target = concat_molecules(parsed_targets)
    merged_pred = concat_molecules(parsed_preds)

    if merged_target is None or merged_pred is None:
        return 0.0

    fingerprints = [
        AllChem.GetMorganFingerprintAsBitVect(merged, 3, nBits=2048)
        for merged in (merged_target, merged_pred)
    ]
    return DataStructs.TanimotoSimilarity(fingerprints[0], fingerprints[1])

In [25]:
class Benchmark():
    """Re-score a model's predictions with the partial-match metric.

    Typical use: construct, then call `preprocess_df()`, `find_score(df_new)`,
    optionally `find_abnormal_entries()`, and finally `benchmark()`.

    Attributes:
        source_data: Name of the prediction source; selects loader and layout.
        df: Frame as loaded from disk.
        df_new: Working frame built by `preprocess_df` (SMILES split to lists).
        df_incorrect: Rows of `df_new` with `correct_prediction == False`
            (set by `benchmark`).
        benchmark_vals: One score per scored row; -1 marks rows that raised.
        problematic_points: Index labels of the rows that raised in `find_score`.
        reconsider_rows: Rows flagged by `find_abnormal_entries`.
    """

    def __init__(
        self, 
        source_data: str='chemformer',
        output_file_name: str='products_mixed.txt', 
        input_file_name: Optional[str]='tgt-test.txt'
        ) -> None:
        """Load the prediction file `<source_data>/<output_file_name>`.

        Args:
            source_data: 'y_g2s' (CSV), 'chemformer' or 'graph2smiles' (pickle).
            output_file_name: File name inside the `source_data` directory.
            input_file_name: Accepted for interface compatibility; not used.

        Raises:
            ValueError: If `source_data` is not one of the supported names
                (previously the object was created without a `df` attribute
                and failed later).
        """
        self.source_data = source_data
        results_path = f'{source_data}/{output_file_name}'
        if source_data == 'y_g2s':
            self.df = pd.read_csv(results_path)
        elif source_data in ('chemformer', 'graph2smiles'):
            # NOTE(security): unpickling executes arbitrary code; only load
            # result files from a trusted source.
            self.df = pd.read_pickle(results_path)
        else:
            raise ValueError(
                f"Unsupported source_data {source_data!r}; expected 'y_g2s', 'chemformer' or 'graph2smiles'"
            )
        self.benchmark_vals = []
        self.problematic_points = []
        self.reconsider_rows = []

    def find_score(self, df_new: pd.DataFrame, isRet: bool=False):
        """Compute the `partial_correct` score for every row of `df_new`.

        Results are stored in `benchmark_vals` / `problematic_points`. Both
        lists are rebuilt on every call, so re-running the cell does not
        append a second copy of the scores.

        Args:
            df_new: Frame with list-valued 'target_smiles' and
                'predicted_smiles' columns.
            isRet: When True, return `(benchmark_vals, problematic_points)`.
        """
        self.benchmark_vals = []
        self.problematic_points = []
        for index, row in df_new.iterrows():
            try:
                # Use halogen_correction(...) here instead to score with all
                # halogens treated as interchangeable.
                self.benchmark_vals.append(partial_correct(row['target_smiles'], row['predicted_smiles'])[0])
            except Exception:
                # Rows that cannot be scored get the sentinel -1 and are
                # remembered by index label.
                self.benchmark_vals.append(-1)
                self.problematic_points.append(index)
        if isRet:
            return self.benchmark_vals, self.problematic_points

    def print_analysis(self, df_new: pd.DataFrame, df_incorrect: pd.DataFrame):
        """Print accuracy before/after re-scoring and the partial-match shares.

        Counts are row counts. Score buckets: exactly 1.0 ("all correct"),
        0.48-0.52 ("half") and 0.31-0.35 ("one third"). A percentage whose
        denominator is zero is reported as nan.

        Args:
            df_new: All scored rows.
            df_incorrect: The rows of `df_new` that failed the exact match;
                must contain a 'benchmark' column.
        """
        def _pct(part, whole):
            # 100 * part / whole, nan instead of a division error/warning.
            return 100 * part / whole if whole else float('nan')

        n_total = len(df_new)
        n_incorrect = len(df_incorrect)
        scores = df_incorrect['benchmark']
        n_rescued = int((scores == 1.0).sum())
        n_half = int(scores.between(0.48, 0.52).sum())
        n_third = int(scores.between(0.31, 0.35).sum())

        all_correct = _pct(n_rescued, n_incorrect)
        half_correct = _pct(n_half, n_incorrect)
        partial_correct_abs = _pct(n_half + n_third, n_total)
        a_third_correct = _pct(n_third, n_incorrect)
        initial_correct = _pct(n_total - n_incorrect, n_total)
        benchmark_correct = _pct(n_total - n_incorrect + n_rescued, n_total)
        
        print(f'Initial performance of the model (accuracy): {initial_correct:2.2f}%')
        print(f'Our benchmarked performance of the model (accuracy): {benchmark_correct:2.2f}%')
        print('-'*100)
        print(f'Percent improve in performance using our metric: {benchmark_correct-initial_correct:2.2f}%')
        print(f'Relative perecent improve in performance using our metric: {_pct(benchmark_correct-initial_correct, initial_correct):2.2f}%')
        print('-'*100)
        print(f'Partial correct increase in incorrectly classified datapoints: {partial_correct_abs:2.2f}%')
        print(f'Percent increase in all correct samples from incorrectly classified samples: {all_correct:2.2f}%')
        print(f'Percent increase in partially correct (half-correct) samples from incorrectly classified samples: {half_correct:2.2f}%')
        print(f'Percent increase in partially correct (one-third-correct) samples from incorrectly classified samples: {a_third_correct:2.2f}%')

    def preprocess_df(self):
        """Build `df_new` with list-valued SMILES columns and molecule counts.

        Per source:
          * 'y_g2s': drops three helper columns, splits both SMILES columns
            on '.'.
          * 'chemformer': renames 'original_smiles'/'prediction_0' to
            'target_smiles'/'predicted_smiles' (also on `self.df`), keeps the
            first three columns, splits on '.'.
          * 'graph2smiles': each cell is a sequence; its first element is
            split on '.', an empty prediction sequence becomes [].

        Adds 'num_targets' and 'num_preds' (list lengths).
        """
        if self.source_data == 'y_g2s':
            # Raw string: the column name really contains a backslash.
            self.df_new = self.df.drop(columns=['target_smiles_2d', 'predicted_smiles_2d', r'!correct_prediction and tanimoto_coeff > 0\.9'])
            self.df_new['target_smiles'] = self.df_new['target_smiles'].apply(lambda smi: smi.split('.'))
            self.df_new['predicted_smiles'] = self.df_new['predicted_smiles'].apply(lambda smi: smi.split('.'))
        
        elif self.source_data == 'chemformer':
            # Rebind instead of inplace=True; self.df still ends up renamed.
            self.df = self.df.rename(columns={'original_smiles': 'target_smiles', 'prediction_0': 'predicted_smiles'})
            self.df_new = self.df.drop(columns=self.df.columns.tolist()[3:])
            self.df_new['target_smiles'] = self.df_new['target_smiles'].apply(lambda smi: smi.split('.'))
            self.df_new['predicted_smiles'] = self.df_new['predicted_smiles'].apply(lambda smi: smi.split('.'))
        
        elif self.source_data == 'graph2smiles':
            self.df_new = self.df.copy()
            self.df_new['target_smiles'] = self.df_new['target_smiles'].apply(lambda cands: cands[0].split('.'))
            self.df_new['predicted_smiles'] = self.df_new['predicted_smiles'].apply(lambda cands: cands[0].split('.') if len(cands) else [])
        
        self.df_new['num_targets'] = self.df_new['target_smiles'].apply(len)
        self.df_new['num_preds'] = self.df_new['predicted_smiles'].apply(len)

    def benchmark(self, find_tanimoto: Optional[bool]= False):
        """Attach scores to `df_new`, derive `df_incorrect`, print the report.

        Requires `find_score` to have been run on `df_new` first (the stored
        scores are assigned as the 'benchmark' column). For 'chemformer' and
        'graph2smiles' the exact-match flag 'correct_prediction' is computed
        here; for other sources the column must already be present.

        Args:
            find_tanimoto: Also add a 'tanimoto_coeff' column computed with
                `tanimo_coeff_concat` (0.0 for rows that failed scoring).
        """
        self.df_new['benchmark'] = self.benchmark_vals
        if self.source_data == 'chemformer' or self.source_data == 'graph2smiles':
            self.df_new['correct_prediction'] = self.df_new.apply(lambda x: absolute_correct(x['target_smiles'], x['predicted_smiles']) if x['benchmark'] != -1 else False, axis=1)
        if find_tanimoto:
            self.df_new['tanimoto_coeff'] = self.df_new.apply(lambda x: tanimo_coeff_concat(x['target_smiles'], x['predicted_smiles']) if x['benchmark'] != -1 else 0.0, axis=1)
        self.df_incorrect = self.df_new[self.df_new['correct_prediction'].eq(False)]

        self.print_analysis(self.df_new, self.df_incorrect)

    def find_abnormal_entries(self):
        """Collect rows whose smallest target fragment was not predicted.

        A row is stored in `reconsider_rows` when its shortest raw target
        SMILES has at most 3 characters and the shortest canonical target
        SMILES differs from the shortest canonical predicted SMILES. Rows with
        an empty target or prediction list, or whose SMILES cannot be
        canonicalised, are skipped. The list is rebuilt on every call.
        """
        self.decide_factor = 3  # only used by the disabled size-ratio rule
        self.reconsider_rows = []

        for _, row in self.df_new.iterrows():
            targets, preds = row['target_smiles'], row['predicted_smiles']
            if not len(targets) or not len(preds):
                continue

            t_smallest = min(len(smi) for smi in targets)
            if t_smallest > 3:
                # Cannot be flagged; skip the costly canonicalisation.
                continue

            try:
                # min(..., key=len) picks the same element as a stable sort
                # by length followed by [0].
                shortest_pred = min([Chem.CanonSmiles(smi) for smi in preds], key=len)
                shortest_target = min([Chem.CanonSmiles(smi) for smi in targets], key=len)
            except Exception:
                continue

            # Disabled alternative rule: also flag rows whose largest target
            # and prediction differ in length by >= self.decide_factor.

            if shortest_target != shortest_pred:
                self.reconsider_rows.append(row)

In [26]:
# Load the Chemformer predictions from
# 'chemformer/chemformer_pred_test_50.pickle' and split the SMILES columns
# into per-molecule lists (creates USPTO_50k.df_new).
USPTO_50k = Benchmark(source_data='chemformer', output_file_name='chemformer_pred_test_50.pickle')
USPTO_50k.preprocess_df()

In [27]:
%%capture
# Score every row with partial_correct; output is captured because the helper
# functions print a message for each unparsable row.
USPTO_50k.find_score(USPTO_50k.df_new)

In [28]:
%%capture
# Fill USPTO_50k.reconsider_rows with rows whose shortest target SMILES
# (<= 3 characters) was not reproduced by the prediction.
USPTO_50k.find_abnormal_entries()

In [29]:
# Attach the scores, compute exact-match flags and print the accuracy report.
# Must run after find_score, which produces the values assigned here.
USPTO_50k.benchmark()

Initial performance of the model (accuracy): 53.30%
Our benchmarked performance of the model (accuracy): 54.52%
----------------------------------------------------------------------------------------------------
Percent improve in performance using our metric: 1.22%
Relative perecent improve in performance using our metric: 2.29%
----------------------------------------------------------------------------------------------------
Partial correct increase in incorrectly classified datapoints: 10.59%
Percent increase in all correct samples from incorrectly classified samples: 2.61%
Percent increase in partially correct (half-correct) samples from incorrectly classified samples: 22.42%
Percent increase in partially correct (one-third-correct) samples from incorrectly classified samples: 0.26%


In [30]:
# Number of rows for which scoring raised (recorded with the sentinel -1).
len(USPTO_50k.problematic_points)

30

In [31]:
# NOTE: despite the label, this counts `reconsider_rows` (rows flagged by
# find_abnormal_entries), not `problematic_points` from find_score.
print(f'\nNumber of problematic points: {len(USPTO_50k.reconsider_rows)}\n')


Number of problematic points: 125



In [32]:
# Percentage of ALL rows (not only the incorrect ones) whose score falls in
# the "half correct" bucket 0.48-0.52.
100 * USPTO_50k.df_new['benchmark'][USPTO_50k.df_new['benchmark'].between(0.48, 0.52)].count() / USPTO_50k.df_new['benchmark'].count()

10.471622701838529

In [36]:
# Raw counts behind the percentage: (rows scoring 0.48-0.52, all scored rows).
USPTO_50k.df_new['benchmark'][USPTO_50k.df_new['benchmark'].between(0.48, 0.52)].count(), USPTO_50k.df_new['benchmark'].count()

(524, 5004)

In [37]:
# Load the Graph2SMILES predictions from 'graph2smiles/USPTO_50k_g2s_result.pkl'.
Graph2Smiles = Benchmark(source_data='graph2smiles', output_file_name='USPTO_50k_g2s_result.pkl')

In [38]:
# Take the first entry of each SMILES sequence and split it on '.' into a
# list of molecules (creates Graph2Smiles.df_new).
Graph2Smiles.preprocess_df()

In [39]:
%%capture
# Score every row with partial_correct (printed diagnostics are captured).
Graph2Smiles.find_score(Graph2Smiles.df_new)

In [41]:
%%capture
# Flag rows whose shortest target SMILES (<= 3 characters) was not predicted.
Graph2Smiles.find_abnormal_entries()

In [42]:
# Attach scores, compute exact-match flags and print the accuracy report.
# NOTE(review): the recorded output below lacks the "Partial correct increase"
# line that the current print_analysis emits, so it appears to come from an
# older version of the class -- re-run to refresh.
Graph2Smiles.benchmark()

Initial performance of the model (accuracy): 51.25%
Our benchmarked performance of the model (accuracy): 57.30%
----------------------------------------------------------------------------------------------------
Percent improve in performance using our metric: 6.05%
Relative perecent improve in performance using our metric: 11.81%
----------------------------------------------------------------------------------------------------
Percent increase in all correct samples from incorrectly classified samples: 12.41%
Percent increase in partially correct (half-correct) samples from incorrectly classified samples: 16.39%
Percent increase in partially correct (one-third-correct) samples from incorrectly classified samples: 0.04%


In [43]:
# Number of rows for which scoring raised (recorded with the sentinel -1).
len(Graph2Smiles.problematic_points)

0

In [44]:
# NOTE: despite the label, this counts `reconsider_rows` (rows flagged by
# find_abnormal_entries), not `problematic_points` from find_score.
print(f'\nNumber of problematic points: {len(Graph2Smiles.reconsider_rows)}\n')
# for row in Graph2Smiles.reconsider_rows:
#     print(f'{row}\n')


Number of problematic points: 116



In [48]:
# Inspect the processed Graph2SMILES frame (pandas truncates the display).
Graph2Smiles.df_new

Unnamed: 0,target_smiles,predicted_smiles,num_targets,num_preds,benchmark,correct_prediction
0,"[CC(=O)c1ccc2[nH]ccc2c1, CC(C)(C)OC(=O)OC(=O)O...",[CC(O)c1ccc2c(ccn2C(=O)OC(C)(C)C)c1],2,1,0.0,False
1,"[CC(C)(C)OC(=O)OC(=O)OC(C)(C)C, Cc1ccc(S(=O)(=...",[CC(C)(C)OC(=O)N1C[C@H](O)[C@@H]2OC[C@H](O)[C@...,2,2,0.0,False
2,[CCOC(=O)c1nn(-c2ccc(Cl)cc2Cl)c(-c2ccc(OC)cc2)...,[CCOC(=O)c1nn(-c2ccc(Cl)cc2Cl)c(-c2ccc(OC)cc2)...,2,2,1.0,True
3,"[CC(C)(C)OC(=O)OC(=O)OC(C)(C)C, N#Cc1cc(-c2ccc...",[CC(C)(C)OC(=O)Nc1nc2c(-c3cccc([N+](=O)[O-])c3...,2,2,0.0,False
4,"[NCc1ccccc1S(=O)(=O)C1CC1, O=C(OC(=O)C(F)(F)F)...","[O=C(NCc1ccccc1S(=O)O)C(F)(F)F, O=S([O-])C1CC1]",2,2,0.0,False
...,...,...,...,...,...,...
5002,"[C=C(C)C(=O)Cl, OCc1ccc2cc(O)ccc2c1]","[C=C(C)C(=O)O, Oc1ccc2cc(CCl)ccc2c1]",2,2,0.0,False
5003,"[C[C@H]1CN(C(=O)COc2ccc(Cl)cc2)C[C@@H](C)N1, F...","[C[C@H]1CNC[C@@H](C)N1Cc1ccc(F)cc1, O=C(O)COc1...",2,2,0.0,False
5004,"[CCCCc1n[nH]c(=O)n1Cc1ccc(-c2ccccc2C#N)cc1, CO...","[CCCCc1n[nH]c(=O)n1Cc1ccc(-c2ccccc2C#N)cc1, CO...",2,2,1.0,True
5005,[CCOC(=O)c1c2n(c3cc(Br)c(F)cc3c1=O)CCS2],[CCOC(=O)c1c2n(c3cc(Br)c(F)cc3c1=O)CCS2],1,1,1.0,True


In [47]:
# Inspect the processed Chemformer frame (pandas truncates the display).
USPTO_50k.df_new

Unnamed: 0,target_smiles,predicted_smiles,log_likelihood_0,num_targets,num_preds,benchmark,correct_prediction
0,"[C1=COCCC1, COC(=O)CCC(=O)c1ccc(O)cc1O]","[C1=COCCC1, COC(=O)CCC(=O)c1ccc(O)cc1O]",-0.660116,2,2,1.0,True
1,"[COC(=O)c1cccc(C(=O)O)c1, Nc1cccnc1N]","[COC(=O)c1cccc(C(=O)O)c1, Nc1cccnc1N]",-0.669613,2,2,1.0,True
2,"[CC(C)(C)OC(=O)NC1CCC(C(=O)O)CC1, CNOC]","[CC(C)(C)OC(=O)NC1CCC(C(=O)O)CC1, CNOC]",-0.628202,2,2,1.0,True
3,"[Nc1ccc(O)cc1, O=[N+]([O-])c1ccc(Cl)nc1Cl]","[Nc1ccc(O)cc1, O=[N+]([O-])c1ccc(Cl)nc1Cl]",-0.679155,2,2,1.0,True
4,[[N-]=[N+]=NCC1=CC[C@@H](c2ccc(Cl)cc2Cl)[C@H](...,[C1(CN=[N+]=[N-])=CC[C@@H](c2c(Cl)cc(Cl)cc2)[C...,-3.970532,1,1,1.0,True
...,...,...,...,...,...,...,...
4999,"[Cc1cc([N+](=O)[O-])ccc1O, Nc1cc(Cl)ccn1]","[Cc1cc([N+](=O)[O-])ccc1O, Nc1cc(Cl)ccn1]",-0.692022,2,2,1.0,True
5000,[COC(=O)c1[nH]c2cc(Cl)cc3c2c1C(CC(=O)OC(C)(C)C...,[COC(=O)c1[nH]c2cc(Cl)cc3c2c1C(CC(=O)OC(C)(C)C...,-0.709788,1,1,1.0,True
5001,[COc1cc(C(F)(F)F)cc(SC)c1C(=O)NC1(c2ccccc2)CC(...,"[C=O, COc1cc(C(F)(F)F)cc(SC)c1C(=O)NC1(c2ccccc...",-0.661519,1,2,0.0,False
5002,"[C=C(C)Cn1nc(C)c(Br)c1-c1ccc(F)cc1, OO]",[COC(=O)C(C)Cn1nc(C)c(Br)c1-c1ccc(F)cc1],-0.876032,2,1,0.0,False


In [58]:
# Set of canonical SMILES of the full target side (fragments re-joined with
# '.') for every Chemformer row; used to compare test sets across models.
USPTO_50k_targets = set(USPTO_50k.df_new.apply(lambda x: Chem.CanonSmiles('.'.join(x['target_smiles'])), axis=1).to_list())

In [59]:
# Same canonical target set for the Graph2SMILES rows.
Graph2Smiles_targets = set(Graph2Smiles.df_new.apply(lambda x: Chem.CanonSmiles('.'.join(x['target_smiles'])), axis=1).to_list())

In [60]:
# Targets that occur in both models' result files.
common_targets = USPTO_50k_targets.intersection(Graph2Smiles_targets)

In [61]:
# Size of the overlap between the two target sets.
len(common_targets)

490

In [62]:
# Write one canonical target SMILES per line to 'g2s_target.txt'
# (no header, no index column).
Graph2Smiles.df_new.apply(lambda x: Chem.CanonSmiles('.'.join(x['target_smiles'])), axis=1).to_csv('g2s_target.txt', header=False, index=False)

In [80]:
def format_g2s(filename):
    """Rewrite a SMILES file in place with canonical, space-free SMILES.

    Every line is stripped of trailing whitespace, all space characters are
    removed, and the result is canonicalised with `Chem.CanonSmiles`. The
    whole file is read and converted before it is reopened for writing, so an
    error during conversion leaves the file untouched.

    Args:
        filename: Path of the text file (one SMILES per line) to rewrite.
    """
    with open(filename) as source:
        raw_lines = [text.rstrip() for text in source]

    canonical_lines = []
    for raw in raw_lines:
        canonical_lines.append(Chem.CanonSmiles(raw.replace(' ', '')))

    with open(filename, 'w') as sink:
        sink.writelines(f"{smi}\n" for smi in canonical_lines)

In [81]:
# Canonicalise the exported target file in place. The input file is handled
# by the same helper (call currently disabled).
# format_g2s('./g2s_input.txt')
format_g2s('./g2s_target.txt')

In [23]:
# Load the 'y_g2s' results from the CSV file 'y_g2s/wandb.csv'.
Y_G2S = Benchmark(source_data='y_g2s', output_file_name='wandb.csv')

In [24]:
# Drop the helper columns and split both SMILES columns on '.'.
Y_G2S.preprocess_df()

In [25]:
%%capture
# Score every row with partial_correct (printed diagnostics are captured).
Y_G2S.find_score(Y_G2S.df_new)

In [26]:
%%capture
# Flag rows whose shortest target SMILES (<= 3 characters) was not predicted.
Y_G2S.find_abnormal_entries()

In [27]:
# Print the accuracy report. For 'y_g2s' the class does not compute
# 'correct_prediction' itself, so that column must come from the CSV.
Y_G2S.benchmark()

Initial performance of the model (accuracy): 24.48%
Our benchmarked performance of the model (accuracy): 26.30%
----------------------------------------------------------------------------------------------------
Percent improve in performance using our metric: 1.82%
Relative perecent improve in performance using our metric: 7.43%
----------------------------------------------------------------------------------------------------
Percent increase in all correct samples from incorrectly classified samples: 2.41%
Percent increase in partially correct (half-correct) samples from incorrectly classified samples: 16.86%
Percent increase in partially correct (one-third-correct) samples from incorrectly classified samples: 0.08%


In [28]:
# NOTE: despite the label, this counts `reconsider_rows` (rows flagged by
# find_abnormal_entries), not `problematic_points` from find_score.
print(f'\nNumber of problematic points: {len(Y_G2S.reconsider_rows)}\n')
# for row in Y_G2S.reconsider_rows:
#     print(f'{row}\n')


Number of problematic points: 105



In [29]:
import seaborn as sns

In [30]:
# NOTE(review): `df` is only assigned further down (pd.read_pickle cell), so
# on a fresh kernel this cell raises the NameError recorded below. Move the
# load above this cell or delete the cell.
df.columns

NameError: name 'df' is not defined

In [None]:
# Scratch: first entry of the 'reactants_mol' column as a test target.
# NOTE(review): depends on a `df` with a 'reactants_mol' column that no cell
# in this notebook creates -- confirm where it came from.
test_target = df['reactants_mol'][0]

In [None]:
# Show the selected test target.
test_target

In [None]:
# Hand-written prediction (two fragments) for trying out the metrics.
test_pred = ['Ic1ccc(Nc2ncnc3cc(OCCN4CCNCC4)c(OC4CCCC4)cc23)cc1Br', 'N']

In [None]:
# Partial-match score of the example after unifying all halogens.
halogen_correction(test_target, test_pred)[0]

In [None]:
# Per-molecule best-match Tanimoto average for the example.
tanimo_coeff(test_target, test_pred)

In [None]:
# Tanimoto similarity of the merged target vs. merged prediction.
tanimo_coeff_concat(test_target, test_pred)

In [None]:
# Scratch: load the CSV results directly.
# NOTE(review): `df_new` is reassigned by the `df.drop(...)` cell below, so
# this value is discarded when the notebook is run top to bottom.
df_new = pd.read_csv('wandb.csv')

In [None]:
# Scratch: load the pickled prediction frame (stored with a .txt extension).
# NOTE(security): unpickling executes arbitrary code; only load trusted files.
df = pd.read_pickle(r'products_mixed.txt')

In [None]:
# Peek at the first rows of the raw frame.
df.head()

In [None]:
# Rename the model-specific columns to the names used by the metrics.
# Mutates `df` in place.
df.rename(columns={'original_smiles': 'target_smiles', 'prediction_0': 'predicted_smiles'}, inplace=True)

In [None]:
# Keep only the first three columns of `df`.
df_new = df.drop(columns=df.columns.tolist()[3:])

In [None]:
# df_new = df_new.drop(columns=['target_smiles_2d', 'predicted_smiles_2d', '!correct_prediction and tanimoto_coeff > 0\.9'])

In [None]:
# Split each '.'-joined SMILES string into a list of molecule SMILES.
# Not idempotent: a second run fails because the cells already hold lists.
df_new['target_smiles'] = df_new.apply(lambda x: x['target_smiles'].split('.'), axis=1)
df_new['predicted_smiles'] = df_new.apply(lambda x: x['predicted_smiles'].split('.'), axis=1)

In [None]:
# Peek at the split frame.
df_new.head()

In [None]:
# Render the last predicted fragment of the first row.
# NOTE(review): the string is a SMILES but is parsed with MolFromSmarts,
# which builds a query molecule; MolFromSmiles is probably what was meant --
# confirm intent.
Chem.MolFromSmarts(df_new.iloc[0]['predicted_smiles'][-1])

In [None]:
%%capture
# Score every row with the halogen-unified partial-match metric. Rows that
# raise are recorded with the sentinel -1 and their index label is kept.
benchmark_vals = []
problematic_points = []
for index, row in df_new.iterrows():
    try:
        benchmark_vals.append(halogen_correction(row['target_smiles'], row['predicted_smiles'])[0])
    except Exception:
        # `except Exception` (not a bare except) so that interrupting the
        # kernel still stops the loop.
        benchmark_vals.append(-1)
        problematic_points.append(index)

In [None]:
# Number of rows for which scoring raised.
len(problematic_points)

In [None]:
# Attach the scores; lengths must match, so the scoring loop has to have run
# on exactly this frame.
df_new['benchmark'] = benchmark_vals

In [None]:
# Merged-molecule Tanimoto similarity per row; rows that failed scoring
# (benchmark == -1) get 0.0.
df_new['tanimoto_coeff'] = df_new.apply(lambda x: tanimo_coeff_concat(x['target_smiles'], x['predicted_smiles']) if x['benchmark'] != -1 else 0.0, axis=1)

In [None]:
# Exact-match flag per row; rows that failed scoring count as incorrect.
df_new['correct_prediction'] = df_new.apply(lambda x: absolute_correct(x['target_smiles'], x['predicted_smiles']) if x['benchmark'] != -1 else False, axis=1)

In [None]:
# Rows the exact-match criterion marks as wrong; the commented alternative
# analyses all rows instead.
df_incorrect = df_new[df_new['correct_prediction'] == False]
# df_incorrect = df_new

In [None]:
# Summary statistics of the numeric columns for all rows.
df_new.describe()

In [None]:
# Summary statistics for the rows that failed the exact match.
df_incorrect.describe()

In [None]:
# Rows that failed the exact match but score 1.0 under the partial metric.
df_incorrect[df_incorrect['benchmark'] == 1.0].describe()

In [None]:
# First few of those "rescued" rows.
df_incorrect[df_incorrect['benchmark'] == 1.0].head()

In [None]:
# Draw the first target fragment next to the first predicted fragment of the
# ii-th rescued row (exact match failed, partial score 1.0).
ii = 2
Draw.MolsToGridImage([Chem.MolFromSmiles(df_incorrect[df_incorrect['benchmark'] == 1.0].iloc[ii]['target_smiles'][0]), Chem.MolFromSmiles(df_incorrect[df_incorrect['benchmark'] == 1.0].iloc[ii]['predicted_smiles'][0])])

In [None]:
# Incorrect rows with a score of at least 0.5 (includes the 1.0 rows).
df_incorrect[df_incorrect['benchmark'] >= 0.5].describe()

In [None]:
# Incorrect rows with a score of at least 0.32 (includes the 0.5 and 1.0 rows).
df_incorrect[df_incorrect['benchmark'] >= 0.32].describe()

In [None]:
# Share (in %) of the incorrect rows that score exactly 1.0.
# `.count()[0]` is the non-null count of the first column.
all_correct = 100 * df_incorrect[df_incorrect['benchmark'] == 1.0].count()[0]/(df_incorrect.count()[0])

In [None]:
# Share (in %) of the incorrect rows that are half correct. Uses the same
# 0.48-0.52 bucket as Benchmark.print_analysis; the former `>= 0.5` filter
# also counted fully correct (1.0) rows under the "half-correct" label.
half_correct = 100 * df_incorrect[df_incorrect['benchmark'].between(0.48, 0.52)].count()[0]/df_incorrect.count()[0]

In [None]:
# Share (in %) of the incorrect rows that are one-third correct. Uses the
# same 0.31-0.35 bucket as Benchmark.print_analysis; the former `>= 0.32`
# filter also counted half and fully correct rows under this label.
a_third_correct = 100 * df_incorrect[df_incorrect['benchmark'].between(0.31, 0.35)].count()[0]/df_incorrect.count()[0]

In [None]:
# Exact-match accuracy in %: rows not in df_incorrect over all rows.
initial_correct = 100 * (df_new.count()[0]-df_incorrect.count()[0])/df_new.count()[0]
initial_correct

In [None]:
# Accuracy in % when incorrect rows with a partial score of 1.0 are also
# counted as correct.
benchmark_correct = 100 * (df_new.count()[0]-df_incorrect.count()[0]+df_incorrect[df_incorrect['benchmark'] == 1.0].count()[0])/df_new.count()[0]
benchmark_correct

In [None]:
# Text report of the figures computed in the cells above: absolute and
# relative accuracy gain, then the partial-match shares among incorrect rows.
print(f'Initial performance of the model (accuracy): {initial_correct:2.2f}%')
print(f'Our benchmarked performance of the model (accuracy): {benchmark_correct:2.2f}%')
print(f'Percent improve in performance using our metric: {benchmark_correct-initial_correct:2.2f}%')
print(f'Relative perecent improve in performance using our metric: {100*(benchmark_correct-initial_correct)/initial_correct:2.2f}%')
print()
print(f'Percent increase in all correct samples from incorrectly classified samples: {all_correct:2.2f}%')
print(f'Percent increase in partially correct (half-correct) samples from incorrectly classified samples: {half_correct:2.2f}%')
print(f'Percent increase in partially correct (one-third-correct) samples from incorrectly classified samples: {a_third_correct:2.2f}%')

In [None]:
%%capture
# Collect rows whose shortest raw target SMILES has at most 3 characters and
# whose shortest canonical target differs from the shortest canonical
# prediction. Rows with an empty target/prediction list (which made max()/
# min() raise) or with SMILES that cannot be canonicalised are skipped, as in
# Benchmark.find_abnormal_entries.
reconsider_rows = []
decide_factor = 3  # only used by the disabled size-ratio rule below

for index, row in df_new.iterrows():
    t_lens = [len(smi) for smi in row['target_smiles']]
    p_lens = [len(smi) for smi in row['predicted_smiles']]
    if not t_lens or not p_lens:
        continue
    t_largest = max(t_lens)
    p_largest = max(p_lens)
    t_smallest = min(t_lens)
    p_smallest = min(p_lens)

    t_mols, p_mols = [], []
    try:
        p_mols.append(sorted([Chem.CanonSmiles(smi) for smi in row['predicted_smiles']], key=lambda s: len(s)))
        t_mols.append(sorted([Chem.CanonSmiles(smi) for smi in row['target_smiles']], key=lambda s: len(s)))
    except Exception:
        # Not a bare except, so a kernel interrupt still stops the loop.
        continue

    # if t_smallest <= 3 or t_largest/p_largest >= decide_factor or p_largest/t_largest >= decide_factor:
    #     reconsider_rows.append(row)

    if t_smallest <= 3 and t_mols[0][0] != p_mols[0][0]:
        reconsider_rows.append(row)

In [None]:
# Dump every flagged row, each followed by an empty line.
for flagged in reconsider_rows:
    print(flagged, end='\n\n')