In [32]:
import pandas as pd

# Names of the molecular string representations. They appear as the second
# "_"-separated component of response keys and inside generated column names.
REP_NAMES = ['smiles', 'deepsmiles', 'selfies', 'inchi', 'iupac']
# NOTE: despite the name, these are task identifiers (they are iterated as
# `task_name` when the responses are loaded), not DataFrame column names.
COL_NAMES = ['bbbp', 'bace', 'clintox', 'esol', 'freesolv']

# One test-set object per task, loaded from pickle and used as a DataFrame below.
# NOTE: the insertion order here matters -- a later cell selects classification
# and regression tasks by slicing list(TASK_TO_DF.items()) positionally.
# SECURITY: unpickling can execute arbitrary code; only load trusted files.
TASK_TO_DF = {
    "bbbp": pd.read_pickle('/content/bbbp_test_fs_final.pkl'),
    "bace": pd.read_pickle('/content/bace_test_fs_final.pkl'),
    "esol": pd.read_pickle('/content/esol_test_fs_final.pkl'),
    "freesolv": pd.read_pickle('/content/freesolv_test_fs_final.pkl'),
    "clintox": pd.read_pickle('/content/clintox_test_fs_final.pkl')
}

# Name of the ground-truth label column in each task's DataFrame.
TASK_TO_LABEL_COLNAME = {
    "bbbp": "p_np",
    "bace": "Class",
    "esol": "measured log solubility in mols per litre",
    "freesolv": "expt",
    "clintox": "CT_TOX"
}

# Short model names, used as column-name prefixes. They are paired by position
# with the file-path lists (via zip), so both lists must stay in the same order.
model_names = ['gemini', 'gpt-4o', 'llama', 'mistral']

# Few-shot response files, one per entry of model_names (same order).
fewshot_filepaths = ['/content/fewshot_responses/id_to_pred_gemini-1.5-pro-002_fewshot_final.json',
                  '/content/fewshot_responses/id_to_pred_gpt-4o_fewshot.json',
                  '/content/fewshot_responses/id_to_pred_llama-3.1-405b-instruct_fewshot.json',
                  '/content/fewshot_responses/id_to_pred_mistral-large_fewshot.json']

def index(s):
    """Return the integer found after the last underscore of ``s``.

    When ``s`` contains no underscore the whole string is converted.
    Raises ValueError if that trailing piece is not a valid integer.
    """
    _head, _sep, tail = s.rpartition("_")
    return int(tail)

In [37]:
import json
# responses[model_name][task_name][representation] -> list of raw responses,
# ordered by the trailing integer of each key. Keys are matched on their first
# two "_"-separated components (task, then representation).
responses = {}

for model_name, file_path in zip(model_names, fewshot_filepaths):

  # Only the JSON parse needs the file handle; release it before post-processing.
  with open(file_path) as f:
    data = json.load(f)

  # The key order does not depend on task or representation, so sort once per
  # file instead of once per (task, representation) pair.
  ordered_keys = sorted(data.keys(), key = index)

  responses[model_name] = {}

  for task_name in COL_NAMES:
    responses[model_name][task_name] = {}

    for colname in REP_NAMES:
      responses[model_name][task_name][colname] = [
          data[k] for k in ordered_keys
          if (k.split("_")[0] == task_name) and (k.split("_")[1] == colname)
      ]

In [34]:
def extract_boolean_answer(s):
  """Parse a yes/no verdict out of a free-text response.

  Only the text after the last "decision:" marker (case-insensitive) is
  inspected; when the marker is absent the whole response is inspected.

  Args:
    s: the raw response string.

  Returns:
    True if the inspected text contains the standalone word "yes",
    False otherwise (an explicit "no" and an unparseable answer both give
    False).
  """
  import re  # local import: the cell's own `import re` appears after this def

  verdict = s.lower().split("decision:")[-1]
  # Require a whole-word match so that words which merely contain "yes"
  # (e.g. "eyes", "yesterday") are not read as a positive decision.
  return re.search(r"\byes\b", verdict) is not None


import re
def extract_float_answer(s):
    """Parse the first number out of a free-text response.

    If the response contains "decision:" (or, failing that, "decision"),
    case-insensitively, only the text after the last such marker is searched;
    otherwise the whole response is searched.

    Recognised number forms: optional sign, digits with an optional fraction
    ("3", "3.", "3.25"), a leading-dot fraction (".5") and an optional
    exponent ("1.5e-3"). A Unicode minus sign (U+2212) is treated as "-".

    Args:
        s: the raw response string.

    Returns:
        The first number found as a float, or 0.0 when no number is present.
    """
    lowered = s.lower()
    if "decision:" in lowered:
        s = lowered.split('decision:')[-1]
    elif "decision" in lowered:
        s = lowered.split('decision')[-1]

    # A typographic minus would otherwise be skipped, silently flipping the sign.
    s = s.replace("\u2212", "-")

    # The leading-dot alternative keeps ".5" from being read as 5.0, and the
    # exponent group keeps "1.5e-3" from being truncated to 1.5.
    float_pattern = r'[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?'

    match = re.search(float_pattern, s)

    if match:
        return float(match.group())
    else:
        return 0.0

In [38]:
# Sanity check: show the model names for which responses were loaded.
responses.keys()

dict_keys(['gemini', 'gpt-4o', 'llama', 'mistral'])

In [39]:
# Tasks grouped by how their responses are parsed and scored. Naming them
# explicitly avoids depending on the insertion order of TASK_TO_DF.
CLASSIFICATION_TASKS = ['bbbp', 'bace', 'clintox']
REGRESSION_TASKS = ['esol', 'freesolv']

for model_name in model_names:
  # Attach the raw responses: one "<model>_<representation>_response" column
  # per representation on every task's DataFrame.
  for colname in REP_NAMES:
    for task_name in COL_NAMES:
      TASK_TO_DF[task_name][f'{model_name}_{colname}_response'] = responses[model_name][task_name][colname]

  # Classification: parse a boolean; the score is whether it equals the label.
  for task_name in CLASSIFICATION_TASKS:
    df = TASK_TO_DF[task_name]
    for colname in REP_NAMES:
      df[f'{model_name}_{colname}_pred'] = df[f'{model_name}_{colname}_response'].apply(extract_boolean_answer)
      df[f'{model_name}_{colname}_score'] = df[f'{model_name}_{colname}_pred'] == df[TASK_TO_LABEL_COLNAME[task_name]]

  # Regression: parse a float; the score is the absolute error (lower is better).
  for task_name in REGRESSION_TASKS:
    df = TASK_TO_DF[task_name]
    for colname in REP_NAMES:
      df[f'{model_name}_{colname}_pred'] = df[f'{model_name}_{colname}_response'].apply(extract_float_answer)
      df[f'{model_name}_{colname}_score'] = abs(df[f'{model_name}_{colname}_pred'] - df[TASK_TO_LABEL_COLNAME[task_name]])



In [40]:
from ast import literal_eval
import re

# Matches a brace-delimited span with no nested braces, e.g. "{'C': 2, 'O': 1}".
pattern = r"\{[^{}]*\}"
def score_mol_dicts(pred, lab):
  """Check a predicted atom-count dict (embedded in text) against a reference.

  The first brace-delimited span in ``pred`` is parsed as a Python literal.
  The prediction is accepted when every key of ``lab`` is present in the
  parsed dict with an equal value; extra keys in the prediction are ignored.

  Args:
    pred: response text expected to contain a dict literal.
    lab: mapping from key to the expected count.

  Returns:
    True if the prediction agrees with ``lab`` on all of ``lab``'s keys.
    False on a mismatch, or when no dict can be extracted and parsed from
    ``pred`` (including when ``pred`` is not a string).
  """
  try:
    matches = re.findall(pattern, pred)
    parsed = literal_eval(matches[0])
    # A brace literal may also be a set such as "{1, 2}"; only dicts are valid.
    if not isinstance(parsed, dict):
      return False
    for k in lab.keys():
      if k not in parsed:
        return False
      elif lab[k] != parsed[k]:
        return False
    return True
  except Exception:
    # Best-effort parsing: any malformed prediction counts as incorrect.
    # Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate, so a long scoring loop can still be interrupted.
    return False

In [17]:
%%capture
!pip install rdkit

In [41]:
from collections import defaultdict
from rdkit import Chem

def count_atoms(smiles):
  """Count the atoms of each element symbol in a SMILES string.

  The string is parsed with RDKit. If the default (sanitizing) parse fails,
  it is retried with sanitization disabled so that structurally unusual
  molecules can still be counted.

  NOTE(review): only atoms present in the parsed molecule are iterated;
  hydrogens left implicit by the SMILES are presumably not counted -- confirm
  against the RDKit documentation.

  Args:
    smiles: the SMILES string to parse.

  Returns:
    A defaultdict(int) mapping element symbol to its number of atoms.

  Raises:
    ValueError: if the string cannot be parsed even without sanitization.
  """
  m = Chem.MolFromSmiles(smiles)

  if m is None:
    # Sanitization failed (e.g. a valence error); fall back to the raw graph.
    m = Chem.MolFromSmiles(smiles, sanitize = False)

  if m is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError(f"Could not parse SMILES: {smiles!r}")

  atom_counts = defaultdict(int)

  for a in m.GetAtoms():
    atom_counts[a.GetSymbol()] += 1

  return atom_counts

In [42]:
from tqdm import tqdm
# Enable tqdm's pandas integration; `progress_apply` is used in a later cell.
tqdm.pandas()

In [43]:
# Add the reference per-element atom counts, computed from each row's SMILES,
# to every task's DataFrame (with a progress bar per task).
for df in TASK_TO_DF.values():
  smiles_column = df['smiles']
  df['atom_counts'] = smiles_column.progress_apply(count_atoms)

  0%|          | 0/194 [00:00<?, ?it/s][23:59:27] Explicit valence for atom # 12 N, 4, is greater than permitted
[23:59:27] Explicit valence for atom # 5 N, 4, is greater than permitted
100%|██████████| 194/194 [00:00<00:00, 2597.90it/s]
100%|██████████| 152/152 [00:00<00:00, 1838.46it/s]
100%|██████████| 113/113 [00:00<00:00, 3700.67it/s]
100%|██████████| 65/65 [00:00<00:00, 7338.42it/s]
100%|██████████| 143/143 [00:00<00:00, 2230.71it/s]


In [45]:
import json

# One JSON file of atom-count predictions per model, listed in the same order
# as model_names (the two lists are paired by position below).
atom_counting_file_paths = [
    '/content/atom_counting_accuracy/id_to_pred_counting_gemini.json',
    '/content/atom_counting_accuracy/id_to_pred_counting_gpt.json',
    '/content/atom_counting_accuracy/id_to_pred_counting_llama-3.1.json',
    '/content/atom_counting_accuracy/id_to_pred_counting_mistral.json',
]

for model_name, file_path in zip(model_names, atom_counting_file_paths):
  with open(file_path, 'r') as f:
    id_to_pred_counts = json.load(f)

  # The loaded JSON is indexed first by task name, then by representation;
  # each entry becomes one column of that task's DataFrame.
  for task_name, df in TASK_TO_DF.items():
    task_predictions = id_to_pred_counts[task_name]
    for representation in REP_NAMES:
      target_column = f'{model_name}_atom_counts_from_{representation}'
      df[target_column] = task_predictions[representation]

In [46]:
# For every (model, task, representation) add a 0/1 column
# "<model>_<representation>_correct_atom_count": 1 when score_mol_dicts accepts
# the predicted atom counts against the reference 'atom_counts', else 0.
for model_name in model_names:
  for task_name, df in TASK_TO_DF.items():
    for representation in REP_NAMES:
      predictions = df[f'{model_name}_atom_counts_from_{representation}']
      df[f"{model_name}_{representation}_correct_atom_count"] = [
          1 if score_mol_dicts(pred, lab) else 0
          for pred, lab in zip(predictions, df['atom_counts'])
      ]

In [47]:
# Preview the first rows of the ESOL frame, including the columns added above.
TASK_TO_DF['esol'].head()

Unnamed: 0,Compound ID,ESOL predicted log solubility in mols per litre,Minimum Degree,Molecular Weight,Number of H-Bond Donors,Number of Rings,Number of Rotatable Bonds,Polar Surface Area,measured log solubility in mols per litre,smiles,...,llama_smiles_correct_atom_count,llama_deepsmiles_correct_atom_count,llama_selfies_correct_atom_count,llama_inchi_correct_atom_count,llama_iupac_correct_atom_count,mistral_smiles_correct_atom_count,mistral_deepsmiles_correct_atom_count,mistral_selfies_correct_atom_count,mistral_inchi_correct_atom_count,mistral_iupac_correct_atom_count
0,Dieldrin,-4.533,1,380.913,0,5,0,12.53,-6.29,ClC4=C(Cl)C5(Cl)C3C1CC(C2OC12)C3C4(Cl)C5(Cl)Cl,...,0,0,0,1,1,0,0,0,1,0
1,Valeraldehyde,-1.103,1,86.134,0,0,3,17.07,-0.85,CCCCC=O,...,1,1,1,1,1,1,1,1,1,1
2,4-Pentene-1-ol,-0.791,1,86.134,1,0,3,20.23,-0.15,OCCCC=C,...,1,1,1,1,1,1,1,1,1,1
3,brompyrazone,-3.005,1,266.098,1,2,1,60.91,-3.127,c1ccccc1n2ncc(N)c(Br)c2(=O),...,0,0,0,1,0,0,0,1,1,0
4,p-Aminophenol,-1.231,1,109.128,2,1,0,46.25,-0.8,Nc1ccc(O)cc1,...,1,0,0,1,1,1,1,1,1,1


-0.12257301796222356

In [55]:
from scipy.stats import pearsonr

# Per-row columns to aggregate, one for each (model, representation) pair.
score_columns = [f"{model_name}_{rep_name}_score" for model_name in model_names for rep_name in REP_NAMES]

correct_atom_count_columns = [f"{model_name}_{rep_name}_correct_atom_count" for model_name in model_names for rep_name in REP_NAMES]

# For each task, correlate the per-molecule mean property-prediction score with
# the per-molecule mean atom-counting correctness.
# NOTE: the *_score columns hold correctness flags for the classification tasks
# but absolute errors for the regression tasks (esol, freesolv), so the sign of
# r has the opposite meaning for those two groups.
for task_name, df in TASK_TO_DF.items():
  print(task_name)

  # Row-wise means over all models and representations.
  df['aggregated_score'] = df[score_columns].mean(axis=1)
  df['aggregated_correct_atom_count'] = df[correct_atom_count_columns].mean(axis=1)

  r_value, p_value = pearsonr(df['aggregated_score'], df['aggregated_correct_atom_count'])

  print(f"Pearson's r: {r_value}")
  print(f"P-value: {p_value}")
  print()

bbbp
Pearson's r: 0.05350212714526208
P-value: 0.45874592394751307

bace
Pearson's r: 0.10678058207676774
P-value: 0.19041283462188882

esol
Pearson's r: -0.03715911288751811
P-value: 0.6959821493152122

freesolv
Pearson's r: -0.48629346245330146
P-value: 4.008887834890987e-05

clintox
Pearson's r: -0.11772963311744714
P-value: 0.16140539470169815

