In [None]:
from rdkit import Chem
from mordred import Calculator, descriptors
import pandas as pd
import numpy as np
 

In [None]:
# Mount Google Drive into the Colab filesystem so the input CSV can be read.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT, force_remount=True)

Mounted at /content/drive


In [None]:
# Load the donor/acceptor combination table from the mounted Drive folder.
COMBINATIONS_CSV = "/content/drive/MyDrive/new_combinaison.csv"
test_data = pd.read_csv(COMBINATIONS_CSV)
test_data

Unnamed: 0,D_A,Reported Acceptor,acc_SMILES,Reported Donor,don_SMILES
0,PPDT2FBT__BTDT2R,BTDT2R,CCOc1c(OCC)c(-c2ccc(/C=C3\SC(=S)N(CC)C3=O)s2)c...,PPDT2FBT,CCOc1ccc(OCC)c(-c2ccc(-c3c(F)c(F)c(-c4ccc(-c5c...
1,PTB7-Th__BTDT2R,BTDT2R,CCOc1c(OCC)c(-c2ccc(/C=C3\SC(=S)N(CC)C3=O)s2)c...,PTB7-Th,CCOC(=O)c1sc2c(-c3cc4c(-c5ccc(CC)s5)c5sc(-c6sc...
2,P3HT__BTDT2R,BTDT2R,CCOc1c(OCC)c(-c2ccc(/C=C3\SC(=S)N(CC)C3=O)s2)c...,P3HT,CCc1cc(C)sc1-c1sc(-c2cc(CC)c(-c3sc(-c4cc(CC)c(...
3,PBDB-TF__BTDT2R,BTDT2R,CCOc1c(OCC)c(-c2ccc(/C=C3\SC(=S)N(CC)C3=O)s2)c...,PBDB-TF,CCc1sc(-c2c3cc(-c4ccc(-c5sc(-c6ccc(-c7cc8c(-c9...
4,PB24-3TDC__BTDT2R,BTDT2R,CCOc1c(OCC)c(-c2ccc(/C=C3\SC(=S)N(CC)C3=O)s2)c...,PB24-3TDC,CCOC(=O)c1ccsc1-c1ccc(-c2sc(-c3ccc(-c4c(F)c(F)...
...,...,...,...,...,...
156035,PBQ-0F__6TBA,6TBA,CCc1ccc(C2(c3ccc(CC)cc3)c3c(sc4cc(C=C5C(=O)N(C...,PBQ-0F,CCOc1cccc(-c2nc3c(-c4cccs4)ccc(-c4ccc(-c5cc6c(...
156036,PBQ-QF__6TBA,6TBA,CCc1ccc(C2(c3ccc(CC)cc3)c3c(sc4cc(C=C5C(=O)N(C...,PBQ-QF,CCOc1cccc(-c2nc3c(-c4cccs4)c(F)c(F)c(-c4ccc(-c...
156037,PBQ-4F__6TBA,6TBA,CCc1ccc(C2(c3ccc(CC)cc3)c3c(sc4cc(C=C5C(=O)N(C...,PBQ-4F,CCOc1cccc(-c2nc3c(-c4cccs4)c(F)c(F)c(-c4ccc(-c...
156038,ZR1__6TBA,6TBA,CCc1ccc(C2(c3ccc(CC)cc3)c3c(sc4cc(C=C5C(=O)N(C...,ZR1,CCc1ccc(-c2c3sc4cc(-c5cc(CC)c(-c6ccc(/C=C7/SC(...


In [1]:
def _get_descriptor_calculator():
  """Return a shared mordred ``Calculator``, building it on first use.

  ``Calculator(descriptors)`` registers the descriptor set from
  ``mordred.descriptors``; the instance is cached on this function so it is
  built once and reused for every molecule instead of once per SMILES string.
  """
  calc = getattr(_get_descriptor_calculator, "_calc", None)
  if calc is None:
    calc = Calculator(descriptors)
    _get_descriptor_calculator._calc = calc
  return calc


def calculate_descriptors(smiles):
  """
  Calculates mordred molecular descriptors from a SMILES string.

  Args:
      smiles (str): The input SMILES string.

  Returns:
      dict: A dictionary mapping mordred descriptor names to their values.
          Returns None if ``smiles`` is not a string (e.g. None or NaN from a
          DataFrame column) or if it cannot be converted to an
          rdkit.Chem.Mol instance.
          NOTE(review): values for descriptors mordred could not compute may
          be mordred missing/error objects rather than numbers -- confirm
          before using them as numeric features.

  Raises:
      None. A ValueError raised during conversion or calculation is reported
      with a printed message and None is returned.

  Example:
      >>> desc = calculate_descriptors('CCO')
      >>> 'nAtom' in desc
      True
      >>> calculate_descriptors('not a smiles') is None
      True

  """
  # Missing values (None / float NaN) would otherwise raise inside RDKit with
  # an exception type that is not handled below; treat them as unparsable.
  if not isinstance(smiles, str):
    return None
  try:
    mol = Chem.MolFromSmiles(smiles)
    # RDKit signals an unparsable SMILES by returning None rather than raising.
    if mol is None:
      return None
    desc = _get_descriptor_calculator()(mol)
    return desc.asdict()
  except ValueError:
    print("Unable to convert SMILES string to rdkit.Chem.Mol instance.")
    return None


**Generate descriptors for the acceptor SMILES**

In [None]:
# One row per distinct acceptor SMILES (np.unique also returns them sorted).
uni = pd.DataFrame({"acc_SMILES": np.unique(test_data["acc_SMILES"])})
# Drop entries whose SMILES is the literal 'bad mol'
# (presumably a marker for structures that could not be resolved -- confirm).
mask = uni['acc_SMILES'] == 'bad mol'
# Reset to a gap-free 0..n-1 index: the descriptor frame built from this column
# gets a fresh positional index, and the later column-wise pd.concat aligns on
# index labels, so gaps left by the filter would pair SMILES with the wrong row.
uni = uni[~mask].reset_index(drop=True)
len(uni)

210

In [None]:
# calculate_descriptors returns None for SMILES it cannot convert; pd.DataFrame
# cannot build a frame from a list mixing dicts and None, so substitute an empty
# dict. That molecule then becomes an all-NaN row and the frame keeps exactly one
# row per SMILES, in the same order as `uni`.
acc_records = [calculate_descriptors(smi) or {} for smi in uni['acc_SMILES']]
desc_dfA = pd.DataFrame(acc_records)

In [None]:
# Write the acceptor descriptor table to disk without the row index.
desc_dfA.to_csv(path_or_buf='acc_desc.csv', index=False)

In [None]:

# Tag every descriptor column as belonging to the acceptor. The suffix is only
# added when it is not already present, so re-running this cell does not produce
# '..._acceptor_acceptor' column names.
new_columns = [
    col_name if col_name.endswith('_acceptor') else col_name + '_acceptor'
    for col_name in desc_dfA.columns
]
desc_dfA.columns = new_columns
desc_dfA

In [None]:

# pd.concat(axis=1) pairs rows by index label, not by position. `uni` may carry
# gaps in its index after the 'bad mol' filter, so both frames are put on a
# fresh 0..n-1 index to pair each SMILES with its own descriptor row.
test_data_with_acc_smiles = pd.concat(
    [uni.reset_index(drop=True), desc_dfA.reset_index(drop=True)],
    axis=1,
)

In [None]:

# Attach the acceptor descriptors to every donor/acceptor pair that uses that
# acceptor (default inner join on the shared 'acc_SMILES' column).
test_data_with_acc_smiles = test_data_with_acc_smiles.merge(
    test_data,
    on='acc_SMILES',
)

**Generate descriptors for the donor SMILES**

In [None]:
# One row per distinct donor SMILES (np.unique also returns them sorted).
uni = pd.DataFrame({"don_SMILES": np.unique(test_data["don_SMILES"])})
# Drop entries whose SMILES is the literal 'bad mol'
# (presumably a marker for structures that could not be resolved -- confirm).
mask = uni['don_SMILES'] == 'bad mol'
# Reset to a gap-free 0..n-1 index: the descriptor frame built from this column
# gets a fresh positional index, and the later column-wise pd.concat aligns on
# index labels, so gaps left by the filter would pair SMILES with the wrong row.
uni = uni[~mask].reset_index(drop=True)
len(uni)

In [None]:
# calculate_descriptors returns None for SMILES it cannot convert; pd.DataFrame
# cannot build a frame from a list mixing dicts and None, so substitute an empty
# dict. That molecule then becomes an all-NaN row and the frame keeps exactly one
# row per SMILES, in the same order as `uni`.
don_records = [calculate_descriptors(smi) or {} for smi in uni['don_SMILES']]
desc_df = pd.DataFrame(don_records)
# Write the donor descriptor table to disk without the row index.
desc_df.to_csv('don_desc.csv', index=False)

In [None]:
# Tag every descriptor column as belonging to the donor. The suffix is only
# added when it is not already present, so re-running this cell does not produce
# '..._donor_donor' column names.
new_columns = [
    col_name if col_name.endswith('_donor') else col_name + '_donor'
    for col_name in desc_df.columns
]
desc_df.columns = new_columns
desc_df

In [None]:
# pd.concat(axis=1) pairs rows by index label, not by position. `uni` may carry
# gaps in its index after the 'bad mol' filter, so both frames are put on a
# fresh 0..n-1 index to pair each SMILES with its own descriptor row.
test_data_with_don_smiles = pd.concat(
    [uni.reset_index(drop=True), desc_df.reset_index(drop=True)],
    axis=1,
)

In [None]:
# Join the donor descriptors onto the table that already carries the acceptor
# descriptors (default inner join on the shared 'don_SMILES' column).
test_data_with_don_smiles = test_data_with_don_smiles.merge(
    test_data_with_acc_smiles,
    on='don_SMILES',
)

In [None]:
# Write the combined donor + acceptor descriptor table to disk without the row index.
test_data_with_don_smiles.to_csv(
    path_or_buf='test_with_descriptor.csv',
    index=False,
)