<a href="https://colab.research.google.com/github/SACHZZ/HodgeLaplacian/blob/main/Fingerprint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install packages and import libraries

In [None]:
#! pip install -q condacolab
#! mamba install -c conda-forge rdkit

import condacolab
condacolab.install()

import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit import DataStructs
from rdkit.Chem import AllChem
from rdkit.Chem import PandasTools
from rdkit.Chem import MACCSkeys
from rdkit.Chem import Descriptors, Lipinski

# Import Data

In [None]:
# Load the dataset. Later cells read its `smiles` column.
df = pd.read_csv('eib_optimized.csv')
# 'Unnamed: 0' is presumably a row index that was written out when the CSV
# was saved (verify against the file); it carries no chemistry, so drop it.
df = df.drop(columns=['Unnamed: 0'])
df.head()

# Calculate Lipinski descriptors


In [None]:
def lipinski(smiles, verbose=False):
    """Compute four Lipinski-style descriptors for each SMILES string.

    Parameters
    ----------
    smiles : iterable of str
        SMILES strings, e.g. a pandas Series or a list.
    verbose : bool, optional
        When True, print every SMILES string that RDKit could not parse.

    Returns
    -------
    pandas.DataFrame
        One row per input, in input order, with float columns ``MW``,
        ``LogP``, ``NumHDonors`` and ``NumHAcceptors`` and a default
        0..n-1 index. A row is all-NaN when its SMILES could not be parsed,
        so the result stays row-aligned with the input.
    """
    columnNames = ["MW", "LogP", "NumHDonors", "NumHAcceptors"]

    rows = []
    for elem in smiles:
        mol = Chem.MolFromSmiles(elem)
        if mol is None:
            # MolFromSmiles reports a parse failure by returning None. Keep a
            # placeholder row instead of crashing so that row i of the result
            # still describes input i.
            if verbose:
                print("lipinski: could not parse SMILES", repr(elem))
            rows.append([np.nan] * len(columnNames))
            continue

        rows.append([
            Descriptors.MolWt(mol),
            Descriptors.MolLogP(mol),
            Lipinski.NumHDonors(mol),
            Lipinski.NumHAcceptors(mol),
        ])

    # reshape(-1, 4) keeps the table two-dimensional even for zero or one
    # input; a 1-D array would be read by DataFrame as a single column.
    baseData = np.array(rows, dtype=float).reshape(-1, len(columnNames))

    return pd.DataFrame(data=baseData, columns=columnNames)

In [None]:
# Build the descriptor table (columns MW, LogP, NumHDonors, NumHAcceptors)
# from the `smiles` column of the loaded dataset.
df_lipinski = lipinski(df.smiles)

# Combine dataframes

In [None]:
# Put the descriptor columns next to the original columns. concat(axis=1)
# matches rows by index label, so this assumes both frames carry the same
# default 0..n-1 index (TODO confirm if df is ever filtered or re-indexed).
df_combined = pd.concat([df,df_lipinski], axis=1)

In [None]:
# NOTE: this binds a second name to the same DataFrame object (no copy is
# made), so in-place changes through df_final also show up in df_combined.
df_final = df_combined

# Calculate fingerprint descriptors

In [None]:
 def computeMorganFP(mol, depth=2, nBits=400):
     """Return the Morgan bit-vector fingerprint of ``mol`` as a numpy array.

     Parameters
     ----------
     mol : rdkit.Chem.Mol
         Molecule to fingerprint.
     depth : int, optional
         Passed to RDKit as the second positional argument (the Morgan
         radius).
     nBits : int, optional
         Number of bits in the fingerprint.

     Returns
     -------
     numpy.ndarray or None
         The fingerprint bits copied into an array allocated with ``nBits``
         entries, or None when RDKit could not compute the fingerprint
         (for example when ``mol`` is None).
     """
     a = np.zeros(nBits)
     try:
         fp = AllChem.GetMorganFingerprintAsBitVect(mol, depth, nBits)
         DataStructs.ConvertToNumpyArray(fp, a)
     except Exception:
         # Best effort: a molecule that cannot be fingerprinted yields None.
         # Catching Exception rather than using a bare except lets
         # KeyboardInterrupt and SystemExit propagate.
         return None
     return a

def computeRDKitFP(mol, maxLength=7, nBits=800):
    """Return the RDKit path-based fingerprint of ``mol`` as a numpy array.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        Molecule to fingerprint.
    maxLength : int, optional
        Passed to RDKit as ``maxPath``.
    nBits : int, optional
        Passed to RDKit as ``fpSize``, the number of bits in the fingerprint.

    Returns
    -------
    numpy.ndarray or None
        The fingerprint bits copied into an array allocated with ``nBits``
        entries, or None when RDKit could not compute the fingerprint
        (for example when ``mol`` is None).
    """
    a = np.zeros(nBits)
    try:
        fp = Chem.RDKFingerprint(mol, maxPath=maxLength, fpSize=nBits)
        DataStructs.ConvertToNumpyArray(fp, a)
    except Exception:
        # Best effort: a molecule that cannot be fingerprinted yields None.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return None
    return a

In [None]:
# Add a 'Molecule' column to df_final, built from its 'smiles' column. The
# frame is modified in place; the call's return value is not used.
PandasTools.AddMoleculeColumnToFrame(frame=df_final, smilesCol='smiles', molCol='Molecule')

In [None]:
# `data` is another name for the same DataFrame object as df_final (no copy),
# so columns added to `data` below also appear on df_final.
data = df_final

In [None]:
# Fingerprint every molecule using each function's default settings
# (Morgan: depth=2, nBits=400; RDKit: maxLength=7, nBits=800). Each cell of
# the new columns holds the function's return value: an array, or None when
# the fingerprint call raised.
data['Morgan2FP'] = data['Molecule'].map(computeMorganFP)
data['RDKit7FP'] = data['Molecule'].map(computeRDKitFP)

In [None]:
# Select the Morgan fingerprint column as a Series; each element is one
# molecule's fingerprint array (or None where fingerprinting failed).
data_X = data.Morgan2FP

#data_X.to_csv('eib_Morgan_fp_400.csv', index=False)