In [None]:
import pandas as pd
from rdkit import Chem
from rdkit import DataStructs
import numpy as np
from rdkit.Chem import AllChem
from rdkit.Chem import MACCSkeys
from rdkit.Chem import rdFingerprintGenerator


In [None]:
def morgan(data, radius):
    """Return the mean pairwise Tanimoto dissimilarity of Morgan fingerprints.

    Parameters
    ----------
    data : iterable
        Molecules accepted by ``AllChem.GetMorganFingerprint``.
    radius : int
        Radius forwarded unchanged to ``AllChem.GetMorganFingerprint``.

    Returns
    -------
    float
        The sum of ``1 - Tanimoto`` over every ordered pair of fingerprints
        divided by ``n * (n - 1)``. ``nan`` is returned when fewer than two
        molecules are supplied, because no pair exists to average over.
    """
    mols = list(data)
    n_mols = len(mols)
    if n_mols < 2:
        # The denominator n * (n - 1) is zero here; an empty input used to
        # raise ZeroDivisionError. Report "undefined" explicitly instead.
        return float("nan")

    # NOTE(review): recent RDKit releases flag GetMorganFingerprint as
    # deprecated in favour of rdFingerprintGenerator; confirm the generator
    # yields identical fingerprints before migrating.
    fps = [AllChem.GetMorganFingerprint(mol, radius) for mol in mols]

    sum_dissim = 0
    for fp in fps:
        # Each fingerprint is compared against the full list (itself included).
        dissim_array = 1 - np.array(DataStructs.BulkTanimotoSimilarity(fp, fps))
        sum_dissim += np.sum(dissim_array)
    return sum_dissim / (n_mols * (n_mols - 1))

In [None]:
def maccs(data):
    """Return the mean pairwise Tanimoto dissimilarity of MACCS key fingerprints.

    Parameters
    ----------
    data : iterable
        Molecules accepted by ``MACCSkeys.GenMACCSKeys``.

    Returns
    -------
    float
        The sum of ``1 - Tanimoto`` over every ordered pair of fingerprints
        divided by ``n * (n - 1)``. ``nan`` is returned when fewer than two
        molecules are supplied, because no pair exists to average over.
    """
    mols = list(data)
    n_mols = len(mols)
    if n_mols < 2:
        # The denominator n * (n - 1) is zero here; an empty input used to
        # raise ZeroDivisionError. Report "undefined" explicitly instead.
        return float("nan")

    fps = [MACCSkeys.GenMACCSKeys(mol) for mol in mols]

    sum_dissim = 0
    for fp in fps:
        # Each fingerprint is compared against the full list (itself included).
        dissim_array = 1 - np.array(DataStructs.BulkTanimotoSimilarity(fp, fps))
        sum_dissim += np.sum(dissim_array)
    return sum_dissim / (n_mols * (n_mols - 1))

In [None]:
def calc(names):
    """Score every SD file in ``names`` with the Morgan and MACCS metrics.

    Parameters
    ----------
    names : iterable of str
        Paths handed to ``Chem.SDMolSupplier``.

    Returns
    -------
    dict
        Maps the first nine characters of each name to
        ``[morgan_score, maccs_score]``. Names sharing the same nine-character
        prefix overwrite one another. A file that cannot be opened (OSError)
        is reported and left out. A file with fewer than two readable
        molecules is reported and stored with NaN for both scores.
    """
    output = dict()
    for name in names:
        try:
            suppl = Chem.SDMolSupplier(name)
        except OSError as err:
            print("OS error: {0}".format(err))
            continue

        # Drop entries that come back as None (records the supplier could
        # not turn into a molecule).
        ms = [mol for mol in suppl if mol is not None]

        key = name[:9]
        if len(ms) < 2:
            # A pairwise average needs at least two molecules; without this
            # guard one sparse file would abort the whole batch with a
            # division by zero inside the scoring functions.
            print("{0}: fewer than two readable molecules, scores set to NaN".format(name))
            output[key] = [float("nan"), float("nan")]
            continue

        mor_unp = morgan(ms, radius=2)
        maccs_unp = maccs(ms)
        output[key] = [mor_unp, maccs_unp]
    return output

In [None]:
def writer(data):
    """Write the score table held in ``data`` to ``output.xlsx``.

    Parameters
    ----------
    data : dict
        Each key becomes a row label; each value supplies that row's cells
        under the ``ECFP4`` and ``MACCS`` column headers.

    The spreadsheet is written to the current working directory and nothing
    is returned.
    """
    row_labels = list(data.keys())
    rows = [data[label] for label in row_labels]

    table = pd.DataFrame(rows, index=row_labels, columns=['ECFP4', 'MACCS'])
    table.to_excel("output.xlsx")

In [None]:
# Driver: score every listed SD file and export the results.
if __name__ == '__main__':
    # Paths of the SD files to process; each entry is passed to calc(), which
    # opens it with Chem.SDMolSupplier. The list is empty as written, so
    # calc() has nothing to iterate over and returns an empty dict.
    names = []
    # Dict of per-file [Morgan score, MACCS score] pairs returned by calc().
    data = calc(names)
    # Writes the table to "output.xlsx".
    writer(data)