In [7]:
import time
import random
import sys
from pathlib import Path
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt 
from matplotlib import pyplot
from rdkit import Chem
from rdkit import DataStructs
from rdkit.ML.Cluster import Butina
from rdkit.Chem import Draw
from rdkit.Chem import rdFingerprintGenerator
from rdkit.Chem.Draw import SimilarityMaps

# Show full results
#np.set_printoptions(threshold=sys.maxsize)

# Define a function to calculate similarities among the molecules
def calculate_similarity_with(molecules, position):
    """Return Tanimoto similarities of every molecule to a reference one.

    Each entry of ``molecules`` is parsed with ``Chem.MolFromSmiles`` and
    fingerprinted with the module-level ``rdkit_gen`` generator. The
    fingerprint at index ``position`` is then compared against all of the
    fingerprints (itself included) with
    ``DataStructs.BulkTanimotoSimilarity``.

    Args:
        molecules: Sequence of SMILES strings.
        position: Index into ``molecules`` of the reference molecule.

    Returns:
        The result of ``DataStructs.BulkTanimotoSimilarity``: one similarity
        value per input molecule, in input order.

    Raises:
        ValueError: If an entry cannot be parsed into a molecule.
        IndexError: If ``position`` is outside ``molecules``.
    """
    fingerprints = []
    for index, smiles in enumerate(molecules):
        mol = Chem.MolFromSmiles(smiles)
        # MolFromSmiles signals a parse failure by returning None; fail here
        # with the offending entry rather than inside GetFingerprint.
        if mol is None:
            raise ValueError(
                "invalid SMILES at index {}: {!r}".format(index, smiles)
            )
        fingerprints.append(rdkit_gen.GetFingerprint(mol))
    return DataStructs.BulkTanimotoSimilarity(fingerprints[position], fingerprints)

# Fingerprint generator used by calculate_similarity_with (maxPath=7).
rdkit_gen = rdFingerprintGenerator.GetRDKitFPGenerator(maxPath=7)

# Read the input CSV file; the context manager closes it even on error.
with open("smi.csv", "r") as f:
    ligands = f.readlines()
ligands = ligands[1:]  # remove the first line of csv file (header)

# Split every data row into its comma-separated fields. Blank lines are
# skipped so that a trailing empty line does not end up as a bogus row.
cols = [line.strip().split(",") for line in ligands if line.strip()]
results = []
num = len(cols)
if num == 0:
    raise ValueError("smi.csv contains no data rows")

# The last field of each row is the yield; the fields before it are SMILES.
yields = [row[-1] for row in cols]

# Position of the highest yield (first occurrence wins on ties). MaxYield
# keeps the original text from the file so the printed value is unchanged.
MaxPos = max(range(num), key=lambda k: float(yields[k]))
MaxYield = yields[MaxPos]
print(MaxYield,MaxPos)

# Number of SMILES columns: every field of the last row except the yield.
numcols = len(cols[-1]) - 1
for i in range(numcols):
    # Similarity of each molecule in column i to the one in the best row.
    results.append(calculate_similarity_with([row[i] for row in cols], MaxPos))
if not results:
    raise ValueError("smi.csv has no SMILES columns before the yield column")

maxrows = len(results)
maxlines = len(results[0])
# Write similarities: one output line per input row, one field per column.
with open("gfrefre.csv", "w") as f:
    for row_similarities in zip(*results):
        print(",".join(str(value) for value in row_similarities), file=f)
print("process ended")





99.99999 995
[0.5702917771883289, 0.5702917771883289, 0.836436170212766, 0.6430707876370887, 0.5702917771883289, 1.0, 0.5702917771883289, 0.836436170212766, 0.5702917771883289, 0.836436170212766, 0.6430707876370887, 1.0, 0.836436170212766, 0.6430707876370887, 0.836436170212766, 1.0, 0.5702917771883289, 1.0, 0.5702917771883289, 0.5702917771883289, 1.0, 0.6430707876370887, 0.6430707876370887, 1.0, 0.836436170212766, 0.5702917771883289, 0.5702917771883289, 0.836436170212766, 0.836436170212766, 0.836436170212766, 1.0, 1.0, 0.5702917771883289, 0.5702917771883289, 1.0, 0.5702917771883289, 1.0, 0.836436170212766, 1.0, 0.5702917771883289, 0.5702917771883289, 1.0, 1.0, 0.836436170212766, 0.6430707876370887, 0.6430707876370887, 0.836436170212766, 0.5702917771883289, 0.5702917771883289, 0.5702917771883289, 0.6430707876370887, 0.6430707876370887, 1.0, 0.836436170212766, 0.836436170212766, 1.0, 0.5702917771883289, 0.6430707876370887, 0.6430707876370887, 0.836436170212766, 0.6430707876370887, 0.5702

[0.1595744680851064, 0.14933333333333335, 0.14933333333333335, 0.15228426395939088, 0.1595744680851064, 0.16331658291457288, 0.13545816733067728, 0.17355371900826447, 0.16331658291457288, 1.0, 0.3114406779661017, 0.4358974358974359, 0.1603905160390516, 0.37467018469656993, 0.15714285714285714, 0.175, 0.13545816733067728, 0.15714285714285714, 0.37467018469656993, 0.14152700186219738, 0.15714285714285714, 0.14910536779324055, 0.16331658291457288, 0.175, 0.14933333333333335, 0.14910536779324055, 0.1673469387755102, 0.17355371900826447, 0.14285714285714285, 0.14152700186219738, 0.16331658291457288, 0.175, 0.15126050420168066, 0.15714285714285714, 0.3114406779661017, 0.4358974358974359, 0.15217391304347827, 0.1603905160390516, 0.15800865800865802, 0.175, 0.4358974358974359, 0.14285714285714285, 0.45307443365695793, 0.15126050420168066, 0.15800865800865802, 0.17355371900826447, 0.16331658291457288, 0.1595744680851064, 0.1509433962264151, 0.15800865800865802, 0.1603905160390516, 0.175, 0.1428

[0.0855397148676171, 0.0855397148676171, 0.0855397148676171, 1.0, 1.0, 0.30952380952380953, 1.0, 1.0, 0.0855397148676171, 0.30952380952380953, 0.0855397148676171, 0.0855397148676171, 0.30952380952380953, 1.0, 1.0, 1.0, 0.0855397148676171, 0.30952380952380953, 1.0, 0.0855397148676171, 0.0855397148676171, 0.30952380952380953, 0.30952380952380953, 0.30952380952380953, 0.0855397148676171, 0.0855397148676171, 0.30952380952380953, 0.30952380952380953, 0.0855397148676171, 0.30952380952380953, 1.0, 0.30952380952380953, 0.0855397148676171, 0.30952380952380953, 0.0855397148676171, 0.30952380952380953, 0.30952380952380953, 0.0855397148676171, 1.0, 0.30952380952380953, 0.0855397148676171, 0.0855397148676171, 1.0, 0.30952380952380953, 1.0, 0.30952380952380953, 1.0, 1.0, 0.0855397148676171, 0.30952380952380953, 0.0855397148676171, 1.0, 0.30952380952380953, 0.30952380952380953, 1.0, 0.0855397148676171, 1.0, 0.0855397148676171, 1.0, 0.0855397148676171, 0.0855397148676171, 1.0, 0.0855397148676171, 1.0,

[0.2782608695652174, 0.26956521739130435, 0.12658227848101267, 0.07936507936507936, 0.05917159763313609, 0.05917159763313609, 0.2818181818181818, 1.0, 0.05952380952380952, 0.26956521739130435, 0.2857142857142857, 0.2857142857142857, 0.06707317073170732, 0.06707317073170732, 0.26956521739130435, 0.2857142857142857, 0.4019607843137255, 0.2782608695652174, 0.12658227848101267, 0.052941176470588235, 0.26956521739130435, 0.12834224598930483, 0.2818181818181818, 0.06707317073170732, 0.26956521739130435, 0.11320754716981132, 0.26956521739130435, 1.0, 1.0, 0.4019607843137255, 0.07106598984771574, 0.26956521739130435, 0.07936507936507936, 0.12834224598930483, 0.2818181818181818, 0.11320754716981132, 0.05917159763313609, 0.4019607843137255, 0.05952380952380952, 0.07936507936507936, 0.11320754716981132, 0.12834224598930483, 1.0, 0.07106598984771574, 0.12658227848101267, 0.07936507936507936, 0.2818181818181818, 0.4019607843137255, 0.4019607843137255, 0.11320754716981132, 0.12658227848101267, 0.128