In [2]:
### Step 1: Initiation ###

from rdkit import Chem, rdBase
from rdkit.Chem import Draw,rdDepictor
from rdkit.Chem import AllChem, Descriptors, DataStructs
from rdkit.Chem.Draw import IPythonConsole, rdMolDraw2D
from rdkit.Chem.Scaffolds import rdScaffoldNetwork
from datetime import datetime
# import math
# import matplotlib as mpl
# from matplotlib import pyplot as plt
# import os
# import pyvis
# from pyvis.network import Network
# import inspect
import psycopg2
from collections import defaultdict

# DATABASE Connections

# Chemistry Database
# (to work against a local server instead, change host to "localhost")
db_chem = psycopg2.connect(
    host="192.168.86.31",
    dbname="Chemistry",
    user="postgres",
    password="postgres",
)

# Query the server version string; doubles as a quick connectivity check.
cur = db_chem.cursor()
cur.execute('SELECT version()')
db_ver = cur.fetchone()

# Report the PostgreSQL and RDKit versions in use and the time of this run.
print('PostgreSQL database version:', db_ver)
print('RDKit version: ', rdBase.rdkitVersion)
# print('MatplotLib version:', mpl.__version__)
# print('Pyvis version:', pyvis.__version__)
print(datetime.now())

###################################################################################################################

### Step 2: Generate Fingerprints for New Structures ("no fingerprints") ###

import collections
import fingerprinter as fpt

# SELECT Structures without fingerprints
#
# A structure is selected when no row in "Structure_Fingerprint" references
# its "Structure_ID".

cur = db_chem.cursor()
sql = (
    'SELECT s."Structure_ID", s."SMILES" FROM public."Structures" s '
    'WHERE s."Structure_ID" IN (SELECT f."Structure_ID" FROM public."Structures" f '
    'WHERE NOT EXISTS (SELECT 1 FROM public."Structure_Fingerprint" fp WHERE f."Structure_ID" = fp."Structure_ID"))'
)
cur.execute(sql)

mollist = cur.fetchall()
print()
print('##### Structures without Fingerprints #####')
print()
print(len(mollist),'new Structures')
print(mollist)


# generate Morgan fingerprints for molecules
#
# Results collected for the following steps:
#   fp_smi_list : fingerprint number -> fingerprint SMILES
#   fp_dict     : fingerprint number -> list of per-structure occurrence counts
#   fp_allmol   : [Structure_ID, fingerprint number, occurrence count] records

fp_key = {}   # not used in this step; kept so the name stays defined
fp_val = []   # not used in this step; kept so the name stays defined
fp_dict = collections.defaultdict(list)
fp_allmol = []
fp_smi_list = {}

for mol in mollist:
    print()
    print('Molecule : ',mol)
    structure_id, smiles = mol
    m = Chem.MolFromSmiles(smiles)

    # MolFromSmiles returns None when the SMILES cannot be parsed. Skip the
    # structure instead of letting the fingerprint calls fail on None and
    # abort the run for every remaining structure.
    if m is None:
        print('Skipped, SMILES could not be parsed : ',mol)
        continue

    # Generate Fingerprint SMILES and SMILES list
    # (each entry is indexed as [0] = fingerprint number, [1] = SMILES)
    fp_smi = fpt.FingerprintToSmiles(m, 3)
    print('Fingerprint : ',fp_smi)
    for f in fp_smi:
        fp_smi_list[f[0]] = f[1]

    # Generate Fingerprint Footprints:
    # only the bitInfo mapping filled in by the call is needed here; the
    # number of entries recorded per bit is used as the occurrence count.
    big = {}
    AllChem.GetMorganFingerprint(m, 3, bitInfo=big)
    for fpb, v in big.items():
        fp_dict[fpb].append(len(v))
        fp_allmol.append([structure_id, fpb, len(v)])

print()
print("All Fingerprints by molecule")
print(fp_allmol)

print()
print(fp_smi_list)
print(len(fp_smi_list),'SMILES')

for fpb, v in fp_smi_list.items():
    print(fpb,' - ',v)

print()
print(len(fp_dict),'fingerprints')

for fpb, v in fp_dict.items():
    print(fpb,' - ',len(v),' - ',v)
        
###################################################################################################################

### Step 3: Insert new fingerprints into fingerprint table ###

print()
print('##### Insert New Fingerprints #####')
print()

# Load the fingerprints of type 1 that are already stored.
cur = db_chem.cursor()
sql = 'SELECT * FROM public."Fingerprints" WHERE "FP_Type" = 1'
cur.execute(sql)
fpdblist = cur.fetchall()

# Column index 3 of each row is the value compared against the fingerprint
# numbers (presumably "FP_Number" - confirm against the table definition).
# A set gives constant-time membership tests instead of rescanning every
# database row for every candidate fingerprint.
known_numbers = {row[3] for row in fpdblist}

print()
if len(fp_smi_list) == 0:
    print('No new fingerprint inserts')
else:
    print("New inserted fingerprints")
print()

sql = 'INSERT INTO public."Fingerprints"("FP_Smiles", "FP_Number", "FP_Type") VALUES(%s, %s, 1);'

# Insert every fingerprint whose number is not stored yet. All inserts are
# committed together; on any failure the transaction is rolled back so the
# connection is not left in an aborted state.
try:
    for fpb, v in fp_smi_list.items():
        if fpb in known_numbers:
            continue
        smi_fl = 0  # 0 = fingerprint number was not found in the database
        cur.execute(sql, (v, fpb))
        print(fpb,' - ',v, smi_fl, "inserted")
    db_chem.commit()
except Exception:
    db_chem.rollback()
    raise

###################################################################################################################

### Step 4: Insert Fingerprints for New Molecules ###

print()
print('##### Insert Structure/Fingerprints Links #####')
print()

# Re-read the fingerprint table so that freshly inserted rows are included.
cur = db_chem.cursor()
sql = 'SELECT * FROM public."Fingerprints" WHERE "FP_Type" = 1'
cur.execute(sql)
fpdblist = cur.fetchall()

# Map column 3 of each row (the fingerprint number used as the lookup key
# below) to column 0 (the value written to "FP_ID").
fpid = {row[3]: row[0] for row in fpdblist}

sql = 'INSERT INTO public."Structure_Fingerprint"("Structure_ID","FP_ID","FP_Count") VALUES (%s,%s,%s);'

# Insert Notification
print()
if len(fp_allmol) == 0:
    print('No new structure/fingerprint inserts')
else:
    print('New structure/fingerprint links')
print()

# Each fp_allmol record is [Structure_ID, fingerprint number, count].
# The links are written in a single transaction: if a lookup or an insert
# fails part-way, nothing is committed, so no structure is left with only
# some of its fingerprint links.
try:
    for m in fp_allmol:
        i = fpid[m[1]]
        cur.execute(sql, (m[0], i, m[2]))
        print(m, i)
    db_chem.commit()
except Exception:
    db_chem.rollback()
    raise
    
###################################################################################################################

### Step 5: Update Shannon Entropies ###
print()
print('##### Shannon Update #####')
print()

# netprepper is a private module; PYTHONPATH must include the directory that
# holds the private modules and packages (here pkg_mod).
# The following imports are part of netprepper:
#    import pandas as pd
#    from scipy import stats as scistat
import netprepper

# Close the connection opened earlier before replacing it; rebinding the name
# alone would leave the old session open on the server.
db_chem.close()

#db_chem = psycopg2.connect(host = "localhost", dbname="Chemistry", user="postgres", password="postgres")
db_chem = psycopg2.connect(host = "192.168.86.31", dbname="Chemistry", user="postgres", password="postgres")

# SELECT every structure/fingerprint link of type 1 with its occurrence count
cur = db_chem.cursor()
sql = (
    'SELECT sf."Structure_ID", sf."FP_ID", sf."FP_Count", fp."FP_Shannon" FROM public."Structure_Fingerprint" sf '
    'JOIN public."Fingerprints" fp ON sf."FP_ID" = fp."Fingerprint_ID" WHERE fp."FP_Type" = 1'
)
cur.execute(sql)
fp_list = cur.fetchall()

# fp_dict : FP_ID        -> list of FP_Count values, one per linked structure
# fp_mol  : Structure_ID -> list of FP_IDs linked to that structure
fp_dict = collections.defaultdict(list)
fp_mol = collections.defaultdict(list)

for structure_id, fp_id, fp_count, _stored_shannon in fp_list:
    fp_dict[fp_id].append(fp_count)
    fp_mol[structure_id].append(fp_id)

print(len(fp_dict),'fingerprints')
print(len(fp_mol),'structures')
print()

# Number of structures that have at least one fingerprint link; passed to
# netprepper.shannon as its second argument.
n_fp = len(fp_mol)


# update entropy field in Fingerprints
#
# netprepper.shannon is called with the count list of one fingerprint and
# n_fp; element [1] of its result is stored in "FP_Shannon" and element [0]
# is only printed (presumably the count list padded to n_fp - confirm in
# netprepper).

sql = 'UPDATE public."Fingerprints" SET "FP_Shannon" = %s WHERE "Fingerprint_ID" = %s'
cur = db_chem.cursor()
for fpb, i in fp_dict.items():
    ns = netprepper.shannon(i, n_fp)
    print(fpb,' - ',ns[0],' - ',ns[1])
    cur.execute(sql, (ns[1], fpb))
db_chem.commit()



PostgreSQL database version: ('PostgreSQL 12.7 (Ubuntu 12.7-1.pgdg20.04+1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, 64-bit',)
RDKit version:  2020.03.3
2021-07-01 22:21:46.301460

##### Structures without Fingerprints #####

3 new Structures
[(41, 'C1=CC=C(C(=C1)C2=NC(=NO2)C3=CC(=CC=C3)C(=O)O)F'), (40, 'OC1=CC=C(C(C)(C)C)C=C1/N=C/C2=CC=CC([N+]([O-])=O)=C2 '), (39, 'O=C1NC(S/C1=C\\C2=CC=C(C3=C([N+]([O-])=O)C=CC=C3)O2)=N')]

Molecule :  (41, 'C1=CC=C(C(=C1)C2=NC(=NO2)C3=CC(=CC=C3)C(=O)O)F')
Fingerprint :  [(68650007, 'c-c(n)o'), (98513984, 'ccc'), (128522177, 'cccc(c)C'), (144269902, 'c-c1cccc(C(=O)O)c1'), (316611168, 'nc(o)-c1ccccc1F'), (696640294, 'cc(C)cc(c)-c'), (707536137, 'cnoc(-c)n'), (864662311, 'O'), (864942730, 'O'), (882399112, 'F'), (951226070, 'ccc'), (967579429, 'nc(n)-c1cccc(C(=O)O)c1'), (982195248, 'cnc(on)-c1ccccc1F'), (994485099, 'ccc'), (1008805407, 'cc(c)-c1noc(-c)n1'), (1075499757, 'ccc(-c(n)o)c(c)F'), (1101907775, 'cnc'), (132346


New inserted fingerprints

68650007  -  c-c(n)o 0 inserted
128522177  -  cccc(c)C 0 inserted
144269902  -  c-c1cccc(C(=O)O)c1 0 inserted
316611168  -  nc(o)-c1ccccc1F 0 inserted
696640294  -  cc(C)cc(c)-c 0 inserted
707536137  -  cnoc(-c)n 0 inserted
882399112  -  F 0 inserted
967579429  -  nc(n)-c1cccc(C(=O)O)c1 0 inserted
982195248  -  cnc(on)-c1ccccc1F 0 inserted
994485099  -  ccc 0 inserted
1008805407  -  cc(c)-c1noc(-c)n1 0 inserted
1075499757  -  ccc(-c(n)o)c(c)F 0 inserted
1101907775  -  cnc 0 inserted
1323467736  -  cccc(c)-c 0 inserted
1444177117  -  ccc(cc)-c(n)n 0 inserted
1491315937  -  ccc(cc)C(=O)O 0 inserted
1510328189  -  C=O 0 inserted
1533864325  -  CO 0 inserted
1710869618  -  cc(c)C(=O)O 0 inserted
1857396528  -  cc(c)F 0 inserted
1859637136  -  cc(c)-c1nc(-c)no1 0 inserted
2030861577  -  c-c(n)nc(-c)o 0 inserted
2246699815  -  C 0 inserted
2309365369  -  O=C(O)c1ccccc1 0 inserted
2313611749  -  Fc1ccccc1 0 inserted
2323300214  -  conc(-c)n 0 inserted
2353112200  -

[39, 606428793, 1] 211
[39, 656020703, 1] 212
[39, 714337811, 1] 213
[39, 767234443, 1] 214
[39, 848127915, 1] 167
[39, 849275503, 1] 215
[39, 864942730, 2] 97
[39, 864942795, 1] 168
[39, 889218762, 1] 216
[39, 951226070, 4] 40
[39, 1081386698, 1] 217
[39, 1151882127, 1] 218
[39, 1169352347, 1] 219
[39, 1207681471, 1] 220
[39, 1245593428, 1] 221
[39, 1323467736, 1] 123
[39, 1330014279, 1] 222
[39, 1514095308, 1] 223
[39, 1634017026, 1] 224
[39, 1723533782, 1] 225
[39, 1791250428, 1] 226
[39, 1814116706, 1] 227
[39, 1888311577, 1] 228
[39, 2036328569, 1] 229
[39, 2043162897, 1] 230
[39, 2063282750, 1] 231
[39, 2132511834, 1] 232
[39, 2246703798, 1] 180
[39, 2296493092, 1] 233
[39, 2378775366, 1] 181
[39, 2378779377, 1] 182
[39, 2401181567, 1] 234
[39, 2749896868, 1] 235
[39, 2800666064, 1] 236
[39, 2882298817, 1] 237
[39, 2927074206, 1] 238
[39, 3189457552, 1] 145
[39, 3217380708, 7] 42
[39, 3218693969, 6] 27
[39, 3351556771, 1] 239
[39, 3361006637, 1] 240
[39, 3393513652, 1] 241
[39, 3

80  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
81  -  [2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
82  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
83  -  [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.36764947740014225
84  -  [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.36764947740014225
85  -  [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.30463609734923813
86  -  [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.30463609734923813
87  -  [2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.36764947740014225
88  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
89  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
90  -  [1, 0, 0, 0, 

168  -  [1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.30463609734923813
169  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
170  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
171  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
172  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
173  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
174  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
175  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
176  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
177  -  [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  -  0.18490739916777568
178  -  [1