In [1]:
import pandas as pd
from m2p import PolyMaker
from polyid import generate_hash
from fingerprints import HierarchticalFingerprints

pm = PolyMaker()
hfp = HierarchticalFingerprints()

%load_ext autoreload
%autoreload 2

2023-03-31 10:27:28.623547: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-31 10:27:28.993579: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64::/home/wilsoa6/miniconda3/envs/stonks/lib/
2023-03-31 10:27:28.993638: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64::/home/wilsoa6/miniconda

# Generate Structures

In [2]:
# Seed for the row sample below, so that Restart & Run All selects the same
# monomer rows every time.
SAMPLE_SEED = 0

# import data
data = pd.read_csv('../data/example_monomer_smiles.csv', index_col=0)

# generate monomers tuple
data['monomers'] = data.smiles_monomer.apply(pm.get_monomers)

# generate polymer structures with DP=8 and replicate_structures=1 from a
# reproducible sample of 5 monomer rows
# NOTE(review): pm.thermoplastic may itself be stochastic; the seed here only
# fixes which rows are sampled -- confirm whether it needs its own seed.
data = pm.thermoplastic(
    data.sample(5, random_state=SAMPLE_SEED),
    DP=8,
    mechanism='all',
    replicate_structures=1,
)

# remove any polymers which may have resulted in an error during reaction
# The mask is computed once, as a literal substring match; rows with a missing
# polymer SMILES are treated as failed so the boolean index never contains NaN.
is_error = data.smiles_polymer.str.contains('ERR', regex=False, na=True)
dferror = data[is_error]
data = data[~is_error]

# hash columns for tracking predictions and structures
data = generate_hash(data, hash_cols=['smiles_polymer'])


data

100%|██████████| 5/5 [00:00<00:00, 19.97it/s]


Unnamed: 0_level_0,smiles_monomer,monomers,distribution,replicate_structure,smiles_polymer,mechanism
hash-smiles_polymer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
86tohyFm8nd9utdCyWYYMR,O=C(Cl)CCCC(=O)Cl.OCC1CCC(CO)CC1,"('OCC1CCC(CO)CC1', 'O=C(Cl)CCCC(=O)Cl')",[],0,O=C(Cl)CCCC(=O)OCC1CCC(COC(=O)CCCC(=O)OCC2CCC(...,ester
XcGAkBhYsc94XppJaPvb2f,O=C(O)c1ccc(C(=O)O)c2c(C(=O)O)ccc(C(=O)O)c12.O...,"('OCCCCCCCCCCCCO', 'O=C(O)c1ccc(C(=O)O)c2c(C(=...",[],0,O=C(O)c1ccc(C(=O)O)c2c(C(=O)OCCCCCCCCCCCCOC(=O...,ester
XWz4Mj2yA3vNxKNYVQM87Y,O=C(O)CCCCCCCCCCCCCCCO,"('O=C(O)CCCCCCCCCCCCCCCO',)",[],0,O=C(O)CCCCCCCCCCCCCCCOC(=O)CCCCCCCCCCCCCCCOC(=...,ester
Gtcwupoq2EaVrqqYSHcgbq,Nc1ccc(Oc2ccc(N)cc2)cc1.O=C1/C(=C/c2ccc(N3C(=O...,"('Nc1ccc(Oc2ccc(N)cc2)cc1', 'O=C1/C(=C/c2ccc(N...",[],0,Nc1ccc(Oc2ccc(NC(=O)c3ccc4c(c3)C(=O)N(c3ccc(/C...,amide
NKwR8NYmaUhztGFGMhKwkx,O=C(O)c1ccc(-c2ccc(-c3ccc(C(=O)O)cc3)cc2)cc1.OCCO,"('OCCO', 'O=C(O)c1ccc(-c2ccc(-c3ccc(C(=O)O)cc3...",[],0,O=C(O)c1ccc(-c2ccc(-c3ccc(C(=O)OCCOC(=O)c4ccc(...,ester


# Generate Atomic and Molecular/Morphological Fingerprints

In [3]:
# Generate the fingerprints for the polymers in `data`; the previews below read
# the resulting frames back from attributes on `hfp`.
hfp.gen_fp_atomic_molecular_morphological(data)

# Print only a small corner of each (wide) fingerprint table; the trailing
# ' ...' marks that rows/columns were cut off.
print('Atomic Fingerprints\n{}{}\n\n'.format(hfp.df_atomic.iloc[0:4,0:4],' ...'))
print('Molecular and Morphological (mordred) Fingerprints\n{}{}\n\n'.format(hfp.df_molecular_morphological.iloc[0:4,0:4],' ...'))
# For the combined table, show the first two columns together with two columns
# counted from the end, so columns from both ends of the frame are visible.
print('Atomic, Molecular/Morphological (mordred) Fingerprints\n{}{}\n\n'.format(hfp.df_atomic_molecular_morphological.iloc[0:5,[0,1,-11,-10]],' ...'))

100%|██████████| 5/5 [00:00<00:00, 523.95it/s]
100%|██████████| 5/5 [00:06<00:00,  1.38s/it]

Atomic Fingerprints
                        10565946   132611095  161963127  195323148
hash-smiles_polymer                                               
86tohyFm8nd9utdCyWYYMR        0.0        0.0        0.0        1.0
XcGAkBhYsc94XppJaPvb2f        0.0        5.0       40.0        0.0
XWz4Mj2yA3vNxKNYVQM87Y        0.0        0.0       88.0        0.0
Gtcwupoq2EaVrqqYSHcgbq       20.0        0.0        0.0        0.0 ...


Molecular and Morpholotical (mordred) Fingerprints
                               ABC      ABCGG  nAcid  nBase
hash-smiles_polymer                                        
86tohyFm8nd9utdCyWYYMR   51.896196  29.248913      0      0
XcGAkBhYsc94XppJaPvb2f   95.520052  55.959254      5      0
XWz4Mj2yA3vNxKNYVQM87Y  102.807885  40.549736      1      0
Gtcwupoq2EaVrqqYSHcgbq  198.683425  86.283976      1      0 ...


Atomic, Molecular/Morpholotical (mordred) Fingerprints
                        10565946  132611095     SRW09      SRW10
hash-smiles_polymer                


