In [3]:
import pandas as pd
from m2p import PolyMaker
from polyid import generate_hash
from fingerprints import HierarchticalFingerprints

# Shared helper objects reused by the cells below:
#   pm  - PolyMaker, used for monomer parsing (pm.get_monomers) and polymer
#         construction (pm.thermoplastic)
#   hfp - HierarchticalFingerprints, used to generate the fingerprint tables
pm, hfp = PolyMaker(), HierarchticalFingerprints()

2023-08-10 19:42:59.517765: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Generate Structures

In [4]:
# Load the polymer property table; the first CSV column becomes the index.
data = pd.read_csv('../data/SI_Table-of-polymer-properties.csv', index_col=0)

# Generate the monomers tuple for each row from its monomer SMILES string.
data['monomers'] = data.smiles_monomer.apply(pm.get_monomers)

# Draw a small demo subset of rows. A fixed random_state makes a fresh
# "Restart & Run All" select the same rows every time; an unseeded sample
# changes on each run and makes the saved outputs impossible to reproduce.
# NOTE(review): any randomness inside pm.thermoplastic is not controlled here.
N_SAMPLES = 5
SAMPLE_SEED = 0
data_sample = data.sample(N_SAMPLES, random_state=SAMPLE_SEED)

# Generate polymer structures with DP=8 and 1 replicate structure
# (replicate_structures=1), trying all reaction mechanisms.
data = pm.thermoplastic(data_sample, DP=8, mechanism='all', replicate_structures=1)

# Split off any polymers whose reaction resulted in an error: their
# smiles_polymer value contains the marker 'ERR'. The mask is computed once
# and reused so the kept and rejected frames are guaranteed to be complementary.
has_error = data.smiles_polymer.str.contains('ERR')
dferror = data[has_error]
data = data[~has_error]

# Hash the polymer SMILES column for tracking predictions and structures.
data = generate_hash(data, hash_cols=['smiles_polymer'])

data.head()

100%|██████████| 5/5 [00:01<00:00,  3.03it/s]


Unnamed: 0_level_0,smiles_monomer,property,value,units,smiles_polymer,mechanism,reference,doi,monomers,distribution,replicate_structure
hash-smiles_polymer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
enr8YKTFcDv9oZQhfpPsDr,Nc1ccc(Oc2ccc(N)cc2)cc1.O=C(O)CN1C(=O)c2ccc(C(...,Glass_Transition,235.0,C,Nc1ccc(Oc2ccc(NC(=O)CN3C(=O)c4ccc(C(=O)c5ccc6c...,amide,"Hsiao, Sheng-Huel, and Chin-Ping Yang. “Prepar...",10.1002/pola.1991.080290318,"('Nc1ccc(Oc2ccc(N)cc2)cc1', 'O=C(O)CN1C(=O)c2c...",[],0
W5YabwrufnNwqHV5waFkdr,CC(C)(C)c1cc(C(=O)O)cc(C(=O)O)c1.CC(C)(c1ccc(O...,Glass_Transition,244.0,C,CC(C)(C)c1cc(C(=O)O)cc(C(=O)Nc2ccc(Oc3ccc(C(C)...,amide,"Lee, Kueir-Rarn, Der-Jang Liaw, Been-Yang Liaw...",10.1016/s0376-7388(97)00054-9,"('CC(C)(C)c1cc(C(=O)O)cc(C(=O)O)c1', 'CC(C)(c1...",[],0
Vb4SfDcEF3HZfgmDxdLL4Z,Cc1c(N)c(C)c2c(c1N)C(C)(C)CC2.O=C(c1ccc2c(c1)C...,Permeability_O2,3.72e-09,barrer,Cc1c(N)c2c(c(C)c1N1C(=O)c3ccc(C(=O)c4ccc5c(c4)...,imide,"Xiao, Youchang, Tai-Shung Chung, Mei Lin Chng,...",10.1021/jp050177l,"('Cc1c(N)c(C)c2c(c1N)C(C)(C)CC2', 'O=C(c1ccc2c...",[],0
2zYzbCconiycZVCPQxJJ3K,O=C(Cl)Cl.Oc1ccc(C2(c3ccc(O)cc3)CC3CCC2C3)cc1,Permeability_N2,0.470588,barrer,O=C(Cl)Oc1ccc(C2(c3ccc(OC(=O)Oc4ccc(C5(c6ccc(O...,carbonate,"Hellums, M.W., W.J. Koros, and J.C. Schmidhaus...",10.1016/0376-7388(92)87041-u,"('O=C(Cl)Cl', 'Oc1ccc(C2(c3ccc(O)cc3)CC3CCC2C3...",[],0
V6PxEFRrT7cFzAX2UdZbrZ,Nc1ccc(Oc2ccc(N)cc2)cc1.CC(C)(C)c1ccc(-c2c3c(=...,Permeability_O2,20.0,barrer,CC(C)(C)c1ccc(-c2c3c(=O)N(c4ccc(Oc5ccc(-n6c(=O...,imide,"Kim, Yun-Hi, Seong-Kuk Ahn, Hyung Sun Kim, and...",10.1002/pola.10493,"('Nc1ccc(Oc2ccc(N)cc2)cc1', 'CC(C)(C)c1ccc(-c2...",[],0


# Generate Atomic and Molecular/Morphological Fingerprints

In [5]:
# Compute the fingerprints for the polymers in `data`. The three tables read
# below (hfp.df_atomic, hfp.df_molecular_morphological and
# hfp.df_atomic_molecular_morphological) are presumably populated by this
# call - verify against the HierarchticalFingerprints implementation.
hfp.gen_fp_atomic_molecular_morphological(data)

# Marker appended after each preview to signal that the table is truncated.
truncation_marker = ' ...'

# Preview: first 4 rows and first 4 columns of the atomic fingerprints.
atomic_preview = hfp.df_atomic.iloc[0:4, 0:4]
print('Atomic Fingerprints\n{}{}\n\n'.format(atomic_preview, truncation_marker))

# Preview: first 4 rows and first 4 columns of the mordred fingerprints.
mordred_preview = hfp.df_molecular_morphological.iloc[0:4, 0:4]
print('Molecular and Morphological (mordred) Fingerprints\n{}{}\n\n'.format(mordred_preview, truncation_marker))

# Preview of the combined table: first 5 rows, showing the first two columns
# plus the columns at positions -11 and -10.
combined_columns = [0, 1, -11, -10]
combined_preview = hfp.df_atomic_molecular_morphological.iloc[0:5, combined_columns]
print('Atomic, Molecular/Morphological (mordred) Fingerprints\n{}{}\n\n'.format(combined_preview, truncation_marker))

100%|██████████| 5/5 [00:00<00:00, 44.55it/s]
100%|██████████| 5/5 [01:57<00:00, 23.53s/it]


Atomic Fingerprints
                        10565946   26234434   91234434   112486133
hash-smiles_polymer                                               
enr8YKTFcDv9oZQhfpPsDr       16.0        0.0        0.0        0.0
W5YabwrufnNwqHV5waFkdr        0.0        0.0        0.0        4.0
Vb4SfDcEF3HZfgmDxdLL4Z       16.0        0.0        0.0        0.0
2zYzbCconiycZVCPQxJJ3K        0.0        1.0        4.0        0.0 ...


Molecular and Morphological (mordred) Fingerprints
                               ABC      ABCGG  nAcid  nBase
hash-smiles_polymer                                        
enr8YKTFcDv9oZQhfpPsDr  145.590352  72.245998      1      0
W5YabwrufnNwqHV5waFkdr  144.630791  67.391800      1      0
Vb4SfDcEF3HZfgmDxdLL4Z  122.705692  80.008226      0      0
2zYzbCconiycZVCPQxJJ3K   76.284556  45.501590      0      0 ...


Atomic, Molecular/Morphological (mordred) Fingerprints
                        10565946  26234434     SRW09      SRW10
hash-smiles_polymer                 