In [1]:
import pandas as pd
from m2p import PolyMaker
from polyid import generate_hash
from fingerprints import HierarchticalFingerprints

# Instantiate the two helper objects the cells below rely on:
# `pm` is used to derive monomers and build polymer structures,
# `hfp` is used to generate the fingerprint tables.
pm, hfp = PolyMaker(), HierarchticalFingerprints()

2023-03-31 10:29:50.257029: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-31 10:29:50.618908: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64::/home/wilsoa6/miniconda3/envs/stonks/lib/
2023-03-31 10:29:50.618965: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda-11.2/lib64::/home/wilsoa6/miniconda

# Generate Structures

In [2]:
# Load the example monomer table; the first CSV column becomes the index.
data = pd.read_csv('../data/example_monomer_smiles.csv', index_col=0)

# Derive the monomers tuple for every row from its monomer SMILES string.
data['monomers'] = data.smiles_monomer.apply(pm.get_monomers)

# Generate polymer structures with DP=8 and 1 replicate structure per row for
# a random sample of five rows. The sample is seeded so that a fresh
# "Restart Kernel -> Run All" selects the same rows (and therefore produces the
# same table) every time; change SAMPLE_SEED to draw a different sample.
SAMPLE_SEED = 0
data = pm.thermoplastic(
    data.sample(5, random_state=SAMPLE_SEED),
    DP=8,
    mechanism='all',
    replicate_structures=1,
)

# Set aside any polymers whose SMILES contains 'ERR' (the reaction failed) and
# keep only the successfully built ones. The mask is computed once and reused.
has_error = data.smiles_polymer.str.contains('ERR')
dferror = data[has_error]
data = data[~has_error]

# Hash the polymer SMILES column for tracking predictions and structures.
data = generate_hash(data, hash_cols=['smiles_polymer'])

# Display the resulting table as the cell output.
data

100%|██████████| 5/5 [00:00<00:00, 14.92it/s]


Unnamed: 0_level_0,smiles_monomer,monomers,distribution,replicate_structure,smiles_polymer,mechanism
hash-smiles_polymer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
7pj5KxpS6wbGP247VzGkE4,CC1(C)CC(C)(c2ccc(Oc3ccc(-c4ccc(N)cc4)cc3C(F)(...,"('O=C(O)c1cc(C(=O)O)c(C(=O)O)cc1C(=O)O', 'CC1(...",[],0,CC1(C)CC(C)(c2ccc(Oc3ccc(-c4ccc(N)cc4)cc3C(F)(...,amide
bGVjhweP7tvL2MqLYUHDay,O=C(O)CCCCC(=O)O.OCC(CO)(CBr)CBr,"('O=C(O)CCCCC(=O)O', 'OCC(CO)(CBr)CBr')",[],0,O=C(O)CCCCC(=O)OCC(CBr)(CBr)COC(=O)CCCCC(=O)OC...,ester
nftut9Tz2wwCVnxe5EHyWS,O=C(O)c1ccc(C(=O)O)cc1.OCCCCO,"('OCCCCO', 'O=C(O)c1ccc(C(=O)O)cc1')",[],0,O=C(O)c1ccc(C(=O)OCCCCOC(=O)c2ccc(C(=O)OCCCCOC...,ester
YGkD3DKQsU4iHR3dSoKsXu,CC(C)(CO)C(=O)O,"('CC(C)(CO)C(=O)O',)",[],0,CC(C)(COC(=O)C(C)(C)COC(=O)C(C)(C)COC(=O)C(C)(...,ester
8a9FUMxB7L2euEP2Va5PgR,C[Si](C)(c1ccc(C(=O)Nc2ccc(C(=O)O)cc2)cc1)c1cc...,"('Nc1ccc(Cc2ccc(N)cc2)cc1', 'C[Si](C)(c1ccc(C(...",[],0,C[Si](C)(c1ccc(C(=O)Nc2ccc(C(=O)O)cc2)cc1)c1cc...,amide


# Generate Atomic and Molecular/Morphological Fingerprints

In [3]:
# Compute the fingerprint tables for the polymers in `data`; the results are
# read back from attributes on `hfp` below.
hfp.gen_fp_atomic_molecular_morphological(data)

# Each preview is (title, attribute on `hfp`, row selector, column selector).
# The combined table shows its first two columns and two columns taken from
# near the end, selected with negative positions.
preview_specs = (
    ('Atomic Fingerprints',
     'df_atomic', slice(0, 4), slice(0, 4)),
    ('Molecular and Morphological (mordred) Fingerprints',
     'df_molecular_morphological', slice(0, 4), slice(0, 4)),
    ('Atomic, Molecular/Morphological (mordred) Fingerprints',
     'df_atomic_molecular_morphological', slice(0, 5), [0, 1, -11, -10]),
)

preview_template = '{}\n{}{}\n\n'
for title, attr_name, row_sel, col_sel in preview_specs:
    # Slice immediately before printing so earlier previews are still shown
    # if a later table cannot be sliced.
    excerpt = getattr(hfp, attr_name).iloc[row_sel, col_sel]
    print(preview_template.format(title, excerpt, ' ...'))

100%|██████████| 5/5 [00:00<00:00, 492.93it/s]
100%|██████████| 5/5 [00:08<00:00,  1.69s/it]

Atomic Fingerprints
                        132611095  133666253  227001719  305828102
hash-smiles_polymer                                               
7pj5KxpS6wbGP247VzGkE4        5.0        0.0        0.0       10.0
bGVjhweP7tvL2MqLYUHDay        0.0        7.0        7.0        0.0
nftut9Tz2wwCVnxe5EHyWS        0.0        0.0        0.0        0.0
YGkD3DKQsU4iHR3dSoKsXu        0.0        0.0        0.0        0.0 ...


Molecular and Morphological (mordred) Fingerprints
                               ABC       ABCGG  nAcid  nBase
hash-smiles_polymer                                         
7pj5KxpS6wbGP247VzGkE4  256.375771  135.098718      5      0
bGVjhweP7tvL2MqLYUHDay   49.067769   36.535805      1      0
nftut9Tz2wwCVnxe5EHyWS   48.744248   28.439315      1      0
YGkD3DKQsU4iHR3dSoKsXu   42.632309   35.773633      1      0 ...


Atomic, Molecular/Morphological (mordred) Fingerprints
                        132611095  133666253     SRW09      SRW10
hash-smiles_polymer         


