In [17]:
import pandas as pd
from m2p import PolyMaker
from polyid import generate_hash
from fingerprints import HierarchticalFingerprints

pm = PolyMaker()
hfp = HierarchticalFingerprints()

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Generate Structures

In [18]:
# Load the example monomer SMILES table; the first CSV column is used as the index.
data = pd.read_csv('../data/example_monomer_smiles.csv',index_col=0)

# generate the monomers entry for every row from its monomer SMILES string
data['monomers'] = data.smiles_monomer.apply(pm.get_monomers)

# Fixed seed so the same 5 rows are drawn on every Restart & Run All.
# NOTE(review): this only pins the row sampling; if pm.thermoplastic has its
# own internal randomness it is not controlled here - confirm.
SAMPLE_SEED = 42

# generate polymer structures with DP=8 and 1 replicate structure for a
# 5-row sample of the table
data = pm.thermoplastic(
    data.sample(5, random_state=SAMPLE_SEED),
    DP=8,
    mechanism='all',
    replicate_structures=1,
)

# remove any polymers which may have resulted in an error during reaction:
# rows whose polymer SMILES contains 'ERR' are kept aside in `dferror`,
# everything else stays in `data`. The mask is computed once and reused.
is_error = data.smiles_polymer.str.contains('ERR')
dferror = data[is_error]
data = data[~is_error]

# hash columns for tracking predictions and structures; the second call works
# on the re-indexed frame so the first hash is kept as a regular column
data = generate_hash(data,hash_cols=['monomers','replicate_structure'])
data = generate_hash(data.reset_index(),hash_cols=['monomers'])

data.head()

100%|██████████| 5/5 [00:00<00:00, 17.54it/s]


Unnamed: 0_level_0,hash-monomers-replicate_structure,smiles_monomer,monomers,distribution,replicate_structure,smiles_polymer,mechanism
hash-monomers,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
bebVfgVkBo4KvxVebKyauG,ckV2NPPhTgFTBG5agtf7aK,Nc1ccc(Oc2ccc(C3(c4ccc(Oc5ccc(N)cc5)cc4)C4CC5C...,"('O=C(O)c1ccc(C(=O)O)c2ccccc12', 'Nc1ccc(Oc2cc...",[],0,Nc1ccc(Oc2ccc(C3(c4ccc(Oc5ccc(NC(=O)c6ccc(C(=O...,amide
9g9GvuxVE4Wr4ZuQSSZ3KX,iM54oFtcdWoJ9EmaChtu9M,Nc1cccc(Oc2ccc(C(=O)c3ccc(Oc4cccc(N)c4)cc3)cc2...,"('O=C(O)c1ccc(C(=O)O)nc1', 'Nc1cccc(Oc2ccc(C(=...",[],0,Nc1cccc(Oc2ccc(C(=O)c3ccc(Oc4cccc(NC(=O)c5ccc(...,amide
PBK6iwfvGwCAooynT7aE6Y,DbuyNwX7cjj7s2aBBEzzEG,O=C(O)C/C=C/CC(=O)O.OCCCCCCCCCCCCO,"('O=C(O)C/C=C/CC(=O)O', 'OCCCCCCCCCCCCO')",[],0,O=C(O)C/C=C/CC(=O)OCCCCCCCCCCCCOC(=O)C/C=C/CC(...,ester
M9f7jYAB5vq2n6dbxshLyU,Yk7xphgTexhKcJgwPtnRSr,Nc1cccc(Oc2ccc(C(=O)c3ccc(Oc4cccc(N)c4)cc3)cc2...,"('O=C(O)c1cccc(C(=O)O)c1', 'Nc1cccc(Oc2ccc(C(=...",[],0,Nc1cccc(Oc2ccc(C(=O)c3ccc(Oc4cccc(NC(=O)c5cccc...,amide
ESHrPd7LPhc6HZanmzHLFH,RYnTL5ftRD7sgU7xNXVhq8,CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(N)cc...,('CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(N)...,[],0,CC(C)(c1ccc(Oc2ccc(N)cc2)cc1)c1ccc(Oc2ccc(NC(=...,amide


# Generate Atomic and Molecular/Morphological Fingerprints

In [30]:
# Compute the fingerprint tables for the polymers in `data`; the results are
# read back from attributes on `hfp` below.
hfp.gen_fp_atomic_molecular_morphological(data)

# Appended after every preview to signal that the table has been truncated.
truncation_mark = ' ...'

# NOTE(review): "Morpholotical" in the headings below looks like a typo for
# "Morphological"; the strings are left untouched so the printed output stays
# the same.

# Atomic fingerprints: first 4 rows, first 4 columns.
atomic_preview = hfp.df_atomic.iloc[:4, :4]
print('Atomic Fingerprints\n{}{}\n\n'.format(atomic_preview, truncation_mark))

# Molecular/morphological fingerprints: first 4 rows, first 4 columns.
molecular_preview = hfp.df_molecular_morphological.iloc[:4, :4]
print('Molecular and Morpholotical (mordred) Fingerprints\n{}{}\n\n'.format(molecular_preview, truncation_mark))

# Combined table: first 5 rows, showing the first two columns together with
# the columns at positions -11 and -10.
combined_columns = [0, 1, -11, -10]
combined_preview = hfp.df_atomic_molecular_morphological.iloc[:5, combined_columns]
print('Atomic, Molecular/Morpholotical (mordred) Fingerprints\n{}{}\n\n'.format(combined_preview, truncation_mark))

Atomic Fingerprints
                        10565946  38514050  77216203  98513984
hash-smiles_polymer                                           
Vmay8YstfdcJBTzeUsHs58       0.0       0.0       0.0       8.0
B2pqfR2y2n8T3mypvPUneQ       0.0       7.0       1.0       8.0
aqYuCc3GenJVbT4SmhYM4L       0.0       0.0       0.0       0.0
VDXBmiG9e8H3GCuJ9KGpSa       0.0       7.0       0.0      12.0 ...


Molecular and Morpholotical (mordred) Fingerprints
                               ABC      ABCGG  nAcid  nBase
hash-smiles_polymer                                        
Vmay8YstfdcJBTzeUsHs58  171.833397  74.401398      1      0
B2pqfR2y2n8T3mypvPUneQ  128.163636  55.588990      1      0
aqYuCc3GenJVbT4SmhYM4L   63.209905  31.886685      1      0
VDXBmiG9e8H3GCuJ9KGpSa  128.163636  58.354910      1      0 ...


Atomic, Molecular/Morpholotical (mordred) Fingerprints
                        10565946  38514050     SRW09      SRW10
hash-smiles_polymer                                         