In [8]:
# General-purpose scientific stack and standard-library helpers
import pandas as pd
import numpy as np
import pickle as pk
import os
import sys

# RDKit is used to parse SMILES strings into molecule objects
from rdkit import Chem

from datetime import datetime
import matplotlib.pyplot as plt

# IPython setting: display the value of every top-level expression statement
# in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# XenonPy supplies the RDKit-based descriptor calculator (Fingerprints);
# the bare expression below echoes the installed XenonPy version as cell output
from xenonpy.descriptor import Fingerprints
import xenonpy
xenonpy.__version__

# RadonPy supplies the polymer utilities, the GAFF2 force field and the
# force-field descriptor calculator
from tqdm.autonotebook import tqdm
from radonpy.core import poly, calc, const
from radonpy.ff.gaff2 import GAFF2
from radonpy.ff.descriptor import FF_descriptor
# NOTE(review): presumably sets RadonPy's log verbosity -- confirm against the RadonPy docs
const.print_level = 1


'0.6.5'

### Prepare cyclic SMILES for polymers

The sample codes below demonstrate how to use radonpy to produce SMILES that represents a polymer that undergoes the following two operations:
1. Make N copies of the repeating unit of a polymer and connect them sequentially (head to tail), which results in a long polymer chain
2. Connect the head and tail of the long polymer chain to make a cyclic polymer used to mimic an infinitely long polymer chain

Note that the original polymer SMILES is assumed to have exactly two '*'s that represent the head and tail of a polymer

In [9]:
# Number of repeating units chained together to form the long polymer chain
N_cyclic = 10

# Two example polymer SMILES; each one marks its head and tail with '*'
smis_single = [
    '*C(CC)CC*',
    '*c1ccc(C*)cc1',
]

# Build the cyclic SMILES of every example, one polymer at a time
smis_cyclic = []
for smi_single in smis_single:
    smis_cyclic.append(poly.make_cyclicpolymer(smi_single, n=N_cyclic))


### Calculate descriptors for polymers and solvents

#### data preparation

Load data: in our data file, SMILES of polymers and solvents are connected with '_' initially. 

In [10]:
# Each entry of the 'ps_pair' column has the form '<polymer SMILES>_<solvent SMILES>'
data = pd.read_csv('sample_data/data_Chi.csv', index_col=0)

# Split every pair once, then collect the polymer part (first field) and the
# solvent part (second field) in the same row order as the data file
pair_fields = [pair.split('_') for pair in data['ps_pair'].values]
smis_poly = [fields[0] for fields in pair_fields]
smis_solv = [fields[1] for fields in pair_fields]
    

In [11]:
# Deduplicate the polymer and solvent SMILES (np.unique also sorts them)
uni_poly, uni_solv = np.unique(smis_poly), np.unique(smis_solv)

# Report how many distinct SMILES of each kind were found
n_uni_poly = uni_poly.shape[0]
n_uni_solv = uni_solv.shape[0]
print(f'Unique number of polymer SMILES: {n_uni_poly}')
print(f'Unique number of solvent SMILES: {n_uni_solv}')


Unique number of polymer SMILES: 46
Unique number of solvent SMILES: 140


To save time, we only calculate descriptors for the unique SMILES and then assemble the descriptors for each polymer-solvent pair afterward.

In [12]:
# Group name -> array of unique SMILES whose descriptors will be calculated
uni_smis = dict(Polymer=uni_poly, Solvent=uni_solv)

# Container for every descriptor table produced by the cells below
desc_data = dict()


Note that the final SMILES that we use for the descriptor calculation contain 'H's explicitly. If the SMILES you have (for both the cyclic polymers and the solvents) do not have an explicit 'H' representation, please use the following sample code to modify the SMILES.

In [13]:
# Example SMILES written without explicit hydrogens
smis_noHs = ['CCl', 'ClCCl']

# Parse each SMILES, add the hydrogens explicitly, and write it back as SMILES
smis_addHs = []
for smi_noH in smis_noHs:
    mol_with_hs = Chem.AddHs(Chem.MolFromSmiles(smi_noH))
    smis_addHs.append(Chem.MolToSmiles(mol_with_hs))


#### calculate Force-field descriptors using radonpy

In [14]:
# Parameters for the force-field descriptors. Both are forwarded to RadonPy below:
# nk goes to ffkm_desc_names/ffkm_mp and sigma is passed as the `s` argument of ffkm_mp.
# NOTE(review): presumably nk is the number of kernel centers and s the kernel
# width -- confirm against the RadonPy documentation.
nk = 20
sigma = 1/nk/2

# Compute one force-field descriptor table per group ('Polymer', 'Solvent') and
# store it in desc_data under '<group>_ff', indexed by the unique SMILES.
for key, val in uni_smis.items():
    try:
        ff = GAFF2()
        ff_desc = FF_descriptor(ff, polar=True)
        desc_names = ff_desc.ffkm_desc_names(nk=nk)

        desc = ff_desc.ffkm_mp(list(val), nk=nk, s=sigma, cyclic=0)

        # Prefix every descriptor name with the group so that polymer and
        # solvent columns stay distinguishable after they are concatenated
        desc_data[f'{key}_ff'] = pd.DataFrame(desc, columns=[f'{key}_{x}' for x in desc_names], index=val)

        print(datetime.now())
        print(f'{key} done')

    except Exception as err:
        # Best-effort: keep going with the next group, but report why this one
        # failed. Catching Exception (instead of a bare `except:`) lets
        # KeyboardInterrupt/SystemExit propagate, so this long-running cell can
        # still be interrupted from the notebook.
        print(f'{key} failed: {type(err).__name__}: {err}')

print('All done!')


RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for cl,c2,c3
RadonPy info: Using empirical angle parameters theta0 = 114.995000, k_angle = 62.562968 for

#### calculate descriptors from rdkit using xenonpy

In [15]:
%%time

print(datetime.now())
print('Program started...')
for key, val in uni_smis.items():
    mols = [Chem.MolFromSmiles(x) for x in val]
    
    desc_data[f'{key}_rdk'] = Fingerprints(featurizers = 'DescriptorFeature', input_type='mol', on_errors='nan').transform(mols)
    desc_data[f'{key}_rdk']['Ipc'] = np.log(desc_data[f'{key}_rdk']['Ipc'])
    desc_data[f'{key}_rdk'].index = val
    desc_data[f'{key}_rdk'].columns = [f'{key}_{x}' for x in desc_data[f'{key}_rdk'].columns]

    print(datetime.now())
    print(f'{key} done')


2024-08-26 08:59:28.990107
Program started...




2024-08-26 08:59:37.867020
Polymer done


please use MorganGenerator




2024-08-26 08:59:42.614186
Solvent done
CPU times: user 506 ms, sys: 822 ms, total: 1.33 s
Wall time: 13.6 s


  result = getattr(ufunc, method)(*inputs, **kwargs)


#### combine descriptors

In [16]:
# desc_final is in the same format as the descriptor dataframes that are stored in the sample_data folder.
# For every polymer-solvent pair, look up the rows of its polymer and its solvent
# in the per-unique-SMILES tables and place them side by side in the column order
# Polymer_ff, Polymer_rdk, Solvent_ff, Solvent_rdk.
pair_blocks = []
for group, pair_smis in (('Polymer', smis_poly), ('Solvent', smis_solv)):
    for source in ('ff', 'rdk'):
        expanded = desc_data[f'{group}_{source}'].loc[pair_smis, :]
        # Drop the SMILES index so that all blocks align on the pair position
        pair_blocks.append(expanded.reset_index(drop=True))

desc_final = pd.concat(pair_blocks, axis=1)

print(desc_final)

desc_final.to_csv('demo_data.csv', index=False)

      Polymer_mass_H  Polymer_mass_C  Polymer_mass_N  Polymer_mass_O  \
0           0.586817        0.508618        0.439979        0.357449   
1           0.586817        0.508618        0.439979        0.357449   
2           0.586817        0.508618        0.439979        0.357449   
3           0.586817        0.508618        0.439979        0.357449   
4           0.586817        0.508618        0.439979        0.357449   
...              ...             ...             ...             ...   
1185        0.608521        0.608521        0.534774        0.438422   
1186        0.608521        0.608521        0.534774        0.438422   
1187        0.608521        0.608521        0.534774        0.438422   
1188        0.608521        0.608521        0.534774        0.438422   
1189        0.608521        0.608521        0.534774        0.438422   

      Polymer_mass_F  Polymer_mass_P  Polymer_mass_S  Polymer_mass_Cl  \
0           0.227742        0.081921        0.089032         0