In [None]:
import SSC
from SSC import train, Analyzer

import numpy as np
import pandas as pd
from rdkit import Chem

# Train

In [None]:
"A single command for full pipeline of deep learning."
"It returns the path of the folder where the training results are saved."

# Run the full training pipeline on the "SciData_emi" dataset with 'Emi_eV' as
# the only target; keep the returned results-folder path for later use.
model_path = train(
    data="SciData_emi",
    target=['Emi_eV'],
)

In [None]:
"SGC basically adopts GAT for graph convolution, but you can try other algorithms by calling other networks."

# model_path= train(data="SciData_emi", target=['Emi_eV'], network= "SSC_DMPNN")

In [None]:
# Narrative string: it is the only expression in this cell, so the notebook
# echoes it as the cell output.
"Hyperparameter configurations are also declared as keyword arguments of train function."

# Disabled example: the same train() call as above, with conv_layers and
# hidden_dim passed as extra keyword arguments.
# model_path= train(data="SciData_emi", target=['Emi_eV'], conv_layers= 6, hidden_dim=256)

# Predict & Analyze

In [None]:
# Analyzer wraps a trained model: it takes the name of a results folder as
# input and loads the trained model(s) stored there.
# To analyze the model trained above, pass the last component of the path
# returned by train() instead (NOTE(review): splitting on '/' assumes
# POSIX-style paths — confirm on Windows):
# analyzer = Analyzer(model_path.split('/')[-1])
# Here a fixed folder name is used, so this cell does not depend on model_path.
analyzer = Analyzer('trained_SSC_GAT')

In [None]:

# Chromophore to analyze, written as a SMILES string.
raw_smi = "S1C(C2=CC=C([N+](=O)[O-])S2)=CC=C1N(C)C"

# MolFromSmiles returns None (instead of raising) when the SMILES cannot be
# parsed; fail here with a clear message rather than inside MolToSmiles.
mol = Chem.MolFromSmiles(raw_smi)
if mol is None:
    raise ValueError("RDKit could not parse SMILES: {!r}".format(raw_smi))

# Canonical SMILES, so the same molecule is always spelled the same way.
smi = Chem.MolToSmiles(mol)

In [None]:
# Analyzer supports prediction for a (chromophore, solvent) pair, both given as
# SMILES strings; "CCO" is ethanol. The call is the cell's last expression, so
# its return value is shown as the cell output.
analyzer.predict(smi, "CCO")

In [None]:
# The parameters of the SSC approach are accessible through get_score, selected
# by `key`:
#   RP  - Reference Property
#   SC  - Subgroup Contribution
#   PEF - Proximity Effect Factor
# All three are queried for the same chromophore in the same solvent ("CCO").
RP = analyzer.get_score(
    smi, "CCO", key='RP', inverse_transform=True, is_key_relative=True
)
SC = analyzer.get_score(
    smi, "CCO", key='SC', inverse_transform=False, is_key_relative=True
)
PEF = analyzer.get_score(
    smi, "CCO", key='PEF', inverse_transform=False, is_key_relative=False
)

# Show SC and PEF, one after the other.
print(SC, PEF, sep="\n")

In [None]:
# These parameters can be visualized
def _min_max_scale(scores):
    """Linearly rescale ``scores`` so the minimum maps to 0 and the maximum to 1.

    Uses only ``min``/``max`` and arithmetic, so it accepts any 1-D array-like
    that supports broadcasting subtraction and division.

    If all scores are equal the range is zero and a plain
    ``(x - min) / (max - min)`` would divide by zero (yielding NaN); in that
    case every score is mapped to 0 instead.
    """
    lowest, highest = min(scores), max(scores)
    span = highest - lowest
    if span == 0:
        return scores - lowest
    return (scores - lowest) / span


# Plot the SC values of the chromophore in "CCO", scaled to [0, 1].
result = analyzer.plot_score(
    smi, "CCO",
    key="SC", inverse_transform=False, is_key_relative=True,
    score_scaler=_min_max_scale,
)

In [None]:
# Fragment the chromophore with the analyzer, then collect the `.smiles`
# attribute of every returned fragment, in the order they were returned.
subgroups = analyzer.get_fragment(smi)
subgroup_smiles = [
    fragment.smiles
    for fragment in subgroups
]

In [None]:
# Tabulate each subgroup's SMILES next to its SC and PEF values; the score
# arrays are flattened to 1-D so they can serve as columns.
subgroup_columns = {
    "Smiles": subgroup_smiles,
    "SC": SC.reshape(-1),
    "PEF": PEF.reshape(-1),
}
pd.DataFrame(subgroup_columns)

# Calculate NSC

In [None]:
# Narrative string: it is the only expression in this cell, so the notebook
# echoes it as the cell output. The cells below implement this recipe.
"To calculate NSC, you need to calculate the SC values in 20 solvents, and then subtract their average from SC values"

In [None]:
# Load the solvent table and order its rows by the "ET(30)" column.
solvent_data = pd.read_csv("solvent_feature.csv").sort_values("ET(30)")

# Column-oriented data for the SC table: the subgroup SMILES first, then one
# flattened SC vector per solvent, keyed by the solvent's SMILES and kept in
# the sorted solvent order.
SC_dict = {"Smiles": subgroup_smiles}
SC_dict.update({
    solvent: analyzer.get_score(
        smi, solvent, key='SC', inverse_transform=False, is_key_relative=True
    ).reshape(-1)
    for solvent in solvent_data['smiles']
})

In [None]:
# Turn the collected columns into a table: one row per subgroup, a "Smiles"
# column followed by one SC column per solvent. Displayed as the cell output.
SC_table = pd.DataFrame(data=SC_dict)
SC_table

In [None]:
# NSC = SC minus the subgroup's mean SC over all solvents.
# Column 0 holds the subgroup SMILES; every remaining column is one solvent.
sc_values = SC_table.iloc[:, 1:].to_numpy()
row_means = np.mean(sc_values, axis=1, keepdims=True)  # one mean per subgroup row

NSC_table = SC_table.copy()
NSC_table.iloc[:, 1:] = sc_values - row_means
NSC_table