## Load Python Libraries, Model Configurations, Model Checkpoints and Dataset

In [None]:
# Standard libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from scipy.special import softmax

# PyTorch
import torch

# Configuration
from omegaconf import OmegaConf

# Visualization
import seaborn as sns

# Scikit-learn
from sklearn.preprocessing import MinMaxScaler

# RDKit core and general chemistry modules
from rdkit import Chem, RDLogger
from rdkit.Chem import AllChem, Draw, BRICS, Recap, rdReducedGraphs
from rdkit.Chem.rdmolops import FastFindRings
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.rdMolDescriptors import *

# DeepChem
from deepchem.utils.typing import RDKitMol
from deepchem.feat.base_classes import MolecularFeaturizer

# Local imports
from src.model.arcfdi import ArcDFI

# Load the 'arcdfi' section of the project settings file.
conf = OmegaConf.load('./src/settings.yaml')['arcdfi']
# Restore the model from its checkpoint, passing the loaded configuration along.
# `model` is read as a module-level global by dfi_prediction().
model = ArcDFI.Model.load_from_checkpoint('./ArcDFI/checkpoints/arcdfi.ckpt', strict=True, conf=conf)
# Read the DFI dataset, using the first CSV column as the index.
# NOTE(review): `df` is not referenced by any later cell visible here — confirm it is still needed.
df = pd.read_csv('./ArcDFI/datasets/dfi_final.csv', index_col=0)

## Data Processing Code for Model Inference 

In [None]:
# Per-fingerprint dimension table. make_inference_data() passes the 'morgan',
# 'maccs' and 'erg' entries to tokenize() as the padding index.
# NOTE(review): 'pharma' and 'pubchem' are not read anywhere in the visible code.
FEAT2DIM = {
    'morgan': 1024,
    'pharma': 39972,
    'maccs': 167,
    'erg': 441,
    'pubchem': 881,
}

def check_compound_sanity(smiles):
    """Return True when RDKit can parse *smiles* into a molecule, else False.

    Args:
        smiles: SMILES string to validate.

    Returns:
        bool: True if ``Chem.MolFromSmiles`` yields a molecule; False if it
        yields ``None`` or raises.
    """
    try:
        mol = Chem.MolFromSmiles(smiles)
    except Exception:
        # Only ordinary errors are treated as "not a valid compound";
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        return False
    return mol is not None

def create_morgan_fingerprint(smiles, mol=None):
    """Return the radius-2, 1024-bit Morgan fingerprint as a single-row array.

    Args:
        smiles: SMILES string; parsed only when *mol* is not supplied.
        mol: Optional pre-built RDKit molecule to use instead of parsing.

    Returns:
        numpy.ndarray: The fingerprint bits reshaped to one row (``(1, -1)``).

    Raises:
        ValueError: If *mol* is not given and *smiles* cannot be parsed.
    """
    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError(f'Could not parse SMILES: {smiles!r}')
    mol.UpdatePropertyCache()
    FastFindRings(mol)

    fingerprint = AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024)
    return np.array(fingerprint).reshape(1, -1)

def create_pharma_fingerprint(smiles, mol=None):
    """Return the Gobbi 2D pharmacophore fingerprint as a single-row array.

    Args:
        smiles: SMILES string; parsed only when *mol* is not supplied.
        mol: Optional pre-built RDKit molecule to use instead of parsing.

    Returns:
        numpy.ndarray: The fingerprint reshaped to one row (``(1, -1)``).

    Raises:
        ValueError: If *mol* is not given and *smiles* cannot be parsed.
    """
    # Imported locally because the file's top-level imports do not bring
    # Generate / Gobbi_Pharm2D into scope.
    from rdkit.Chem.Pharm2D import Generate, Gobbi_Pharm2D

    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError(f'Could not parse SMILES: {smiles!r}')
    mol.UpdatePropertyCache()
    FastFindRings(mol)

    fingerprint = Generate.Gen2DFingerprint(mol, Gobbi_Pharm2D.factory)
    return np.array(fingerprint).reshape(1, -1)

def create_maccs_fingerprint(smiles, mol=None):
    """Return the MACCS keys fingerprint as a single-row array.

    Args:
        smiles: SMILES string; parsed only when *mol* is not supplied.
        mol: Optional pre-built RDKit molecule to use instead of parsing.

    Returns:
        numpy.ndarray: The fingerprint bits reshaped to one row (``(1, -1)``).

    Raises:
        ValueError: If *mol* is not given and *smiles* cannot be parsed.
    """
    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError(f'Could not parse SMILES: {smiles!r}')
    mol.UpdatePropertyCache()

    return np.array(GetMACCSKeysFingerprint(mol)).reshape(1, -1)

def create_erg_fingerprint(smiles, mol=None):
    """Return the ErG (extended reduced graph) fingerprint as a single-row array.

    Args:
        smiles: SMILES string; parsed only when *mol* is not supplied.
        mol: Optional pre-built RDKit molecule to use instead of parsing.

    Returns:
        numpy.ndarray: The fingerprint reshaped to one row (``(1, -1)``).

    Raises:
        ValueError: If *mol* is not given and *smiles* cannot be parsed.
    """
    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # Fail with a readable message instead of an AttributeError on None.
            raise ValueError(f'Could not parse SMILES: {smiles!r}')
    mol.UpdatePropertyCache()

    return np.array(rdReducedGraphs.GetErGFingerprint(mol)).reshape(1, -1)

def get_all_compound_features(dcomp_smiles, fcomp_smiles, dcomp_mol=None, fcomp_mol=None):
    """Compute the Morgan, MACCS and ErG fingerprints for a drug/food pair.

    Args:
        dcomp_smiles: SMILES of the drug compound.
        fcomp_smiles: SMILES of the food compound.
        dcomp_mol: Optional pre-built RDKit molecule for the drug compound.
        fcomp_mol: Optional pre-built RDKit molecule for the food compound.

    Returns:
        dict | None: Mapping with the keys ``dcomp_morgan_fp``,
        ``dcomp_maccs_fp``, ``dcomp_erg_fp``, ``fcomp_morgan_fp``,
        ``fcomp_maccs_fp`` and ``fcomp_erg_fp``. If any step raises, the
        exception is printed and ``None`` is returned, so callers must check
        for ``None``.
    """
    try:
        # Parse each SMILES once and share the molecule between the three
        # fingerprint helpers instead of re-parsing inside every helper.
        # A failed parse leaves the molecule as None, in which case the
        # helpers fall back to parsing (and failing) themselves.
        if dcomp_mol is None:
            dcomp_mol = Chem.MolFromSmiles(dcomp_smiles)
        if fcomp_mol is None:
            fcomp_mol = Chem.MolFromSmiles(fcomp_smiles)

        return dict(
            dcomp_morgan_fp=create_morgan_fingerprint(dcomp_smiles, dcomp_mol),
            dcomp_maccs_fp=create_maccs_fingerprint(dcomp_smiles, dcomp_mol),
            dcomp_erg_fp=create_erg_fingerprint(dcomp_smiles, dcomp_mol),
            fcomp_morgan_fp=create_morgan_fingerprint(fcomp_smiles, fcomp_mol),
            fcomp_maccs_fp=create_maccs_fingerprint(fcomp_smiles, fcomp_mol),
            fcomp_erg_fp=create_erg_fingerprint(fcomp_smiles, fcomp_mol),
        )
    except Exception as e:
        # Best-effort featurisation: report the problem and signal failure
        # with None rather than propagating.
        print(e)
        return None

def tokenize(matrix, padding_idx=1024):
    """Turn each row of a matrix into a padded list of its non-zero positions.

    Args:
        matrix: 2-D tensor; every row is scanned for non-zero entries.
        padding_idx: Integer used to fill the unused tail of shorter rows.
            It must not coincide with a real column index, otherwise that
            position would be masked out as padding.

    Returns:
        tuple[torch.Tensor, torch.Tensor]: ``(padded_tensor, padding_mask)``.
        ``padded_tensor`` is an int64 tensor of shape ``(rows, max_nonzero)``
        holding the non-zero column indices of each row followed by
        ``padding_idx``. ``padding_mask`` is a float tensor of the same shape
        with 1.0 at real indices and 0.0 at padding.
    """
    tokenized_indices = [torch.nonzero(row).squeeze(1) for row in matrix]
    # default=0 keeps an input with no rows from crashing max().
    max_length = max((len(indices) for indices in tokenized_indices), default=0)
    padded_tensor = torch.full(
        (len(tokenized_indices), max_length),
        fill_value=padding_idx,
        dtype=torch.long,
    )

    for i, indices in enumerate(tokenized_indices):
        padded_tensor[i, :len(indices)] = indices

    padding_mask = (padded_tensor != padding_idx).float()

    return padded_tensor, padding_mask

def get_substructures_morgan(comp_smiles):
    """Map each set Morgan bit of a compound to a substructure and its atoms.

    Args:
        comp_smiles: SMILES string of the compound.

    Returns:
        tuple[dict, dict, Mol]: ``(substructures, highlight_atoms, mol)``.
        ``substructures`` maps a bit index to the SMILES of the atom
        environment that set it; when several environments set the same bit,
        the last one wins. ``highlight_atoms`` maps a bit index to the
        flattened atom indices of all matches of that substructure in the
        molecule, and only contains bits whose substructure SMILES is
        non-empty. ``mol`` is the parsed RDKit molecule.

    Raises:
        ValueError: If *comp_smiles* cannot be parsed.
    """
    mol = Chem.MolFromSmiles(comp_smiles)
    if mol is None:
        # Re-parsing the same string cannot succeed, so fail with a clear message.
        raise ValueError(f'Could not parse SMILES: {comp_smiles!r}')
    mol.UpdatePropertyCache()
    FastFindRings(mol)

    bit_info = {}
    # The fingerprint itself is not used; the call is made to fill bit_info
    # with (atom index, radius) pairs for every set bit.
    AllChem.GetMorganFingerprintAsBitVect(mol, 2, nBits=1024, bitInfo=bit_info)

    highlight_atoms = {}
    substructures = {}
    for bit, atoms_radius in bit_info.items():
        for atom_idx, rad in atoms_radius:
            # Bonds within `rad` of the centre atom define the substructure.
            env = Chem.FindAtomEnvironmentOfRadiusN(mol, rad, atom_idx)
            submol = Chem.PathToSubmol(mol, env)
            smiles_substructure = Chem.MolToSmiles(submol)

            # Store the substructure SMILES with its corresponding bit.
            substructures[bit % 1024] = smiles_substructure

            # For visualization: only non-empty substructures get highlights.
            if smiles_substructure != '':
                matches = mol.GetSubstructMatches(submol)
                highlight_atoms[bit % 1024] = [i for match in matches for i in match]

    return substructures, highlight_atoms, mol

def make_inference_data(**kwargs):
    """Build the single-pair input dictionary consumed by the model.

    Keyword Args:
        drugcompound_id: Identifier of the drug compound.
        foodcompound_id: Identifier of the food compound.
        drugcompound_smiles: SMILES of the drug compound.
        foodcompound_smiles: SMILES of the food compound.

    Returns:
        dict: Ids and SMILES wrapped in one-element lists; float32 tensors
        for every ``{dcomp,fcomp}_{morgan,maccs,erg}_fp`` fingerprint; the
        matching ``*_words`` / ``*_masks`` tensors produced by ``tokenize``;
        and all-zero placeholders for ``y_dfi_label``, ``dcomp_dci_labels``
        and ``dcomp_dci_masks``.

    Raises:
        KeyError: If one of the four keyword arguments is missing.
        ValueError: If the fingerprints could not be computed for the pair.
    """
    dcomp_id = kwargs['drugcompound_id']
    fcomp_id = kwargs['foodcompound_id']
    dcomp_smiles = kwargs['drugcompound_smiles']
    fcomp_smiles = kwargs['foodcompound_smiles']

    features = get_all_compound_features(dcomp_smiles, fcomp_smiles)
    if features is None:
        # get_all_compound_features signals failure with None; surface it
        # here instead of crashing later with an opaque TypeError.
        raise ValueError(
            f'Could not compute fingerprints for pair {dcomp_id!r} & {fcomp_id!r}'
        )

    input_dict = {
        'dcomp_id': [dcomp_id],
        'fcomp_id': [fcomp_id],
        'dcomp_smiles': [dcomp_smiles],
        'fcomp_smiles': [fcomp_smiles],
    }

    compounds = ('dcomp', 'fcomp')
    fingerprints = ('morgan', 'maccs', 'erg')

    for comp in compounds:
        for fp in fingerprints:
            key = f'{comp}_{fp}_fp'
            input_dict[key] = torch.tensor(data=features[key], dtype=torch.float32)

    # The fingerprint length is used as padding index for the token lists.
    for comp in compounds:
        for fp in fingerprints:
            words, masks = tokenize(input_dict[f'{comp}_{fp}_fp'], FEAT2DIM[fp])
            input_dict[f'{comp}_{fp}_words'] = words
            input_dict[f'{comp}_{fp}_masks'] = masks

    # Inference has no ground truth, so the label fields are zero placeholders.
    # NOTE(review): the width 10 presumably matches the model's DCI label count — confirm.
    input_dict['y_dfi_label'] = torch.zeros(1, dtype=torch.float32)
    input_dict['dcomp_dci_labels'] = torch.zeros((1, 10), dtype=torch.float32)
    input_dict['dcomp_dci_masks'] = torch.zeros((1, 10), dtype=torch.float32)

    return input_dict

## Inference Code for Predicted Drug-Food Interaction Matrix

In [None]:
def dfi_prediction(ext_drug, ext_food):
    """Run the loaded model on one drug/food pair.

    Args:
        ext_drug: Indexable ``(identifier, smiles)`` pair for the drug.
        ext_food: Indexable ``(identifier, smiles)`` pair for the food compound.

    Returns:
        The object returned by ``model.infer`` for the pair.
        ``make_dfi_matrix`` reads its ``'yhat_dfi'`` entry.
    """
    torch.cuda.empty_cache()

    # The Morgan substructure/highlight extraction that used to run here was
    # never used by this function, so it is no longer computed per call.
    input_dict = make_inference_data(drugcompound_id=ext_drug[0],
                                     foodcompound_id=ext_food[0],
                                     drugcompound_smiles=ext_drug[1],
                                     foodcompound_smiles=ext_food[1])

    # Uses the module-level `model`, switched to evaluation mode and frozen.
    model.eval()
    model.freeze()

    return model.infer(input_dict)

def make_dfi_matrix(df):
    """Score every drug/food pair in *df* and attach the predictions.

    Args:
        df: DataFrame with the columns ``Drug_Name``, ``Drug_Smiles``,
            ``Food_Name`` and ``Food_Smiles``.

    Returns:
        The same DataFrame object, modified in place with two new columns:
        ``Prediction_Score`` (the ``'yhat_dfi'`` value of each prediction) and
        ``Prediction_Label`` (True where the score is greater than 0.5).
    """
    scores = [
        dfi_prediction(
            (pair.Drug_Name, pair.Drug_Smiles),
            (pair.Food_Name, pair.Food_Smiles),
        )['yhat_dfi'].item()
        for pair in df.itertuples(index=False)
    ]

    df['Prediction_Score'] = scores
    df['Prediction_Label'] = df['Prediction_Score'] > 0.5

    return df

In [None]:
# Load the external DFI pairs (first CSV column is the index) and keep only
# the rows where neither SMILES column equals the literal string 'nothing'.
dfi_dataframe = pd.read_csv('dfi_external.csv', index_col=0)
has_drug_smiles = dfi_dataframe['Drug_Smiles'] != 'nothing'
has_food_smiles = dfi_dataframe['Food_Smiles'] != 'nothing'
dfi_dataframe = dfi_dataframe[has_drug_smiles & has_food_smiles]
dfi_dataframe.shape

In [None]:
# Display the filtered external DFI table.
dfi_dataframe

In [None]:
# Score every pair; make_dfi_matrix adds the prediction columns to the
# passed frame in place and returns that same frame.
dfi_matrix    = make_dfi_matrix(dfi_dataframe)
# Preview the first rows of the scored table.
dfi_matrix.head()

In [None]:
# Display the full scored table.
dfi_matrix