# This notebook shows how to create a fragmented interaction fingerprint (FIFI) for a set of docked compounds (SDF) and a protein (PDB)

In [1]:
import numpy as np
import pandas as pd
import os

## Load the protein pdb and ligand sdf
- Using the beta2-adrenergic receptor (adrb2) as an example

In [2]:
from fifi_vicinity import (
    load_mols_from_sdf,
    load_pdb_file_with_amino_acid,
)

# Abbreviation of the target; used to build the input file names.
target_abv = "adrb2"

# Input files are expected under file_source/ as <target>_target.pdb and
# <target>_ligands_train.sdf.
pdb_path = f'file_source/{target_abv}_target.pdb'
sdf_path = f'file_source/{target_abv}_ligands_train.sdf'

protein_file = load_pdb_file_with_amino_acid(pdb_path)
ligand_file = load_mols_from_sdf(sdf_path)

## Extracting the definition of proximal atoms
- Use this when creating FIFI for the first time, i.e. when no proximal-atoms pickle exists yet

In [8]:
from fifi_vicinity import extract_close_atoms, ligand_interaction_comprehension

# Step 1: collect the protein/ligand proximity information from the loaded
# protein and ligands (presumably the atoms within some distance cutoff;
# confirm in fifi_vicinity).
close_atom_list = extract_close_atoms(protein_file, ligand_file)
# Step 2: combine that result with the ligands into df_fragment, the object
# that later cells save to disk and pass to the fingerprint generation.
df_fragment = ligand_interaction_comprehension(close_atom_list, ligand_file)

processing 969/969

## Saving DF fragments for future use
- use this part if you want to save df_fragment as a local pickle file

In [5]:
from fifi_vicinity import dict2pickle

# Folder in which the df_fragment pickle is stored.
df_fragment_path = 'vicinity_atoms/'
os.makedirs(df_fragment_path, exist_ok=True)
# os.path.join avoids the doubled separator ("vicinity_atoms//...") that plain
# string formatting produced, so the saved path matches the one used for loading.
dict2pickle(df_fragment, os.path.join(df_fragment_path, f"{target_abv}.pickle"))

## Loading DF fragments from previous work
- Use this if a pickle file of df_fragment has already been created and saved, so it does not need to be generated again

In [3]:
# Path of the pickle written by the "Saving DF fragments" step.
df_savefile_path =  f'vicinity_atoms/{target_abv}.pickle'
# NOTE: unpickling can execute arbitrary code; only load pickle files you created yourself.
df_fragment = pd.read_pickle(df_savefile_path)

## Generating FIFI FP
- Requires df_fragment, the protein PDB file and the ligand SDF file
- The bits are saved in pickle format; for FIFI-US, each residue is stored in its own pickle file
- The dictionary of hash values and SMARTS is also saved in the respective folder

In [5]:
from fifi_generating_fp import generating_fifi_fp, residue_dict_maker

# Number of proximal atoms' neighbors to be included.
ligand_depth = 2
# File name of the protein PDB.
protein_file_name = f'file_source/{target_abv}_target.pdb'
# Where to save the FIFI FP.
save_path = f"smarts_bits_v8/{target_abv}"
os.makedirs(save_path, exist_ok=True)

# NOTE(review): save_path is created here but is not passed to
# generating_fifi_fp; confirm how the function locates its output folder.
aa_dict = residue_dict_maker(protein_file_name)
generating_fifi_fp(ligand_file, df_fragment, aa_dict, lig_depth=ligand_depth)

processing 316TYR 83/83 aa 968/969 ligands

## Loading the FIFI pickles into a dataframe as input for ML
- Requires the folder path of the FIFI pickles and a list of the ligand compound names to include


In [18]:
from fifi_pickle_load import fifius_pickle2df

# Folder holding the FIFI-US pickles.
# NOTE(review): this path differs from the smarts_bits_v8/{target_abv} folder
# created in the generation step; confirm it points at the intended output.
fifi_us_pickle_path = "smarts_bits_v8/5.5/ecfp2_n2"
# Names of the ligands to include as rows of the dataframe.
example_ligands = ["ch322", "ch351", "neg272547"]

df_fifi = fifius_pickle2df(fifi_us_pickle_path, example_ligands)

In [19]:
# Show the dataframe: one row per requested ligand (the output below has 3 rows x 23041 columns).
print(df_fifi)

           27_0  27_1  27_2  27_3  27_4  40_0  40_1  40_2  40_3  40_4  ...  \
ch322         0     0     0     0     0     0     0     0     0     0  ...   
ch351         0     0     0     0     0     0     0     0     0     0  ...   
neg272547     0     0     0     0     0     0     0     0     0     0  ...   

           316_678  316_679  316_680  316_681  316_682  316_683  316_684  \
ch322            0        0        0        0        0        0        0   
ch351            0        0        0        0        0        0        0   
neg272547        0        0        0        0        0        0        0   

           316_685  316_686  316_687  
ch322            0        0        0  
ch351            0        0        0  
neg272547        0        0        0  

[3 rows x 23041 columns]


In [17]:
# Alternative: to get the names of all compounds loaded from the SDF instead of
# listing them by hand, use this code.
from fifi_pickle_load import get_cpd_names_sdf

# ligand_file is the object returned by load_mols_from_sdf earlier in the notebook.
compound_names = get_cpd_names_sdf(ligand_file)
print(compound_names)

['ch322', 'ch323', 'ch324', 'ch325', 'ch327', 'ch330', 'ch332', 'ch334', 'ch336', 'ch339', 'ch341', 'ch342', 'ch343', 'ch344', 'ch345', 'ch347', 'ch349', 'ch350', 'ch351', 'neg272547', 'neg185882', 'neg100405', 'neg55418', 'neg261234', 'neg271644', 'neg210492', 'neg75718', 'neg172627', 'neg225650', 'neg270206', 'neg290722', 'neg175562', 'neg271484', 'neg86487', 'neg2016', 'neg136790', 'neg90181', 'neg321149', 'neg104234', 'neg30968', 'neg231787', 'neg181495', 'neg167641', 'neg68271', 'neg48577', 'neg105407', 'neg227665', 'neg134447', 'neg201315', 'neg197381', 'neg199400', 'neg259445', 'neg180390', 'neg105175', 'neg182246', 'neg107216', 'neg46380', 'neg221964', 'neg122116', 'neg268183', 'neg179616', 'neg251744', 'neg251745', 'neg106136', 'neg47788', 'neg221051', 'neg103907', 'neg235892', 'neg245437', 'neg162877', 'neg298924', 'neg74233', 'neg18066', 'neg165642', 'neg30603', 'neg17093', 'neg174315', 'neg69718', 'neg124743', 'neg224129', 'neg39394', 'neg320895', 'neg292022', 'neg85163', '