# PROJECT

### Download of the NSP13 Protein (6zsl comformation)

In [1]:
import Bio
from Bio.PDB import PDBList
from pathlib import Path

def retrive_nsp13(structure="6zsl"):
    """Download a NSP13 structure from the PDB
    Returns
    -------
    structure : .cif file of the structure
    """
    pdbl = PDBList()
    pdbl.download_pdb_files([structure], pdir="NSP13")
    #MEET_U --> meetu2024
    fpath = Path.home() / f"Desktop/M2/S1/meetu2024/2023-2024_Team4-SU/NSP13/{structure}.cif"
    with open(fpath) as f:
        return f.readlines()
    

nsp13 = retrive_nsp13()

#Display of the protein
#nsp13

Structure exists: 'NSP13\6zsl.cif' 




### VISUALIZATION WITH PYMOL

In [3]:
import pymol
import cmd 

#Visualization w Pymol

def visualize_protein(pdb_code):
    """
    Visualize a protein structure using PyMOL.

    Parameters:
    - pdb_code (str): The PDB code of the protein.

    Example:
    visualize_protein("6zsl")
    """
    # Start PyMOL
    pymol.finish_launching(['pymol', '-qc'])

    # Fetch the protein structure
    cmd.fetch(pdb_code)

    # Show the protein as a ribbon
    cmd.show("cartoon")

    # Display the protein structure
    cmd.zoom()

    # Keep the PyMOL window open
    cmd.rock()

#visualize_protein(nsp13)

PyMOL failed to start, dumping diagnostics...
pymol-2.5.7-py312h1880af8_0.json
conda build: py312h1880af8_0 https://conda.anaconda.org/schrodinger/win-64
pymol-bundle-2.5.7-0.json
conda build: 0 https://conda.anaconda.org/schrodinger/win-64
pymol-upgrade-helper-1.1-1.json
conda build: 1 https://conda.anaconda.org/schrodinger/win-64
pymol-web-examples-2.4-1.json
conda build: 1 https://conda.anaconda.org/schrodinger/noarch

Operating System:
Windows-11-10.0.22621-SP0
PyQt5 5.15.10 (Qt 5.15.2)

Python:
3.12.1 | packaged by conda-forge | (main, Dec 23 2023, 07:53:56) [MSC v.1937 64 bit (AMD64)]
prefix=c:\Users\LALIN\anaconda3\envs\pymol
executable=c:\Users\LALIN\anaconda3\envs\pymol\python.exe
filesystemencoding=utf-8

Qt, Python and PyMOL Environment Variables:
PYDEVD_IPYTHON_COMPATIBLE_DEBUGGING=1
PYDEVD_USE_FRAME_EVAL=NO
PYMOL_DATA=c:\Users\LALIN\anaconda3\envs\pymol\share\pymol\data
PYMOL_HOME=C:\Users\LALIN\AppData\Local\Schrodinger\PyMOL2
PYMOL_PATH=c:\Users\LALIN\anaconda3\envs\pymo

ImportError: DLL load failed while importing _cmd: Le module spécifié est introuvable.

### POCKET DETECTION (P2RANK)

1. Pocket Generation

In [4]:
#Pocket creation script 

import subprocess

# Path to the p2rank script (prank.sh)
p2rank_path = './p2rank/prank.sh'

# Path to Git Bash for the Windows environment (to be replaced or improved if necessary)
bash_path = 'c:/PROGRA~1/Git/bin/bash.exe'  # Make sure to set the correct path

# Folder to store pocket prediction results
output_folder = './pockets'

# Command to execute p2rank and predict pockets
command = f'{bash_path} {p2rank_path} predict -f ./NSP13/6zsl.cif -o {output_folder}'

# Execution of the command
subprocess.run(command, shell=True)


CompletedProcess(args='c:/PROGRA~1/Git/bin/bash.exe ./p2rank/prank.sh predict -f ./NSP13/6zsl.cif -o ./pockets', returncode=1)

2. Display of the pockets of the 6ZSL comformation

In [5]:
#Reading / Display of the pockets

import pandas as pd

csv = f'./pockets/6zsl.cif_predictions.csv'
r_csv = pd.read_csv(csv)

df = pd.DataFrame(r_csv)
print(df.head(2))

   name         rank     score   probability   sas_points   surf_atoms  \
0  pocket1         1     16.76         0.786           95           54   
1  pocket2         2     12.94         0.668           84           38   

      center_x     center_y     center_z  \
0     -13.6826      11.0843     -74.6564   
1     -18.4473      34.5627     -22.8999   

                                         residue_ids  \
0   A_284 A_285 A_287 A_288 A_289 A_290 A_312 A_3...   
1   B_284 B_285 B_287 B_288 B_289 B_313 B_316 B_3...   

                                       surf_atom_ids  
0   6511 6517 6518 6519 6520 6528 6529 6539 6540 ...  
1   2116 2122 2123 2133 2134 2137 2141 2144 2145 ...  


3. Selection of the binding pockets

### LIGAND PREPARATION

1. Downloading the ligands from the ZINC database w the script

In [11]:
import subprocess

file = f'./ligands-downloader/ligand_downloader.py'

# Exécution du script
process = subprocess.run(['python', file], capture_output=True, text=True)


2. Display of the downloaded ligands 

In [12]:
import os

l_file = f'./ligands/zinc_database/'

#Command to read all the zinc files that were downloaded and put them in a dataframe 
ligands_rep = [lig for lig in os.listdir(l_file) if os.path.isfile(os.path.join(l_file, lig))]
pd_ligrep = pd.DataFrame(ligands_rep)

#Display of the first 3 (to make the file more readable)
print(pd_ligrep.head(3))
#for nom_dossier in noms_dossiers:
#    print(ligands_rep)

                      0
0  ZINC001485529916.sdf
1  ZINC001485529917.sdf
2  ZINC001485529920.sdf


3. Preparation of the ligands for the docking with Rdkit

In [15]:
from rdkit import Chem
from rdkit.Chem import AllChem
import os

#Lien avec le fichier csv contenant la liste des ligands à télécharger
csv_file = './ligands/ligands_downloader/csv_files/substances.csv'  
df = pd.read_csv(csv_file,sep = ';')

# Charger les fichiers SDF et préparer les ligands qu'on a téléchargé
ligands_folder = './ligands/zinc_database/'
os.makedirs(ligands_folder, exist_ok=True)

for zinc_id in df['zinc_id']:
    sdf_file = f'{ligands_folder}/{zinc_id}.sdf'

    # Charger les molécules depuis le fichier SDF
    suppl = Chem.SDMolSupplier(sdf_file)
    ligands = [mol for mol in suppl if mol is not None]

    # Ajout des hydrogènes explicites et génération des conformations 3D
    for mol in ligands:
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol)

    # Sauvegarder les molécules modifiées
    modified_sdf_file = f'./ligands/zinc_db_prepared/{zinc_id}_prepared.sdf'
    w = Chem.SDWriter(modified_sdf_file)
    for mol in ligands:
        w.write(mol)
    w.close()

4. Display of the prepared ligands

In [13]:
import os

l_file = f'./ligands/zinc_db_prepared/'

#Command to read all the zinc files that were downloaded and put them in a dataframe 
pligpd_rep = [lig for lig in os.listdir(l_file) if os.path.isfile(os.path.join(l_file, lig))]
pd_ligpd = pd.DataFrame(pligpd_rep)

#Display of the first 3 (to make the file more readable)
print(pd_ligpd.head(3))
#for nom_dossier in noms_dossiers:
#    print(ligands_rep)

                               0
0  ZINC001485529916_prepared.sdf
1  ZINC001485529917_prepared.sdf
2  ZINC001485529920_prepared.sdf


### DOCKING ENGINE

In [None]:
#The scripts>> vina