<a href="https://colab.research.google.com/github/GuerrSim96/Molecular_Dynamics_Simulation_with_Trifluoroethanol/blob/main/Molecular_Dynamics_Simulation_with_Trifluoroethanol.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@title ##### Mount the Drive
#@markdown # Click the last output URL
# Register the PPA, refresh the package index and install google-drive-ocamlfuse.
# NOTE(review): with "2>&1 > /dev/null" only stdout is discarded; stderr is
# duplicated onto the cell output before stdout is redirected, so warnings and
# errors of these commands remain visible. Confirm this is the intent.
!sudo add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!sudo apt-get update -qq 2>&1 > /dev/null
!sudo apt -y install -qq google-drive-ocamlfuse 2>&1 > /dev/null
# First launch without a mount point; per the form text above, the user is
# expected to click the URL printed by this command.
!google-drive-ocamlfuse

In [None]:
!sudo apt-get install -qq w3m # to act as web browser
!xdg-settings set default-web-browser w3m.desktop # to set default browser
%cd /content
!mkdir drive
%cd drive
!mkdir MyDrive
%cd ..
!google-drive-ocamlfuse /content/drive/MyDrive

In [None]:
#@title ##### Installing and Import
# Install of py3Dmol and biopython
!pip --quiet install biopython  # --quiet flag prevents printing of the output
!pip --quiet install py3Dmol
print('Istallazione di biopython, py3Dmol completata!!!')
# Time when the program begins
import time
from datetime import datetime
time_start = time.time()  # Get the time in seconds
date_time_start = datetime.fromtimestamp(time_start)  # To convert the time by seconds to date and time
with open('timer', 'x') as clk:
  clk.write(f'Start:\t\t{date_time_start}\n')
# Import parameters files
import os
import sys
import shutil as sh
path_mdp='/content/drive/MyDrive/MDS-pVT/md_files/mdp/'
par= os.listdir(path_mdp)
mdp=['ions.mdp','minim.mdp','nvt.mdp','npt.mdp','md.mdp']
fc=os.listdir('/content')
for p in mdp:
  if p not in par:
    print(f'{p} there is not in {path_mdp}')
    sys.exit  # Block the execution of the code
  else:
    print(p+' is already there')
  if p not in fc:
    sh.copy(path_mdp+p, '/content')  # Copy a file in the working directory
    print(f'-The file \"{p}\" has been moved successfully')
  else:
    print(f'-The file \"{p}\" is already in /content')
path_solvs='/content/drive/MyDrive/MDS-pVT/md_files/input/'
solvs=['trifluoroethanol.pdb', 'tip3p_added.pdb']
for solv in solvs:
  if solv not in fc:
    sh.copy(path_solvs+solv, '/content')  # Copy a file in the working directory
    print(f'-The file \"{solv}\" has been moved successfully')
  else:
    print(f'-The file \"{solv}\" is already in /content')

In [None]:
#@title # Get the Structure
# Only the three classes used below are imported (no wildcard import)
from Bio.PDB import PDBIO, PDBList, PDBParser
#@markdown Enter the identification code of the pdb structure in the line below:
pdb_id = "1jrj"  #@param {type:"string"}
pdbl= PDBList()
parser= PDBParser(PERMISSIVE=True, QUIET=True)
# Download the entry in PDB format into the working directory
pdbl.retrieve_pdb_file(pdb_id, pdir='.', file_format ='pdb', overwrite=True)
downloaded= f'pdb{pdb_id.lower()}.ent'  # Name of the downloaded file (the id is in lowercase)
if not os.path.isfile(downloaded):
  # Fail here with an explicit message instead of a bare rename error
  raise FileNotFoundError(f'The structure "{pdb_id}" has not been downloaded: "{downloaded}" is missing')
os.rename(downloaded, f'{pdb_id}.pdb')
structure = parser.get_structure(pdb_id, f'{pdb_id}.pdb')
models= list(structure.get_models())
input_f=models[0]  # Get only one model from a NMR file
io= PDBIO()
io.set_structure(input_f)
io.save("file.pdb")  # Structure used by the following cells

In [None]:
#@title ##### Set Communication
#to run GROMACS bin files
import subprocess
path_bin='/content/drive/MyDrive/MDS-pVT/gromacs-2023/bin/'
# Add the GROMACS bin directory to PATH only once, so that executing the cell
# again does not append the same entry repeatedly.
if path_bin not in os.environ['PATH'].split(':'):
  os.environ['PATH'] += f':{path_bin}'
# Give full permissions (0o777) to every directory and file below the bin
# directory (presumably because the files on the mounted Drive are not
# executable otherwise).
for root, directories, files in os.walk(path_bin):
  for entry in directories + files:
    os.chmod(os.path.join(root, entry), 0o777)
# Source GMXRC.bash from the bin directory. The previous command used "$PATH"
# (the whole colon-separated list) as directory, so it could never find the
# file, and the process was never waited for. A child shell cannot change the
# environment of this kernel: this is only a sanity check of the installation,
# the gmx executable is found through the PATH entry added above.
gmxrc_check = subprocess.run(f'source "{path_bin}GMXRC.bash"', shell=True, executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if gmxrc_check.returncode != 0:
  print(f'-Warning: "{path_bin}GMXRC.bash" could not be sourced:\n{gmxrc_check.stderr.decode("utf-8")}')
# to allow communication between bash and python
def gmx(cmd):
  '''Run a command line through bash and print what it produced.

  Args:
    cmd: the command line, as a string; it is interpreted by /bin/bash, so
      redirections and environment variables can be used.

  Returns:
    None. The captured standard output and standard error are decoded as
    UTF-8 and printed after the command has finished.
  '''
  finished = subprocess.run(cmd, shell=True, executable='/bin/bash', stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout_text = finished.stdout.decode("utf-8")
  stderr_text = finished.stderr.decode("utf-8")
  print('Output: \n' + stdout_text + ' \n\n' + stderr_text + '\n\n')

In [None]:
#@title # Generating Topology
# Removing water
!grep -v HOH file.pdb > file_clean.pdb
# Counting the sulfur bond
ss, line= 0, 'y \n'
with open(f'{pdb_id}.pdb', 'r') as pdb:
  for line in pdb:
    if 'SSBOND' in line:
      ss+=1
with open('options', 'x') as file:
  for x in range(ss):
    file.write(line)
# Prepare the topology with oplsaa ff and tip3p water model -ingh is a flag that we can use with a file.pdb generated from an NMR technique
pdb2gmx= 'gmx pdb2gmx -f file_clean.pdb -o file_processed.pdb -water tip3p -ff oplsaa -ignh -ss yes < options'
gmx(pdb2gmx)

In [None]:
#@title # Calculation of Box Dimensions
# Answer of the group prompt of editconf
with open('options', 'w') as answers:
  answers.write('Backbone \n')
# Orient the structure with the major axis coincident with the x-axis
orient='gmx editconf -f file_processed.pdb -o file_oriented.pdb -princ < options'
gmx(orient)
# Coordinates of the alpha carbons, read from the fixed columns of the ATOM records
x, y, z = [], [], []
with open('file_oriented.pdb', 'r') as protein:
  for record in protein:
    if not record.startswith("ATOM"):
      continue
    # The three coordinates are parsed for every ATOM record
    coords = [float(record[start:start + 8].strip()) for start in (30, 38, 46)]
    if record[12:16].strip() != "CA":
      continue
    for axis, value in zip((x, y, z), coords):
      axis.append(value)

def _extent(values):
  '''Distance between the alpha carbons with the largest and the smallest
  value in ``values`` (one of the x, y, z lists), divided by 10
  (presumably to convert the coordinates of the pdb file from angstrom to nm).'''
  high, low = values.index(max(values)), values.index(min(values))
  return ((x[low]-x[high])**2 + (y[low]-y[high])**2 + (z[low]-z[high])**2)**0.5 /10

dist_x, dist_y, dist_z = [_extent(axis) for axis in (x, y, z)]
# Box edges: 1.75 times the extent, truncated to an integer, plus one
x_l, y_l, z_l = [int(extent*1.75)+1 for extent in (dist_x, dist_y, dist_z)]
os.environ['MAIN_AXIS_BOX']= str(x_l) # to transfer a variable from python to bash
# The larger of the two remaining edges is used for both the second and the third box vector
os.environ['SEC_AXIS_BOX']= str(y_l if y_l > z_l else z_l)
# Define the box cell as triclinic, with the structure in its center
editconf='gmx editconf -f file_oriented.pdb -o file_newbox.pdb -bt triclinic -c -box "$MAIN_AXIS_BOX" "$SEC_AXIS_BOX" "$SEC_AXIS_BOX"'
gmx(editconf)

In [None]:
#@title # Solvation
#@markdown If you want trifluoroethanol in the system, flag the box below:
include_tfe= True #@param {type:'boolean'}
if include_tfe: # Water/trifluoroethanol mixture
  # Answer of the group prompt of genrestr
  with open('options', 'w') as file:
    file.write('TFE \n')
  # Produces the position restraint file of trifluoroethanol. The name must be
  # the one used by the #include line added to topol.top below ("posre_tfe.itp").
  restr_tfe= 'gmx genrestr -f trifluoroethanol.pdb -o posre_tfe.itp < options'
  gmx(restr_tfe)
  # Find where to insert the #include lines: right after the ions topology
  with open('topol.top', 'r') as top:
    info= top.readlines()
  anchor= '#include "oplsaa.ff/ions.itp"'
  t_line= None
  for num, line in enumerate(info, 1):
    if anchor in line:
      t_line= num  # 1-based line number, i.e. the list index following the anchor
  if t_line is None:  # Block the execution of the code
    sys.exit(f'The line {anchor} has not been found in topol.top: the trifluoroethanol topology cannot be included.')
  add_info='\n;Include TFE topology \n#include "oplsaa.ff/trifluoroethanol.itp" \n \n;Include Position restraint file \n#ifdef POSRES_TFE \n#include "posre_tfe.itp" \n#endif \n'
  info.insert(t_line, add_info)
  with open('topol.top', 'w') as top:  # Write in topol.top
    top.write(''.join(info))
  # obtain the number of water molecules that fit in the box
  w4box='gmx solvate -cp file_newbox.pdb -cs spc216.gro -o w4box.pdb &> w4box'
  gmx(w4box)
  # To calculate the molecules number of tfe to add in the simulation box
  w_n= None
  with open('w4box', 'r') as file:  # Search into the file
    for line in file:
      if 'Number of solvent molecules' in line:  # To get the number of the water molecules
        for element in line.split():  # Divide the row in elements
          if element.isdigit():  # Verify if the element is a number
            w_n= int(element)  # Convert from str into int
  if w_n is None:  # Block the execution of the code
    sys.exit('The number of solvent molecules has not been found in the output of gmx solvate (file "w4box").')
  from scipy.constants import Avogadro
  #@markdown Trifluoroethanol percentage (%v/v):
  tfe_percent= 30 #@param {type:"slider", min:10, max:100, step:10}
  w_percent= 100- tfe_percent # Water percentage
  # Molar volumes of water and trifluoroethanol (presumably in L/mol - TODO confirm) and moles of water filling the box
  w_vol_mol, tfe_vol_mol, w_mol= 0.01807, 0.07232, w_n/Avogadro  # Convert in moles
  w_vol= w_mol * w_vol_mol # Convert in volume
  tfe_vol, w_vol_new= w_vol * tfe_percent/100, w_vol * w_percent/100 # Calculate the volume
  tfe_mol, w_mol_new= tfe_vol / tfe_vol_mol, w_vol_new / w_vol_mol # Final moles
  tfe_n, w_n_new= int(tfe_mol * Avogadro), int(w_mol_new * Avogadro) # Number of trifluoroethanol molecules and water molecules
  os.environ['MOLS_TFE'] = str(tfe_n)  # Transfer variable from python to bash
  # Add trifluoroethanol in the system
  insert_tfe= 'gmx insert-molecules -f file_newbox.pdb -ci trifluoroethanol.pdb -nmol "$MOLS_TFE" -o file_tfe.pdb'
  gmx(insert_tfe)
  # Modify topology file
  with open('topol.top', 'a') as top:  # Add at the end of the file
    top.write(f'TFE {tfe_n} \n')  # Write the number of trifluoroethanol molecules
  os.environ['MOL_WATER'] = str(w_n_new)  # Transfer variable from python to bash
  # Add water molecules to obtain the correct proportion between trifluoroethanol and water
  solvate= 'gmx insert-molecules -f file_tfe.pdb -ci tip3p_added.pdb -nmol "$MOL_WATER" -try 20 -o file_tfe_solv.pdb &> added_water'
  gmx(solvate)
  # Verify if water molecules were added: the numbers found on the "Added" line must agree
  verific=[]
  with open('added_water', 'r') as file:  # Search into the file
    for line in file:
      if 'Added' in line:
        for element in line.split():  # Divide the row in elements
          if element.isdigit():  # Verify if the element is a number
            verific.append(int(element))  # Convert from str into int
  if len(verific) < 2:  # Block the execution of the code
    sys.exit('The number of added water molecules has not been found in the output of gmx insert-molecules (file "added_water").')
  if verific[0] == verific[1]:
    print('All water molecules have been added in the simulation box!')
  else:  # Block the execution of the code
    sys.exit(f'Not all the water molecules have been added: the counts reported by gmx insert-molecules differ ({verific[0]} and {verific[1]}).')
  # Modify topol.top after added water molecules
  with open('topol.top', 'a') as top:  # Add at the end of the file
    top.write(f'SOL {w_n_new} \n')  # Write the number of water molecules
  # Set the variables for the cell below
  to_neutralize= 'file_tfe_solv.pdb'
  os.environ['TO_NEUTRALIZE']= to_neutralize
else: # Water only
  solvate='gmx solvate -cp file_newbox.pdb -cs spc216.gro -o file_solv.pdb -p topol.top'
  gmx(solvate)
  # Set the variables for the cell below
  to_neutralize= 'file_solv.pdb'
  os.environ['TO_NEUTRALIZE']= to_neutralize

In [None]:
#@title #Neutralizing the System
!grep 'qtot' topol.top
# Generate the run input for genion
grompp_ions= 'gmx grompp -f ions.mdp -c "$TO_NEUTRALIZE" -p topol.top -o ions.tpr -maxwarn 1'
gmx(grompp_ions)
# Overwrite "options" file, it will be used ad imput insed of the std input
with open('options', 'w') as file:
  file.write('SOL \n')
# Replace some water molecules (SOL) with ions to remove net charge
genion= 'gmx genion -s ions.tpr -o file_solv_ions.pdb -p topol.top -rmin 0.32 -pname NA -nname CL -conc 0.15 -neutral < options'
gmx(genion)

In [None]:
#@title ##### Box System Visualization
import py3Dmol
# Read the structure inside a "with" block so that the file is closed right
# after reading instead of being left open.
with open('file_solv_ions.pdb', 'r') as pdb_file:
  system_pdb= pdb_file.read()
view= py3Dmol.view()
view.addModel(system_pdb,'pdb')  # File to screen
view.setBackgroundColor('Black')  # Background color
view.addStyle({'atom':'OW'},{'sphere':{'radius':'0.1'}})  # Style water molecules
view.zoomTo()
view.show()

In [None]:
#@title # Minimization
# Relax the system: build the run input, then run the minimization
grompp_minim= 'gmx grompp -f minim.mdp -c file_solv_ions.pdb -o em.tpr -maxwarn 1'
mdrun_minim= 'gmx mdrun -deffnm em'
for step in (grompp_minim, mdrun_minim):
  gmx(step)

In [None]:
#@title # Equilibration
# Equilibration phase I (under nVT/isothermal-isochoric/canonical) - stabilizes T
grompp_nvt= 'gmx grompp -v -f nvt.mdp -c em.gro -r em.gro -p topol.top -o nvt.tpr -maxwarn 1'
mdrun_nvt= 'gmx mdrun -deffnm nvt'
# Equilibration phase II (under npT/isothermal-isobaric) - stabilizes p
grompp_npt= 'gmx grompp -f npt.mdp -c nvt.gro -r nvt.gro -t nvt.cpt -p topol.top -o npt.tpr -maxwarn 2'
mdrun_npt= 'gmx mdrun -deffnm npt'
# The four commands are executed one after the other, phase I before phase II
for step in (grompp_nvt, mdrun_nvt, grompp_npt, mdrun_npt):
  gmx(step)
def pa_di(list_path):
  '''Function to create new directory into the drive, starting from a list of names.

  Args:
    list_path: iterable with the paths of the directories to create.

  Returns:
    None. A line is printed for every path, telling whether the directory
    was already there or has been created.

  Raises:
    OSError: if a directory cannot be created (for example when the path
      exists but is not a directory).
  '''
  # Same sentence that was printed before, written as a literal so that the
  # output does not change when the docstring is edited.
  print('Function to create new directory into the drive, starting from a list of names.')
  for path in list_path:
    if os.path.isdir(path):
      print('-The directory "'+ path +'" is already there')
    else:
      # makedirs also creates the missing parent directories
      os.makedirs(path, exist_ok=True)
      print('-The directory "'+ path +'" has been created successfully')

In [None]:
#@title ##### Export to Drive
# Output directory of this structure and its subdirectory for the parameter files
path_simul= f'/content/drive/MyDrive/MDS-pVT/md_files/{pdb_id}'
path_par = path_simul + '/parameters/' # Subdirectory to separate the outputs
lst_dir= [path_simul, path_par]
pa_di(lst_dir)
here= os.getcwd() # Working directory
os.environ['DIRECTORY'] = '/content/'  # Where the files produced so far are
if here == path_simul:
  print(f'\n-The working directory was already "{path_simul}"')
else:
  os.chdir(path_simul) # Changing working directory
  print(f'\n-Now the working directory is "{path_simul}"')
# Assemble the structure for a 10-ns MD simulation using md.mdp as a parameter file
gromp_md= ('gmx grompp -f "$DIRECTORY"md.mdp -c "$DIRECTORY"npt.gro -t "$DIRECTORY"npt.cpt '
           '-p "$DIRECTORY"topol.top -o md_1.tpr -maxwarn 2')
gmx(gromp_md)
# Content of the three directories after grompp has run
fc_1= os.listdir('/content')
files_par= os.listdir(path_par)
files_simul= os.listdir(path_simul)
trasfer= ['timer']  # Files to copy into the output directory
for name in fc_1:
  if name.endswith('.top'):
    # Topologies go to the output directory, unless they are already there
    if name in files_simul:
      print('-The file "'.upper() + name + '" is already there!'.upper()) # It will be printed in UPPERCASE
    else:
      trasfer.append(name)
  elif name.endswith('.mdp'):
    # Parameter files go to the "parameters" subdirectory
    if name in files_par:
      print(f'-The file "{name}" is already there')
    else:
      sh.copy(f'/content/{name}', path_par)
      print(f'-The file "{name}" has been moved successfully')
# Copy the collected files to the working directory
for name in trasfer:
  sh.copy(f'/content/{name}', path_simul)
  print(f'-The file "{name}" has been moved successfully')

In [None]:
#@title # MD Simulation
# Record in the "timer" file when molecular dynamics begins
time_md_s= time.time()
date_time_md_s= datetime.fromtimestamp(time_md_s) # Seconds converted to date and time
with open('timer', 'a') as clk:
  print(f'MD start:\t{date_time_md_s}', file=clk)
# Run the 10ns MD simulation
mdrun_md= 'gmx mdrun -deffnm md_1'
gmx(mdrun_md)
# Record in the "timer" file when the first simulation ends
time_md_e= time.time()
date_time_md_e= datetime.fromtimestamp(time_md_e) # Seconds converted to date and time
with open('timer', 'a') as clk:
  print(f'MD 10ns:\t\t{date_time_md_e}', file=clk)

In [None]:
#@title ##### Extended Simulation
#@markdown ## If you want to add another 10 ns to the simulation, tick the box below:
extend_simulation= False #@param {type:'boolean'}
#@markdown ###### (this is strongly discouraged if you do not have colab pro)
# Answers of the two group prompts of trjconv (presumably the group to center
# first, then the group to write)
with open('options', 'w') as file:
  file.write('Protein \n System \n')
# Puts the center of mass of molecules in the box
trjconv= 'gmx trjconv -s md_1.tpr -f md_1.xtc -o md_10ns_1_protPBC.xtc -pbc mol -center -skip 100 < options'
gmx(trjconv)
if extend_simulation: # Second part of the simulation
  # Extend the simulation for 10ns
  extend, mdrun_extend= 'gmx convert-tpr -s md_1.tpr -extend 10000 -o md_2.tpr', 'gmx mdrun -deffnm md_2 -cpi md_1.cpt -noappend'
  gmx(extend)
  gmx(mdrun_extend)
  # Puts the center of mass of molecules in the box.
  # With -noappend mdrun adds the simulation part number to its output files
  # (md_2.partNNNN.xtc), so the trajectory is selected with a pattern that
  # matches both that name and a plain md_2.xtc.
  trjconv_ex= 'gmx trjconv -s md_2.tpr -f md_2*.xtc -o md_10ns_2_protPBC.xtc -pbc mol -center -skip 100 < options'
  gmx(trjconv_ex)
  # Merge the trajectories
  merge= 'gmx trjcat -f md_10ns_*.xtc -o md_20ns_protPBC.xtc'
  gmx(merge)
  # Time when the second simulation ends
  time_end= time.time()
  date_time_end= datetime.fromtimestamp(time_end) # To convert the time by seconds to date and time
  with open('timer', 'a') as clk:
    clk.write(f'MD 20ns:\t\t\t{date_time_end}\n')
# Time when the program ends
time_end= time.time()
date_time_end= datetime.fromtimestamp(time_end) # To convert the time by seconds to date and time
with open('timer', 'a') as clk:
  clk.write(f'End:\t\t\t{date_time_end}\n')