<a href="https://colab.research.google.com/github/cbedart/S-DISCO/blob/main/S_DISCO_Protein_Modeling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**<center><h1>S-DISCO - Protein modeling</h1></center>**



---



## Software installation

In [4]:
!wget https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
!tar -zxf modeller-10.4.tar.gz
!echo "MODELLER extraction completed"
%cd modeller-10.4
with open('modeller_config', 'a') as f:
  f.write("2\n")
  f.write("/content/compiled/MODELLER\n")
  f.write("MODELIRANJE\n")
!./Install < modeller_config
!echo "MODELLER set up completed"
%cd /content/

%cd modeller-10.4
!ln -sf /content/compiled/MODELLER/bin/mod10.4 /usr/bin/
%cd /content/

!mod10.4 | awk 'NR==1{if($1=="usage:") print "MODELLER succesfully installed"; else if($1!="usage:") print "Something went wrong. Please install again"}'

!pip install biopython
!pip install py3Dmol

--2023-11-15 13:25:58--  https://salilab.org/modeller/10.4/modeller-10.4.tar.gz
Resolving salilab.org (salilab.org)... 169.230.79.19
Connecting to salilab.org (salilab.org)|169.230.79.19|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 38244158 (36M) [application/x-gzip]
Saving to: ‘modeller-10.4.tar.gz.1’


2023-11-15 13:26:00 (26.4 MB/s) - ‘modeller-10.4.tar.gz.1’ saved [38244158/38244158]

MODELLER extraction completed
/content/modeller-10.4
[H[2JInstallation of MODELLER 10.4

This script will install MODELLER 10.4 into a specified directory
for which you have read/write permissions.

To accept the default answers indicated in [...], press <Enter> only.

------------------------------------------------------------------------

The currently supported architectures are as follows:

   1) Linux x86 PC (e.g. RedHat, SuSe).
   2) x86_64 (Opteron/EM64T) box (Linux).
   3) Alternative x86 Linux binary (e.g. for FreeBSD).
   4) Linux on 32-bit ARM (e.g. for Raspb

In [5]:
import os
from pathlib import Path
import py3Dmol
from Bio import SeqIO, Entrez
from Bio.PDB import *
import ipywidgets as widgets
from IPython.display import display
from google.colab import files
import shutil

## References selection

Select your 3 PDB IDs:

In [None]:
musculus = 'XXXX' # @param {type:"string"}
norvegicus = 'XXXX' # @param {type:"string"}
lanceolatum = 'XXXX' # @param {type:"string"}

# The PDB IDs are used in lowercase everywhere below (file names, alignment codes).
musculus, norvegicus, lanceolatum = (
    code.lower() for code in (musculus, norvegicus, lanceolatum)
)
templates = [musculus, norvegicus, lanceolatum]

# Download each structure into the current directory, then rename the
# pdb<id>.ent file to <id>.pdb, the name the later cells read.
pdbl = PDBList()
for s in templates:
  pdbl.retrieve_pdb_file(s, pdir='.', file_format="pdb", overwrite=True)
  os.rename("pdb{0}.ent".format(s), "{0}.pdb".format(s))

## MODELLER

In [None]:
def submit_form1(sender):
  """Save the typed sequence as the modelling target.

  Registered as the click handler of the Submit button; `sender` is whatever
  ipywidgets passes to click handlers and is not used. The text-area content
  is stored in the global `sequence_tobuild` and written to target.fasta
  under the header ">target".
  """
  global sequence_tobuild
  sequence_tobuild = text_area1.value

  fasta_record = ">target\n{0}".format(sequence_tobuild)
  with open("target.fasta","w") as fileout:
    fileout.write(fasta_record)
  print("Submitted !")

# Caption and input area, shown side by side.
text_area1 = widgets.Textarea(layout=widgets.Layout(width='700px', height='120px'))
label = widgets.Label(value='Enter your sequence to build:')
hbox = widgets.HBox([label, text_area1])
display(hbox)

# Nothing is written until the user clicks Submit.
submit_button = widgets.Button(description='Submit')
submit_button.on_click(submit_form1)
display(submit_button)

HBox(children=(Label(value='Enter your sequence to build:'), Textarea(value='', layout=Layout(height='120px', …

Button(description='Submit', style=ButtonStyle())

## Align the sequences from the PDB structures with the target sequence


These code blocks automatically align the target sequence with the reference sequence from each PDB structure. MODELLER requires a specific alignment format (PIR format), which is quite difficult to obtain outside MODELLER. You can convert a FASTA-format alignment into a PIR file with some online converters, but it is safer to use MODELLER's own alignment tool for the practical sessions.


(Note: The "*'import site' failed; use -v for traceback*" line is not problematic, and is caused by the use of Colab. Don't take it into account)

In [None]:
cmd_alignment_musculus = "from modeller import *\nenv = environ()\naln = alignment(env)\nmdl = model(env, file='{0}', \
model_segment=('FIRST:A','LAST:A'))\naln.append_model(mdl, align_codes='{0}A', atom_files='{0}.pdb')\naln.append(file=\
'target.fasta', align_codes='target', alignment_format='FASTA')\naln.align2d()\naln.write(file='aligned_{0}.fasta', alignment_format=\
'FASTA')\naln.write(file='aligned_{0}.ali', alignment_format='PIR')\naln.write(file='aligned_{0}.pap', alignment_format='PAP')\n".format(musculus)
with open("alignment_musculus.py","w") as fileout:
  fileout.write(cmd_alignment_musculus)
!mod10.4 alignment_musculus.py
print("Done !")

In [None]:
cmd_alignment_norvegicus = "from modeller import *\nenv = environ()\naln = alignment(env)\nmdl = model(env, file='{0}', \
model_segment=('FIRST:A','LAST:A'))\naln.append_model(mdl, align_codes='{0}A', atom_files='{0}.pdb')\naln.append(file=\
'target.fasta', align_codes='target', alignment_format='FASTA')\naln.align2d()\naln.write(file='aligned_{0}.fasta', alignment_format=\
'FASTA')\naln.write(file='aligned_{0}.ali', alignment_format='PIR')\naln.write(file='aligned_{0}.pap', alignment_format='PAP')\n".format(norvegicus)
with open("alignment_norvegicus.py","w") as fileout:
  fileout.write(cmd_alignment_norvegicus)
!mod10.4 alignment_norvegicus.py
print("Done !")

In [None]:
cmd_alignment_lanceolatum = "from modeller import *\nenv = environ()\naln = alignment(env)\nmdl = model(env, file='{0}', \
model_segment=('FIRST:A','LAST:A'))\naln.append_model(mdl, align_codes='{0}A', atom_files='{0}.pdb')\naln.append(file=\
'target.fasta', align_codes='target', alignment_format='FASTA')\naln.align2d()\naln.write(file='aligned_{0}.fasta', alignment_format=\
'FASTA')\naln.write(file='aligned_{0}.ali', alignment_format='PIR')\naln.write(file='aligned_{0}.pap', alignment_format='PAP')\n".format(lanceolatum)
with open("alignment_lanceolatum.py","w") as fileout:
  fileout.write(cmd_alignment_lanceolatum)
!mod10.4 alignment_lanceolatum.py
print("Done !")

## Build the models

These blocks of code will create a new directory in /content, named after the species name (as an example, /content/musculus for your musculus model). Inside this directory, you will find all the working files: you can take a look at them, especially the *build_musculus.log* file, or the *target.DXXXXX* files, it's really interesting !

</br>

Each code block takes approximately 3 min 30 s to run: it creates, optimizes and minimizes 10 models, then automatically downloads the best one. You can lower this value to get results faster (10 is enough for a quick run), but it is recommended to create around 50 models per run (about 15 min with Google Colab).

</br>

(Note: The "*'import site' failed; use -v for traceback*" line is not problematic, and is caused by the use of Colab. Don't take it into account)

Shortcut to /content/musculus

In [None]:
number_models_musculus = 10

cmd_build_musculus = "from modeller import *\nfrom modeller.automodel import *\n\nenv = environ()\na = automodel(env, alnfile='aligned_{0}.ali', knowns='{0}A', sequence='target', assess_methods=(assess.DOPE, assess.GA341))\na.starting_model = 1\na.ending_model = {1}\na.make()\n\nok_models = filter(lambda x: x['failure'] is None, a.outputs)\n\nkey = 'DOPE score'\nok_models.sort(lambda a,b: cmp(a[key], b[key]))\n\nm = ok_models[0]\nprint 'Top model: %s (DOPE score %.3f)' % (m['name'], m[key])".format(musculus, number_models_musculus)
with open("build_musculus.py","w") as fileout:
  fileout.write(cmd_build_musculus)

os.makedirs("/content/musculus", exist_ok = True)
!cd /content/musculus
shutil.copyfile("/content/build_musculus.py", "/content/musculus/build_musculus.py")
shutil.copyfile("/content/aligned_{0}.ali".format(musculus), "/content/musculus/aligned_{0}.ali".format(musculus))
shutil.copyfile("/content/{0}.pdb".format(musculus), "/content/musculus/{0}.pdb".format(musculus))

os.chdir("/content/musculus")
!mod10.4 build_musculus.py
print("Done !")

with open('build_musculus.log', 'r') as f:
    last_line = f.readlines()[-2]
    best_musculus = last_line.split(" ")[2]
    print(last_line)

!cp $best_musculus best_musculus.pdb
!cp best_musculus.pdb /content/best_musculus.pdb
os.chdir("/content")
files.download('best_musculus.pdb')

Shortcut to /content/norvegicus

In [None]:
number_models_norvegicus = 10

cmd_build_norvegicus = "from modeller import *\nfrom modeller.automodel import *\n\nenv = environ()\na = automodel(env, alnfile='aligned_{0}.ali', knowns='{0}A', sequence='target', assess_methods=(assess.DOPE, assess.GA341))\na.starting_model = 1\na.ending_model = {1}\na.make()\n\nok_models = filter(lambda x: x['failure'] is None, a.outputs)\n\nkey = 'DOPE score'\nok_models.sort(lambda a,b: cmp(a[key], b[key]))\n\nm = ok_models[0]\nprint 'Top model: %s (DOPE score %.3f)' % (m['name'], m[key])".format(norvegicus, number_models_norvegicus)
with open("build_norvegicus.py","w") as fileout:
  fileout.write(cmd_build_norvegicus)

os.makedirs("/content/norvegicus", exist_ok = True)
!cd /content/norvegicus
shutil.copyfile("/content/build_norvegicus.py", "/content/norvegicus/build_norvegicus.py")
shutil.copyfile("/content/aligned_{0}.ali".format(norvegicus), "/content/norvegicus/aligned_{0}.ali".format(norvegicus))
shutil.copyfile("/content/{0}.pdb".format(norvegicus), "/content/norvegicus/{0}.pdb".format(norvegicus))

os.chdir("/content/norvegicus")
!mod10.4 build_norvegicus.py
print("Done !")

with open('build_norvegicus.log', 'r') as f:
    last_line = f.readlines()[-2]
    best_norvegicus = last_line.split(" ")[2]
    print(last_line)

!cp $best_norvegicus best_norvegicus.pdb
!cp best_norvegicus.pdb /content/best_norvegicus.pdb
os.chdir("/content")
files.download('best_norvegicus.pdb')

Shortcut to /content/lanceolatum

In [None]:
number_models_lanceolatum = 10

cmd_build_lanceolatum = "from modeller import *\nfrom modeller.automodel import *\n\nenv = environ()\na = automodel(env, alnfile='aligned_{0}.ali', knowns='{0}A', sequence='target', assess_methods=(assess.DOPE, assess.GA341))\na.starting_model = 1\na.ending_model = {1}\na.make()\n\nok_models = filter(lambda x: x['failure'] is None, a.outputs)\n\nkey = 'DOPE score'\nok_models.sort(lambda a,b: cmp(a[key], b[key]))\n\nm = ok_models[0]\nprint 'Top model: %s (DOPE score %.3f)' % (m['name'], m[key])".format(lanceolatum, number_models_lanceolatum)
with open("build_lanceolatum.py","w") as fileout:
  fileout.write(cmd_build_lanceolatum)

os.makedirs("/content/lanceolatum", exist_ok = True)
!cd /content/lanceolatum
shutil.copyfile("/content/build_lanceolatum.py", "/content/lanceolatum/build_lanceolatum.py")
shutil.copyfile("/content/aligned_{0}.ali".format(lanceolatum), "/content/lanceolatum/aligned_{0}.ali".format(lanceolatum))
shutil.copyfile("/content/{0}.pdb".format(lanceolatum), "/content/lanceolatum/{0}.pdb".format(lanceolatum))

os.chdir("/content/lanceolatum")
!mod10.4 build_lanceolatum.py
print("Done !")

with open('build_lanceolatum.log', 'r') as f:
    last_line = f.readlines()[-2]
    best_lanceolatum = last_line.split(" ")[2]
    print(last_line)

!cp $best_lanceolatum best_lanceolatum.pdb
!cp best_lanceolatum.pdb /content/best_lanceolatum.pdb
os.chdir("/content")
files.download('best_lanceolatum.pdb')

# Generic use of the tool

In [None]:
pdb_id = 'XXXX' # @param {type:"string"}

# The PDB ID is used in lowercase everywhere below (file names, alignment codes).
generic = pdb_id.lower()
templates = [generic]

# Download the structure into the current directory, then rename the
# pdb<id>.ent file to <id>.pdb, the name the later cells read.
pdbl = PDBList()
for s in templates:
  pdbl.retrieve_pdb_file(s, pdir='.', file_format="pdb", overwrite=True)
  os.rename("pdb{0}.ent".format(s), "{0}.pdb".format(s))

In [11]:
def submit_form1(sender):
  """Save the typed sequence as the modelling target.

  Registered as the click handler of the Submit button; `sender` is whatever
  ipywidgets passes to click handlers and is not used. The text-area content
  is stored in the global `sequence_tobuild` and written to target.fasta
  under the header ">target".
  """
  global sequence_tobuild
  sequence_tobuild = text_area1.value

  fasta_record = ">target\n{0}".format(sequence_tobuild)
  with open("target.fasta","w") as fileout:
    fileout.write(fasta_record)
  print("Submitted !")

# Caption and input area, shown side by side.
text_area1 = widgets.Textarea(layout=widgets.Layout(width='700px', height='120px'))
label = widgets.Label(value='Enter your sequence to build:')
hbox = widgets.HBox([label, text_area1])
display(hbox)

# Nothing is written until the user clicks Submit.
submit_button = widgets.Button(description='Submit')
submit_button.on_click(submit_form1)
display(submit_button)

HBox(children=(Label(value='Enter your sequence to build:'), Textarea(value='', layout=Layout(height='120px', …

Button(description='Submit', style=ButtonStyle())

In [None]:
cmd_alignment_generic = "from modeller import *\nenv = environ()\naln = alignment(env)\nmdl = model(env, file='{0}', \
model_segment=('FIRST:A','LAST:A'))\naln.append_model(mdl, align_codes='{0}A', atom_files='{0}.pdb')\naln.append(file=\
'target.fasta', align_codes='target', alignment_format='FASTA')\naln.align2d()\naln.write(file='aligned_{0}.fasta', alignment_format=\
'FASTA')\naln.write(file='aligned_{0}.ali', alignment_format='PIR')\naln.write(file='aligned_{0}.pap', alignment_format='PAP')\n".format(generic)
with open("alignment_generic.py","w") as fileout:
  fileout.write(cmd_alignment_generic)
!mod10.4 alignment_generic.py
print("Done !")

In [None]:
number_models_generic = 10

cmd_build_generic = "from modeller import *\nfrom modeller.automodel import *\n\nenv = environ()\na = automodel(env, alnfile='aligned_{0}.ali', knowns='{0}A', sequence='target', assess_methods=(assess.DOPE, assess.GA341))\na.starting_model = 1\na.ending_model = {1}\na.make()\n\nok_models = filter(lambda x: x['failure'] is None, a.outputs)\n\nkey = 'DOPE score'\nok_models.sort(lambda a,b: cmp(a[key], b[key]))\n\nm = ok_models[0]\nprint 'Top model: %s (DOPE score %.3f)' % (m['name'], m[key])".format(generic, number_models_generic)
with open("build_generic.py","w") as fileout:
  fileout.write(cmd_build_generic)

os.makedirs("/content/generic", exist_ok = True)
!cd /content/generic
shutil.copyfile("/content/build_generic.py", "/content/generic/build_generic.py")
shutil.copyfile("/content/aligned_{0}.ali".format(generic), "/content/generic/aligned_{0}.ali".format(generic))
shutil.copyfile("/content/{0}.pdb".format(generic), "/content/generic/{0}.pdb".format(generic))

os.chdir("/content/generic")
!mod10.4 build_generic.py
print("Done !")

with open('build_generic.log', 'r') as f:
    last_line = f.readlines()[-2]
    best_generic = last_line.split(" ")[2]
    print(last_line)

!cp $best_generic best_generic.pdb
!cp best_generic.pdb /content/best_generic.pdb
os.chdir("/content")
files.download('best_generic.pdb')