We will apply the modelling script to build a draft model of the protein CCNI (UniProt accession code Q14094).

In [1]:
from Bio.Blast import NCBIWWW 
from Bio import SeqIO
from io import StringIO
import requests


idname='Q14094'

# Retrieve the FASTA record for `idname` from the UniProt REST service.
# A plain download is a GET request; the original POST only worked if the
# server redirected it.
baseUrl="https://rest.uniprot.org/uniprotkb/"
currentUrl=baseUrl+idname+".fasta"
response = requests.get(currentUrl, timeout=30)
# Fail loudly on an unknown accession or server error instead of handing an
# HTML error page to the FASTA parser in the next cell.
response.raise_for_status()
cData=response.text
# File-like wrapper so that SeqIO.parse can read the downloaded text.
Seq=StringIO(cData)



In [2]:
# Print each downloaded record in FASTA format, BLAST it against the PDB and
# save the raw XML answer to files/<idname>_blast.xml.
import os

sequery=""
# Rewind the in-memory FASTA so that re-running this cell parses it again
# (a StringIO that was already read to the end yields no records).
Seq.seek(0)
for record in SeqIO.parse(Seq, "fasta"):
    print(">"+record.description)
    print(record.seq)
    print("Found match for "+idname)
    filename="files/"+idname+"_blast.xml"
    print("Calling BLAST against the PDB, and saving the XML file "+filename)
    # Create the output directory before the (slow) remote BLAST call so a
    # missing directory does not waste the query.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    result_handle=NCBIWWW.qblast("blastp", "pdb", record.seq, hitlist_size=100)
    try:
        # The context manager closes the XML file even if the write fails.
        with open(filename, "w") as blastXMLfh:
            blastXMLfh.write(result_handle.read())
    finally:
        result_handle.close()
    print("Done!")
    # Keep the query sequence as a plain string for the next cells.
    sequery = str(record.seq)

>sp|Q14094|CCNI_HUMAN Cyclin-I OS=Homo sapiens OX=9606 GN=CCNI PE=1 SV=1
MKFPGPLENQRLSFLLEKAITREAQMWKVNVRKMPSNQNVSPSQRDEVIQWLAKLKYQFNLYPETFALASSLLDRFLATVKAHPKYLSCIAISCFFLAAKTVEEDERIPVLKVLARDSFCGCSSSEILRMERIILDKLNWDLHTATPLDFLHIFHAIAVSTRPQLLFSLPKLSPSQHLAVLTKQLLHCMACNQLLQFRGSMLALAMVSLEMEKLIPDWLSLTIELLQKAQMDSSQLIHCRELVAHHLSTLQSSLPLNSVYVYRPLKHTLVTCDKGVFRLHPSSVPGPDFSKDNSKPEVPVRGTAAFYHHLPAASGCKQTSTKRKVEEMEVDDFYDGIKRLYNEDNVSENVGSVCGTDLSRQEGHASPCPPLQPVSVM
Found match for Q14094
Calling BLAST against the PDB, and saving the XML file files/Q14094_blast.xml
Done!


In [3]:
from Bio.Blast import NCBIXML
import os
# `import urllib` alone does not guarantee that the `urllib.request`
# submodule is loaded; import it explicitly.
import urllib.request

# Parse the saved BLAST output. Putting the results into a list is convenient
# to do some extra work with them, and lets us close the file right away.
with open("files/"+idname+"_blast.xml") as result_handle:
    blast_records=list(NCBIXML.parse(result_handle))

E_VALUE_THRESH = 1e-9   # only hits with an E value below this are kept
listofknowns = []       # paths of the downloaded template PDB files
os.makedirs("data", exist_ok=True)

# files/seq.fasta collects the query followed by every accepted hit; it is the
# input of the multiple-sequence alignment step.
with open("files/seq.fasta","w") as fileh:
    fileh.write(">"+idname+"\n")
    fileh.write(sequery+"\n")
    for blast_record in blast_records:
        for alignment in blast_record.alignments:
            for hsp in alignment.hsps:
                if hsp.expect >= E_VALUE_THRESH:
                    continue
                print("\nALIGNMENT\n=========\n")
                print(alignment.title)
                fileh.write(">"+alignment.title+"\n")
                print("E value:",hsp.expect)
                print(hsp.query[0:75]+"...")
                print(hsp.match[0:75]+"...")
                print(hsp.sbjct[0:75]+"...")
                # hsp.sbjct is the *aligned* subject and may contain '-' gap
                # characters; the aligner input should be ungapped.
                fileh.write(hsp.sbjct.replace("-", "")+"\n")
                print("found good match: ",alignment.hit_id)
                PDBcode = alignment.hit_id.split('|')[1]
                PDBfile = 'data/'+PDBcode+'.pdb'
                # Several hits can be chains of the same PDB entry: download
                # and register each structure file only once.
                if PDBfile not in listofknowns:
                    print("retrieving PDB file: ",PDBcode)
                    urllib.request.urlretrieve('https://files.rcsb.org/download/'+PDBcode+'.pdb', PDBfile)
                    listofknowns.append(PDBfile)
                # One FASTA record per hit: further HSPs of the same hit would
                # repeat the same header in files/seq.fasta.
                break
print(listofknowns)


ALIGNMENT

pdb|1W98|B Chain B, G1/s-specific Cyclin E1 [Homo sapiens]
E value: 1.51769e-10
VSPSQRDEVIQWLAKLKYQFNLYPETFALASSLLDRFLATVKAHPK-YLSCIAISCFFLAAKTVEEDERIPVLKV...
+ P  R  ++ WL ++   + L+ ETF LA    DR++AT +   K  L  I IS  F+AAK   E+   P L  ...
LQPKMRAILLDWLMEVCEVYKLHRETFYLAQDFFDRYMATQENVVKTLLQLIGISSLFIAAKL--EEIYPPKLHQ...
found good match:  pdb|1W98|B
retrieving PDB file:  1W98

ALIGNMENT

pdb|5L2W|B Chain B, G1/s-specific Cyclin-e1 [Homo sapiens]
E value: 1.82195e-10
VSPSQRDEVIQWLAKLKYQFNLYPETFALASSLLDRFLATVKAHPK-YLSCIAISCFFLAAKTVEEDERIPVLKV...
+ P  R  ++ WL ++   + L+ ETF LA    DR++AT +   K  L  I IS  F+AAK   E+   P L  ...
LQPKMRAILLDWLMEVCEVYKLHRETFYLAQDFFDRYMATQENVVKTLLQLIGISSLFIAAKL--EEIYPPKLHQ...
found good match:  pdb|5L2W|B
retrieving PDB file:  5L2W
['data/1W98.pdb', 'data/5L2W.pdb']


In [4]:
from Bio import AlignIO 
from Bio.Align.Applications import ClustalOmegaCommandline 

# Make sure the clustalo executable can be found. This matches an installation
# made with the conda instructions above; adjust the directory if yours differs.
import os
# '~' is only expanded by (some) shells, not when the OS resolves executables,
# so expand it here. The membership test keeps PATH from growing each time the
# cell is re-run.
conda_bin = os.path.expanduser('~/miniconda3/bin')
if conda_bin not in os.environ['PATH'].split(os.pathsep):
    os.environ['PATH'] += os.pathsep + conda_bin

# This example aligns the complete collection of sequences written in the
# previous step. You should try it with your own collection obtained from the
# BLAST calculation.
file='files/seq.fasta'
outfile='files/seq_aligned.fasta'
# force=True overwrites an existing output file; auto=True lets Clustal Omega
# choose its own settings (see the log printed in the next cell).
cline = ClustalOmegaCommandline(infile=file, outfile=outfile, verbose=True, auto=True, force=True)
stdout, stderr = cline()

In [5]:
# Show the verbose log captured as `stdout` when `cline()` was run in the previous cell.
print(stdout)

Using 1 threads
Read 3 sequences (type: Protein) from files/seq.fasta
not more sequences (3) than cluster-size (100), turn off mBed
Setting options automatically based on input sequence characteristics (might overwrite some of your options).
Auto settings: Enabling mBed.
Auto settings: Setting iteration to 1.
Using 2 seeds (chosen with constant stride from length sorted seqs) for mBed (from a total of 3 sequences)
Calculating pairwise ktuple-distances...
Ktuple-distance calculation progress: 0 % (0 out of 5)
Ktuple-distance calculation progress: 40 % (2 out of 5)
Ktuple-distance calculation progress done. CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00
mBed created 1 cluster/s (with a minimum of 1 and a soft maximum of 100 sequences each)
Distance calculation within sub-clusters: 0 % (0 out of 1)
Distance calculation within sub-clusters done. CPU time: 0.00u 0.00s 00:00:00.00 Elapsed: 00:00:00
Guide-tree computation (mBed) done.
Progressive alignment progress: 50 % (1 out of 2)
Pro

In [6]:
# You can check what you created: dump the aligned FASTA file written by the
# alignment step (IPython shell escape; needs a `cat` command on the system).
!cat files/seq_aligned.fasta

>Q14094
MKFPGPLENQRLSFLLEKAITREAQMWKVNVRKMPSNQNVSPSQRDEVIQWLAKLKYQFN
LYPETFALASSLLDRFLATVKAHP-KYLSCIAISCFFLAAKTVEEDERIPVLKVLARDSF
CGCSSSEILRMERIILDKLNWDLHTATPLDFLHIFHAIAVSTRPQLLFSLPKLSPSQHLA
VLTKQLLHCMACNQLLQFRGSMLALAMVSLEMEKLIPDWLSLTIELLQKAQMDSSQLIHC
RELVAHHLSTLQSSLPLNSVYVYRPLKHTLVTCDKGVFRLHPSSVPGPDFSKDNSKPEVP
VRGTAAFYHHLPAASGCKQTSTKRKVEEMEVDDFYDGIKRLYNEDNVSENVGSVCGTDLS
RQEGHASPCPPLQPVSVM
>pdb|1W98|B Chain B, G1/s-specific Cyclin E1 [Homo sapiens]
---------------------------------------LQPKMRAILLDWLMEVCEVYK
LHRETFYLAQDFFDRYMATQENVVKTLLQLIGISSLFIAAKL--EEIYPPKLHQFAYVTD
GACSGDEILTMELMIMKALKWRLSPLTIVSWLNVYMQVA---------------------
------------------------------------------------------------
------------------------------------------------------------
------------------------------------------------------------
------------------
>pdb|5L2W|B Chain B, G1/s-specific Cyclin-e1 [Homo sapiens]
---------------------------------------LQPKMRAILLDWLMEVCEVYK
LHRETFYLAQDFFDRYMATQENV

Once you have the list of PDB files and the alignment you are ready to use MODELLER.
MODELLER requires registration: check the [SaliLab web site](https://salilab.org) for how to register, then install the program with conda:
```
conda config --add channels salilab
conda install modeller
```
In the same site you can find examples of use.

QUESTION: so, did you get how to obtain your models with MODELLER? 


In [7]:
# Build comparative models of the query with MODELLER's automodel class,
# using the alignment file and the template list produced by earlier cells.
#
# NOTE(review): as written this cell fails with
#   "read_al_373E> Protein specified in ALIGN_CODES(i) was not found in the
#    alignment file; ALIGN_CODES(1) = data/1W98.pdb"
# `listofknowns` holds file paths such as 'data/1W98.pdb', while the entries
# in files/seq_aligned.fasta are named 'Q14094', 'pdb|1W98|B ...' and
# 'pdb|5L2W|B ...'. The names passed as `knowns` and `sequence` have to be
# names that occur in the alignment file.
# NOTE(review): the alignment is written in FASTA format; automodel presumably
# expects a PIR-format alignment by default — confirm against the MODELLER
# manual and convert the alignment before this step.
from modeller import *
from modeller.automodel import *
#from modeller import d
env = environ()
# NOTE(review): 'qseq1' does not appear in the alignment file either; the
# query is stored there under the accession code 'Q14094'.
a = automodel(env, alnfile='files/seq_aligned.fasta',
              knowns=listofknowns, sequence='qseq1',
              # Assessment scores requested for the models: DOPE and GA341.
              assess_methods=(assess.DOPE,
                              #soap_protein_od.Scorer(),
                              assess.GA341))
# Index range of the models to build (presumably five models, 1 to 5).
a.starting_model = 1
a.ending_model = 5
a.make()


                         MODELLER 9.23, 2019/10/22, r11522

     PROTEIN STRUCTURE MODELLING BY SATISFACTION OF SPATIAL RESTRAINTS


                     Copyright(c) 1989-2019 Andrej Sali
                            All Rights Reserved

                             Written by A. Sali
                               with help from
              B. Webb, M.S. Madhusudhan, M-Y. Shen, G.Q. Dong,
          M.A. Marti-Renom, N. Eswar, F. Alber, M. Topf, B. Oliva,
             A. Fiser, R. Sanchez, B. Yerkovich, A. Badretdinov,
                     F. Melo, J.P. Overington, E. Feyfant
                 University of California, San Francisco, USA
                    Rockefeller University, New York, USA
                      Harvard University, Cambridge, USA
                   Imperial Cancer Research Fund, London, UK
              Birkbeck College, University of London, London, UK


Kind, OS, HostName, Kernel, Processor: 4, Darwin M15-0003 19.4.0 x86_64
Date and time of compilation         

ModellerError: read_al_373E> Protein specified in ALIGN_CODES(i) was not found in the alignment file; ALIGN_CODES(       1) =  data/1W98.pdb

Now you are ready to check the quality of your models with [SAVES](https://servicesn.mbi.ucla.edu/SAVES/)