# Protein sequence library builder for ACVR and partners

## List of target proteins

* ALK1
* ALK2
* ALK3
* ALK4
* ALK5
* ALK6
* ALK7
* BMPR2
* ActR-IIA
* ActR-IIB
* TGFR2
* AMH-RII
* EGFR (control) 

In [12]:
# Environment init
from Bio.Blast import NCBIWWW, NCBIXML
from Bio import SeqIO, AlignIO, ExPASy
from Bio.Align.Applications import MafftCommandline as mafft
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
import time, os

In [2]:
# Display name -> Swiss-Prot/UniProt accession for every protein in the library.
# The accession is what gets passed to ExPASy when the sequences are fetched;
# the insertion order here is the order in which targets are processed.
# NOTE(review): confirm on UniProt that each accession is the intended
# species/isoform before relying on the downloaded sequences.
target_handle = dict([
    ("ALK1", "P37023"),
    ("ALK2", "Q04771"),
    ("ALK3", "P36894"),
    ("ALK4", "P36896"),
    ("ALK5", "P36897"),
    ("ALK6", "Q05438"),
    ("ALK7", "Q8NER5"),
    ("BMPR2", "Q13873"),
    ("ActR-IIA", "Q7SXW6"),
    ("ActR-IIB", "Q56A35"),
    ("TGFR2", "P37173"),
    ("AMH-RII", "Q16671"),
    ("EGFR", "P00533"),
])

In [21]:
# Download the Swiss-Prot record of every target and save it as
# Targets/<name>.fasta.
#
# exist_ok=True keeps the cell re-runnable when the folder is already there,
# while real failures (e.g. no write permission) still surface instead of
# being swallowed by a bare `except: pass`.
os.makedirs("Targets", exist_ok=True)
for target, accession in target_handle.items():
    print("Now getting",target,"sequence file")
    filename = "Targets/"+ target + ".fasta"
    handle = ExPASy.get_sprot_raw(accession)
    try:
        # SeqIO.read expects exactly one record in the handle.
        temp = SeqIO.read(handle, "swiss")
    finally:
        # Release the network handle even if parsing fails.
        handle.close()
    print("Writing",target,"fasta file")
    SeqIO.write(temp, filename, "fasta")
    print("Done!")
    time.sleep(0.5) #Some rest to not make it look like a DDoS

Now getting ALK1 sequence file
Writing ALK1 fasta file
Done!
Now getting ALK2 sequence file
Writing ALK2 fasta file
Done!
Now getting ALK3 sequence file
Writing ALK3 fasta file
Done!
Now getting ALK4 sequence file
Writing ALK4 fasta file
Done!
Now getting ALK5 sequence file
Writing ALK5 fasta file
Done!
Now getting ALK6 sequence file
Writing ALK6 fasta file
Done!
Now getting ALK7 sequence file
Writing ALK7 fasta file
Done!
Now getting BMPR2 sequence file
Writing BMPR2 fasta file
Done!
Now getting ActR-IIA sequence file
Writing ActR-IIA fasta file
Done!
Now getting ActR-IIB sequence file
Writing ActR-IIB fasta file
Done!
Now getting TGFR2 sequence file
Writing TGFR2 fasta file
Done!
Now getting AMH-RII sequence file
Writing AMH-RII fasta file
Done!
Now getting EGFR sequence file
Writing EGFR fasta file
Done!


In [27]:
# BLAST every target FASTA against SwissProt (blastp, expect=0.01) and store
# the XML reply as Blast_XML/<name>_blast.xml. A target whose result file is
# already on disk is skipped, so the cell can be re-executed to pick up where
# a previous run stopped.

# The output folder must exist before the first result is written; creating
# it here keeps a fresh checkout from failing with FileNotFoundError.
os.makedirs("Blast_XML", exist_ok=True)

target_name = target_handle.keys()
for target in target_name:
    filename ="Targets/"+target+".fasta"
    result_filename = "Blast_XML/" + target + "_blast.xml"
    if os.path.exists(result_filename):
        continue  # cached result, no request needed

    print("Running BLAST againsts SwissProt using %s as query" %(target))
    # Read the query first so the FASTA file is not held open during the
    # (potentially very long) remote BLAST call.
    with open(filename) as file:
        query = file.read()

    handle = NCBIWWW.qblast("blastp","swissprot",query,expect=0.01)
    try:
        blast_xml = handle.read()
    finally:
        handle.close()

    print("Saving BLAST results of %s" %(target))
    # Open the result file only once the full reply is in memory, and close
    # it deterministically via the context manager.
    with open(result_filename, "w") as result_file:
        result_file.write(blast_xml)
    print("Done!")
    time.sleep(0.5)  # pause between real requests only; cached targets skip it

Running BLAST againsts SwissProt using ALK1 as query
Saving BLAST results of ALK1
Done!
Running BLAST againsts SwissProt using ALK2 as query
Saving BLAST results of ALK2
Done!
Running BLAST againsts SwissProt using ALK3 as query
Saving BLAST results of ALK3
Done!
Running BLAST againsts SwissProt using ALK4 as query
Saving BLAST results of ALK4
Done!
Running BLAST againsts SwissProt using ALK5 as query
Saving BLAST results of ALK5
Done!
Running BLAST againsts SwissProt using ALK6 as query


KeyboardInterrupt: 

In [25]:
# Ad-hoc check: does the path currently held in `result_filename` (assigned
# inside the BLAST loop) exist on disk?
# NOTE(review): depends on a variable left over from another cell's loop, so
# it fails on a fresh kernel if run first; looks like a debugging aid.
os.path.exists(result_filename)