# Identificação de Homólogos

In [6]:
import os
from Bio import SeqIO
from Bio.Blast import NCBIWWW, NCBIXML
from scripts_python import BLASTer

# Notebook configuration: project root, sequence identifiers and input files.
# os.getcwd() replaces the IPython-only `%pwd` magic so the cell is also valid
# plain Python.
notebook_dir = os.getcwd()
a = notebook_dir  # legacy alias, kept in case another cell still reads `a`
# Project root = two directory levels above the notebook's working directory
# (os.path.dirname is separator-agnostic, unlike splitting on '/').
wd = os.path.dirname(os.path.dirname(notebook_dir))
seq_id = "NM_001114380"
prot_seq_id = "P20701"
gene = "ITGAL"
# Saved BLAST result files, read from <wd>/data/homologue/<gene>/ by later cells.
file_blast_ncbi = "00D8F7PY016-Alignment.xml"
file_blast_uniprot = "B20210131A94466D2655679D1FD8953E075198DA8030E49B.fasta"

In [7]:
# Parse the saved NCBI BLAST XML result for this gene.
# The context manager closes the file even if NCBIXML.read raises.
with open(os.path.join(wd, "data/homologue", gene, file_blast_ncbi)) as result_handle:
    blast_ncbi = NCBIXML.read(result_handle)

print('Foram carregadas {0} seq.'.format(len(blast_ncbi.alignments)))

Foram carregadas 100 seq.


### Resumo

In [8]:
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import re

# Keep every HSP that passes the score / e-value / identity thresholds,
# count the accepted hits per source species and collect the aligned subject
# sequences as SeqRecords.
query_len = blast_ncbi.query_length
homologue = []
sources = {}

# Raw string: the original non-raw pattern relied on invalid "\[" escapes.
species_pattern = re.compile(r'\[[^\]]*\]')

print("Accession", "Identity%")
for aln in blast_ncbi.alignments:
    for HSP in aln.hsps:
        if not (HSP.score > 80 and HSP.expect < 1.0e-10):
            continue
        # Fraction of identical positions over the alignment length (0-1).
        identity = HSP.identities / HSP.align_length
        if not identity > 0.80:
            continue

        print(aln.accession, identity)

        # The species is the first "[...]" tag of the hit definition; fall
        # back to a placeholder instead of raising IndexError when absent.
        species_match = species_pattern.search(aln.hit_def)
        if species_match:
            species = species_match.group(0).replace("[", "").replace("]", "")
        else:
            species = "unknown"
        sources[species] = sources.get(species, 0) + 1

        # NOTE(review): one record is appended per accepted HSP, so an
        # alignment with several accepted HSPs yields records sharing an id.
        homologue.append(SeqRecord(Seq(HSP.sbjct), aln.title.split(">", 1)[0], "", ""))


print("Total:", len(homologue))

Accession Identity%
NP_002200 1.0
BAG36913 0.9991452991452991
XP_005255370 0.9991452991452991
CAA68747 0.9982905982905983
NP_001029341 0.9923076923076923
XP_034795955 0.9923076923076923
PNI12205 0.9923076923076923
XP_034795956 0.9914529914529915
XP_024209187 0.9914529914529915
EAW52244 0.9566639411283728
AAC31672 0.955846279640229
XP_024089734 0.9777777777777777
XP_032011574 0.9709401709401709
XP_024209188 0.976068376068376
XP_030653892 0.9675213675213675
XP_024209189 0.9675213675213675
XP_023046865 0.958974358974359
XP_023046867 0.958904109589041
XP_023046866 0.9581196581196582
XP_010386254 0.9555555555555556
EHH31590 0.9538461538461539
XP_025225979 0.9555555555555556
XP_037845568 0.9573742540494459
XP_009197106 0.9564102564102565
XP_033074013 0.9512820512820512
XP_014981430 0.9529914529914529
EHH60316 0.9511986301369864
XP_033074015 0.9511986301369864
XP_033074014 0.9504273504273504
XP_017736913 0.9512820512820512
XP_028697390 0.9253112033195021
XP_011544151 1.0
XP_034795957 0.991620

In [9]:
# Print how many accepted hits each source species contributed.
for species_name, hit_count in sources.items():
    print(species_name, ":", hit_count)

# Save the collected homologue records as FASTA; SeqIO.write stays the last
# expression so the notebook displays its return value.
homologue_fasta = os.path.join(wd, "data/homologue", gene, f"{seq_id}_{gene}.fasta")
SeqIO.write(homologue, homologue_fasta, "fasta")

Homo sapiens : 11
Pan troglodytes : 11
Pan paniscus : 3
Pongo abelii : 3
Hylobates moloch : 1
Nomascus leucogenys : 3
Piliocolobus tephrosceles : 4
Rhinopithecus roxellana : 1
Macaca mulatta : 3
Theropithecus gelada : 1
Chlorocebus sabaeus : 1
Papio anubis : 4
Trachypithecus francoisi : 4
Macaca fascicularis : 1
Rhinopithecus bieti : 1
Mandrillus leucophaeus : 1
Sapajus apella : 2
Callithrix jacchus : 1
Aotus nancymaae : 1
Saimiri boliviensis boliviensis : 1
Macaca nemestrina : 1
synthetic construct : 2
Galeopterus variegatus : 1
Equus asinus : 2
Equus caballus : 2
Equus przewalskii : 1
Carlito syrichta : 1
Propithecus coquereli : 1
Microcebus murinus : 4
Zalophus californianus : 2
Callorhinus ursinus : 4
Odobenus rosmarus divergens : 1
Eumetopias jubatus : 2
Balaenoptera musculus : 2
Pteropus alecto : 2
Sousa chinensis : 1
Lagenorhynchus obliquidens : 2
Tursiops truncatus : 2
Pteropus vampyrus : 2
Globicephala melas : 2
Phoca vitulina : 2
Neophocaena asiaeorientalis asiaeorientalis : 

105

### Resultados UNIPROT

#### Comentários

In [10]:
# Read the UniProt BLAST hits (FASTA) and extract each record's accession.
path = os.path.join(wd, "data/homologue", gene, file_blast_uniprot)
blast_uniprot_raw = SeqIO.parse(path, format="fasta")
blast_uniprot = []
blast_uniprot_id = []

for protein in blast_uniprot_raw:
    blast_uniprot.append(protein)
    # The accession is the text between the first pair of '|' in the record id
    # (presumably the "db|ACCESSION|name" header layout; verify against the file).
    # A dedicated name is used so the notebook-wide `seq_id` from the config
    # cell is no longer overwritten; the raw string avoids the invalid "\|" escape.
    uniprot_accession = re.findall(r'\|[^|]*\|', protein.id)[0].replace("|", "")
    blast_uniprot_id.append(uniprot_accession)

In [11]:
from wget import download

# Fetch the UniProt XML entry of every BLAST match into
# <wd>/data/homologue/<gene>/uniprot/.
uniprot_dir = os.path.join(wd, "data/homologue", gene, "uniprot")
# Make sure the target directory exists before the first download.
os.makedirs(uniprot_dir, exist_ok=True)

for accession in blast_uniprot_id:
    url = "https://www.uniprot.org/uniprot/{0}.xml".format(accession)
    path = os.path.join(uniprot_dir, "{0}.xml".format(accession))
    # Skip entries already on disk: re-running the cell then costs nothing,
    # and wget does not overwrite an existing target (it saves a renamed copy).
    if os.path.exists(path):
        continue
    download(url, path)


In [12]:
import re

# Load the UniProt XML record of every BLAST match.
blast_matches = []

for uniprot_acc in blast_uniprot_id:
    entry_path = os.path.join(wd, "data/homologue", gene, "uniprot", "{0}.xml".format(uniprot_acc))
    blast_matches.append(SeqIO.read(entry_path, format="uniprot-xml"))

# comments: {comment type -> {comment text -> "id1, id2, ..."}}
# keywords: {keyword -> "id1, id2, ..."}
comments = {}
an_domains = {}
keywords = {}
for entry in blast_matches:
    # Group the record ids under each keyword they are annotated with.
    for kw in entry.annotations.get("keywords", ()):
        if kw in keywords:
            keywords[kw] = ", ".join([keywords[kw], entry.id])
        else:
            keywords[kw] = entry.id

    # Annotation keys of the form "comment_<type>" hold the comment texts;
    # only the first text of each type is used.
    for annotation_key, annotation_values in entry.annotations.items():
        if not annotation_key.startswith("comment_"):
            continue
        kind = annotation_key[len("comment_"):]
        ids_by_text = comments.setdefault(kind, {})
        text = annotation_values[0]
        if text in ids_by_text:
            ids_by_text[text] = ", ".join([ids_by_text[text], entry.id])
        else:
            ids_by_text[text] = entry.id

# Report: one section per comment type, then the keyword table.
for kind, ids_by_text in comments.items():
    print(kind)
    for text, record_ids in ids_by_text.items():
        print(text, ":", record_ids)
    print("\n")

print("Keywords")
for kw, record_ids in keywords.items():
    print(kw, ":", record_ids)
print("\n")

function
Integrin ITGAL/ITGB2 is a receptor for ICAM1, ICAM2, ICAM3 and ICAM4. Integrin ITGAL/ITGB2 is a receptor for F11R (PubMed:11812992, PubMed:15528364). Integin ITGAL/ITGB2 is a receptor for the secreted form of ubiquitin-like protein ISG15; the interaction is mediated by ITGAL (PubMed:29100055). Involved in a variety of immune phenomena including leukocyte-endothelial cell interaction, cytotoxic T-cell mediated killing, and antibody dependent killing by granulocytes and monocytes. Contributes to natural killer cell cytotoxicity (PubMed:15356110). Involved in leukocyte adhesion and transmigration of leukocytes including T-cells and neutrophils (PubMed:11812992). Required for generation of common lymphoid progenitor cells in bone marrow, indicating a role in lymphopoiesis (By similarity). Integrin ITGAL/ITGB2 in association with ICAM3, contributes to apoptotic neutrophil phagocytosis by macrophages (PubMed:23775590). : P20701


subunit
Heterodimer of an alpha and a beta subunit (P

### Features

In [13]:
# features: {feature type -> {"<location> <description>" -> "id1, id2, ..."}}
# Only feature types containing domain / motif / bond / region are collected.
features = {}
for entry in blast_matches:
    for feat in entry.features:
        if not re.search("(domain|motif|bond|region)", feat.type):
            continue
        ids_by_desc = features.setdefault(feat.type, {})

        # Label = location, followed by the description when one is present.
        label = str(feat.location)
        if "description" in feat.qualifiers:
            label = " ".join([label, feat.qualifiers["description"]])

        if label in ids_by_desc:
            ids_by_desc[label] = ", ".join([ids_by_desc[label], entry.id])
        else:
            ids_by_desc[label] = entry.id

# Report: one section per feature type, listing the ids that share each label.
for feat_type, ids_by_desc in features.items():
    print(feat_type)
    for label, record_ids in ids_by_desc.items():
        print(label, ":", record_ids)
    print("\n")


topological domain
[25:1090] Extracellular : P20701
[1111:1170] Cytoplasmic : P20701


transmembrane region
[1090:1111] Helical : P20701
[1089:1112] Helical : G3QIF0, A0A2K6U6U8
[1100:1123] Helical : A0A2I2ZQ56
[1088:1111] Helical : A0A2I3NE88, A0A2K6QK32, F6T5M4
[1123:1146] Helical : A0A2I3LFH4, A0A2K6QK36, A0A1U7T7C9
[1074:1097] Helical : A0A2K6LQH9, A0A2K5LEG9
[1050:1073] Helical : H2NQN6
[1067:1090] Helical : A0A2K5Z3W6
[1061:1084] Helical : G1S146
[1087:1110] Helical : A0A0D9R0P8, A0A2K6G9M3, F6Z146, A0A2U3VQC7, A0A2Y9I415, H0XIA7, A0A2U3Y2T7
[1088:1110] Helical : A0A2K5EEQ0
[1054:1077] Helical : A0A2K6LQP4
[1123:1145] Helical : A0A2K5EEC1
[1059:1084] Helical : A0A2K6E005
[1057:1080] Helical : A0A2K5S058
[1005:1028] Helical : A0A2I2YAL8, A0A2K6QK26, A0A096NMH3, A0A2K6U6T5, F6YL01
[1086:1109] Helical : A0A484GSS5, A0A6J3Q0J2, A0A340X115, A0A2Y9HXM5
[1085:1108] Helical : A0A6J3Q0L3, A0A341BFG9, A0A2Y9PL29, A0A340WUX6
[1083:1106] Helical : A0A384AGW4
[1214:1237] Helical : A0A4U1FKL2
