#  Projeto Bioinformática

Bruna Almeida PG51187

__1º Get phage genomes from NCBI__

In [4]:
pip install biopython

Note: you may need to restart the kernel to use updated packages.


__Pseudomonas phage__

In [27]:
from Bio import Entrez
from Bio import SeqIO
import os
from Bio.Seq import Seq

def download_phage_genomes(query):
    Entrez.email = "bruna_dfa@hotmail.com"
    handle = Entrez.esearch(db="nucleotide", term=query, retmax=100000)
    search_results = Entrez.read(handle)
    handle.close()
    
    genome_ids = search_results["IdList"]
    print(f"Total {len(genome_ids)} genome(s) found.")

    output_directory = "pseudomonas_phage2"
    os.makedirs(output_directory, exist_ok=True)
    
    for genome_id in genome_ids:
        handle = Entrez.efetch(db="nucleotide", id=genome_id, rettype="gb", retmode="text")
        record = SeqIO.read(handle, "gb")
        handle.close()

        phage_name = record.annotations["source"]
        filename = f"{output_directory}/{phage_name.replace(' ', '_').replace('/','_')}.fasta"
        
        protein_records = []
        for feature in record.features:
            if feature.type == "CDS" and "translation" in feature.qualifiers:
                protein_seq = feature.qualifiers["translation"][0]
                protein_id = feature.qualifiers["protein_id"][0]
                if "product" in feature.qualifiers:
                    protein_description = feature.qualifiers["product"][0]
                else:
                    protein_description = "not found"
                protein_record = SeqIO.SeqRecord(Seq(protein_seq), id=protein_id, description=protein_description)
                protein_records.append(protein_record)

        SeqIO.write(protein_records, filename, "fasta")
        print(f"Genome saved: {filename}")

# Example usage
query = "pseudomonas phage[Title] AND complete genome[Title]"
download_phage_genomes(query)

Total 1070 genome(s) found.
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeS_PAO1_HW12.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_Pae-SS2019XII.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Ps60.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_UMP151.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_H71.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_H72.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Fc02.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Ps59.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Fc22.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_JBD25.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PaVOB.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Y15.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Y13.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Y12.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Y11.fasta
Genome saved: pseud

Genome saved: pseudomonas_phage2/Pseudomonas_phage_VB_PaeS_VL1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Skulduggery.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_BUCT-PX-5.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeS_C1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_TehO.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Epa40.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Kopi.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Guyu.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeS-Yazdi-M.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Kaya.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_RP4_(wastewater_metagenome).fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_UFJF_PfSW6.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_B55.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_B31.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_v

Genome saved: pseudomonas_phage2/Pseudomonas_phage_ZC03.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeP_PAO1_1-15pyo.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiPsa17.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PaP4.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeS_PAO1_Ab19.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_C2-10_Ab02.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PaMx25.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_LKO4.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PA26.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_CEB_DP1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiKTN6.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeP_MAG4.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_MAG1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phi2.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa

Genome saved: pseudomonas_phage2/Pseudomonas_phage_PA10.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PA5.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Churi01.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Ka1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_IR-QUMS-PaBa1-GHS-2021.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PseudoP-SA22.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiCTX.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PMG1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeP_4034.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeP_4032.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeP_4029.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaM_SEMA.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_BUCT-PX-5.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PaeP_Ls.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiH1.fast

Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-O.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-N.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-M.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-L.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-J.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-I.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-H.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-G.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-F.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-C.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-B.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pa-A.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_SoKa.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PA_LZ01.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Astolliot.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Kremar.fast

Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeP_fHoPae04.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_zikora.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_MR14.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Itty13.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_billy.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_willy.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_goonie.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_fnug.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_datas.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_crassa.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_chunk.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_chumba.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_antinowhere.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_cory.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_oldone.fasta
Genome saved: pseudomonas_phage2/Ps

Genome saved: pseudomonas_phage2/Pseudomonas_phage_E2005-24-39.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Paer4.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_LY218.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiKMVC5-12.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiKMVC5-4.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_HU1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_S50.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_S12-3.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_R26.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_R12.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_U47.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_fHoPae01.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_pf8_ST274-AUS411.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaS_IME307.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_SMS29.fasta
Genome sav

Genome saved: pseudomonas_phage2/Pseudomonas_phage_tabernarius.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_SL2.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_VW-6B.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_VW-6S.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_IME180.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_ZC08.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_ZC01.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_ZC03.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeM_G1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Noxifer.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Skulduggery.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Phabio.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_Pae1396P-5.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_Pae575P-3.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_Pae436M-8.fasta
Genome saved: pse

Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiPsa17.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_vB_PaeP_p2-10_Or1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PaMx73.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_PhiCHU.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_H70.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_YH6.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_Pf-10.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_LKA1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_MP48.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiPSA1_(phiPSA1).fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiPSA2_(phiPSA2).fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_SPM-1.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_JD024.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_phiIBB-PAA2.fasta
Genome saved: pseudomonas_phage2/Pseudomonas_phage_TL.fasta
Genome saved: pse

__Acinetobacter phage__

In [2]:
from Bio import Entrez
from Bio import SeqIO
import os
from Bio.Seq import Seq

def download_phage_genomes(query):
    Entrez.email = "bruna_dfa@hotmail.com"
    handle = Entrez.esearch(db="nucleotide", term=query, retmax=100000)
    search_results = Entrez.read(handle)
    handle.close()
    
    genome_ids = search_results["IdList"]
    print(f"Total {len(genome_ids)} genome(s) found.")

    output_directory = "acinetobacter_phage"
    os.makedirs(output_directory, exist_ok=True)
    
    for genome_id in genome_ids:
        handle = Entrez.efetch(db="nucleotide", id=genome_id, rettype="gb", retmode="text")
        record = SeqIO.read(handle, "gb")
        handle.close()

        phage_name = record.annotations["source"]
        filename = f"{output_directory}/{phage_name.replace(' ', '_').replace('/','_')}.fasta"
        
        protein_records = []
        for feature in record.features:
            if feature.type == "CDS" and "translation" in feature.qualifiers:
                protein_seq = feature.qualifiers["translation"][0]
                protein_id = feature.qualifiers["protein_id"][0]
                if "product" in feature.qualifiers:
                    protein_description = feature.qualifiers["product"][0]
                else:
                    protein_description = "not found"
                protein_record = SeqIO.SeqRecord(Seq(protein_seq), id=protein_id, description=protein_description)
                protein_records.append(protein_record)

        SeqIO.write(protein_records, filename, "fasta")
        print(f"Genome saved: {filename}")

# Example usage
query = "acinetobacter phage[Title] AND complete genome[Title]"
download_phage_genomes(query)

Total 266 genome(s) found.
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaM_P1.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_P1068.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaP_EPab_B.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_EAb13.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaA_LLY.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaM_ABMM1.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Acba_4.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_F70-K44.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_nACB2.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaS_TCUP2199.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Acba_18.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Acba_15.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Acba_14.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Acba_13.fasta

Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaP_ABWU2101.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Ab1656-2.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Phab24.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_IME_Ab2712.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_APK20.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_APK15.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_APK86.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_APK16.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_APK77.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_APK09.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_BUCT629.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaP_WU2001.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Abp95.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_BUCT628.fasta
Genome saved: acinetobacter_phage/Acinet

Genome saved: acinetobacter_phage/Acinetobacter_phage_AbP2.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaP_AS11.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaP_AS12.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_LZ35.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaS_TRS1.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_phiAB6.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaM_phiAbaA1.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_YMC11_11_R3177.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaP_PD-AB9.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_vB_AbaP_PD-6A3.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Ab105-1phi.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_Fri1.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_AB3.fasta
Genome saved: acinetobacter_phage/Acinetobacter_phage_YMC11_12_R1215.fasta
Genome 

__Correr os genomas todos de uma vez para a ferramenta AcRanker__

In [19]:
pip install scikit-learn




In [20]:
pip install scipy

Note: you may need to restart the kernel to use updated packages.


In [31]:
import sklearn
import scipy

In [8]:
import subprocess

fasta_file = "C:/Users/Bruna/Documents/pseudomonas_phage2/lcl_AB008550.1_prot_BAA36226.1_1.fasta"
output_prefix = "C:/Users/Bruna/Documents/results"
resources_directory = "C:/Users/Bruna/Documents/ACRANKER22"
#C:\Users\Bruna\Documents\ACRANKER22

command = f'python3 acranker.py {fasta_file} {output_prefix}'

result = subprocess.Popen(command, shell=True).communicate()
print(result)
#stdout, stderr = result.communicate()

(None, None)


In [32]:
import subprocess

fasta_file = "C:/Users/Bruna/Documents/pseudomonas_phage2/2209258818.fasta"
output_prefix = "recognizer_output"
resources_directory = "/home/bruna/resources_directory"
acranker_script = "C:/Users/Bruna/Documents/ACRANKER22"  

command = f'python {acranker_script} -f {fasta_file} -o {output_prefix} -rd {resources_directory}'

result = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = result.communicate()

if result.returncode == 0:
    print("Comando executado com sucesso!")
else:
    print("Ocorreu um erro ao executar o comando:")
    print(stderr.decode("utf-8"))

Ocorreu um erro ao executar o comando:
C:\Users\Bruna\anaconda3\python.exe: can't find '__main__' module in 'C:\\Users\\Bruna\\Documents\\ACRANKER22'

