In [None]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (13 kB)
Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m31.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.86


In [None]:
from Bio import Blast
from Bio import SeqIO
from io import StringIO

In [None]:
# Print the signature of Blast.qblast to review the search parameters it accepts.
help(Blast.qblast)

Help on function qblast in module Bio.Blast:

qblast(program, database, sequence, url_base='https://blast.ncbi.nlm.nih.gov/Blast.cgi', auto_format=None, composition_based_statistics=None, db_genetic_code=None, endpoints=None, entrez_query='(none)', expect=10.0, filter=None, gapcosts=None, genetic_code=None, hitlist_size=50, i_thresh=None, layout=None, lcase_mask=None, matrix_name=None, nucl_penalty=None, nucl_reward=None, other_advanced=None, perc_ident=None, phi_pattern=None, query_file=None, query_believe_defline=None, query_from=None, query_to=None, searchsp_eff=None, service=None, threshold=None, ungapped_alignment=None, word_size=None, short_query=None, alignments=500, alignment_view=None, descriptions=500, entrez_links_new_window=None, expect_low=None, expect_high=None, format_entrez_query=None, format_object=None, format_type='XML', ncbi_gi=None, results_file=None, show_overview=None, megablast=None, template_type=None, template_length=None, username='blast', password=None)
    

In [None]:
# Contact address for NCBI BLAST requests made through Bio.Blast.
# `Blast` is already imported in the imports cell, so it is not re-imported here.
# The former bare `Blast.tool` expression was dropped: in the middle of a cell it
# neither changed nor displayed anything.
Blast.email = "kosmasriolegowo@mail.ugm.ac.id"

In [None]:
def run_blast_for_sequence(label, record, program="blastn", database="nt"):
    """Run a remote BLAST search for one record and return the parsed result.

    The raw XML reply is saved to ``{label}_blast.xml`` (relative to the current
    working directory) and the result is then parsed back from that file.

    Args:
        label: Short name used in the progress messages and as the prefix of
            the output file name.
        record: Object exposing ``.id`` and ``.format("fasta")``, such as the
            records loaded with ``SeqIO.read`` elsewhere in this notebook.
        program: BLAST program, e.g. "blastn" for DNA or "blastp" for protein.
        database: Database to search, e.g. "nt" (nucleotide) or "nr" (protein).

    Returns:
        The object produced by ``Blast.read`` for the saved XML file
        (one query gives one record).

    Raises:
        OSError: If the XML file cannot be written or re-opened. The BLAST
            result stream is closed before the error propagates.
    """
    print(f"Menjalankan {program} untuk {label} ({record.id}) ...")

    # Submit the query to NCBI BLAST as FASTA text.
    fasta_string = record.format("fasta")
    result_stream = Blast.qblast(
        program=program,
        database=database,
        sequence=fasta_string,
        format_type="XML",  # the format that Blast.read() parses below
    )

    # qblast hands back a byte stream: persist it to disk first. The
    # try/finally guarantees the stream is closed even if the write fails.
    out_filename = f"{label}_blast.xml"
    try:
        with open(out_filename, "wb") as out_handle:
            out_handle.write(result_stream.read())
    finally:
        result_stream.close()
    print(f"Hasil BLAST disimpan ke {out_filename}")

    # Parse the saved XML; a single query is expected to give a single record.
    with open(out_filename, "rb") as in_handle:
        blast_record = Blast.read(in_handle)

    return blast_record

In [None]:
def summarize_blast_result(label, blast_record, max_hits=5):
    """Print a short text report for the first ``max_hits`` hits of a BLAST record.

    For every reported hit only its first HSP (index 0) is inspected. Identity
    is shown as a percentage of the alignment length, i.e. the second element
    of the HSP's ``shape``; "n/a" is shown when the identity count is missing
    or the alignment length is zero.

    Args:
        label: Name shown in the report header.
        blast_record: Sliceable, sized collection of hits with a ``query.id``.
        max_hits: Maximum number of hits to print.
    """
    print(f"\n Ringkasan hasil BLAST untuk {label}: {blast_record.query.id}")
    print(f"Total hits: {len(blast_record)}")

    top_hits = blast_record[:max_hits]
    for rank, hit in enumerate(top_hits, start=1):
        # Each hit is a sequence of HSPs; the first one is the one reported.
        top_hsp = hit[0]

        # shape is (rows, columns); the column count is the alignment length.
        _n_rows, n_columns = top_hsp.shape
        stats = top_hsp.annotations
        n_identical = stats.get("identity", None)
        e_value = stats.get("evalue", None)
        score_bits = stats.get("bit score", None)

        if n_identical is None or not n_columns:
            identity_text = "n/a"
        else:
            percent = 100.0 * n_identical / n_columns
            identity_text = f"{percent:.2f}% ({n_identical}/{n_columns})"

        subject = hit.target
        report_lines = (
            f"\nHit #{rank}",
            f"  > ID        : {subject.id}",
            f"    Deskripsi : {subject.description}",
            f"    Panjang   : {len(subject)}",
            f"    E-value   : {e_value}",
            f"    Bit score : {score_bits}",
            f"    Identity  : {identity_text}",
        )
        print(*report_lines, sep="\n")


In [None]:
# Load the two query sequences from local FASTA files
# (presumably downloaded from the NCBI links listed below this cell — verify).
seq1 = SeqIO.read("sequence_tbc.fasta", "fasta") # tuberculosis (TB) sequence
seq2 = SeqIO.read("sequence_cacar.fasta", "fasta") # chickenpox (varicella) sequence

Sequence 1 : https://www.ncbi.nlm.nih.gov/nuccore/MN221386.1?report=fasta

Sequence 2 : https://www.ncbi.nlm.nih.gov/protein/AAB24914.1?report=fasta

Mencoba blastn (query nukleotida Seq1 terhadap database nukleotida `nt`)

In [None]:
# Search seq1 with blastn against the "nt" database, then print the top hits.
blast_seq1 = run_blast_for_sequence("Seq1", seq1, program="blastn", database="nt")

summarize_blast_result("Seq1", blast_seq1)

Menjalankan blastn untuk Seq1 (MN221386.1) ...
Hasil BLAST disimpan ke Seq1_blast.xml

 Ringkasan hasil BLAST untuk Seq1: Query_474849
Total hits: 50

Hit #1
  > ID        : gi|1917459103|gb|MN221386.1|
    Deskripsi : Mycobacterium tuberculosis strain yt180643 RNA polymerase beta subunit (rpoB) mRNA, complete cds
    Panjang   : 3519
    E-value   : 0.0
    Bit score : 6347.34
    Identity  : 100.00% (3519/3519)

Hit #2
  > ID        : gi|2556671655|gb|CP130778.1|
    Deskripsi : Mycobacterium tuberculosis strain MTb-Oman-3213831 chromosome, complete genome
    Panjang   : 4321982
    E-value   : 0.0
    Bit score : 6338.32
    Identity  : 99.94% (3517/3519)

Hit #3
  > ID        : gi|3020066102|gb|CP194235.1|
    Deskripsi : Mycobacterium tuberculosis strain LP-0504069-RM2 chromosome, complete genome
    Panjang   : 4411637
    E-value   : 0.0
    Bit score : 6338.32
    Identity  : 99.94% (3517/3519)

Hit #4
  > ID        : gi|3020066110|gb|CP194243.1|
    Deskripsi : Mycobacterium 

Mencoba blastp (query protein Seq2 terhadap database protein `nr`)

In [None]:
# Search seq2 with blastp against the "nr" database, then print the top hits.
blast_seq2 = run_blast_for_sequence("Seq2", seq2, program="blastp", database="nr")
summarize_blast_result("Seq2", blast_seq2)

Menjalankan blastp untuk Seq2 (AAB24914.1) ...
Hasil BLAST disimpan ke Seq2_blast.xml

 Ringkasan hasil BLAST untuk Seq2: Query_910237
Total hits: 50

Hit #1
  > ID        : ref|NP_040159.1|
    Deskripsi : thymidine kinase [Human alphaherpesvirus 3] >sp|P09250.1| RecName: Full=Thymidine kinase [Human herpesvirus 3 strain Dumas] >gb|AAB24914.1| thymidine kinase, TK [varicella zoster virus VZV, Peptide, 341 aa] [Human alphaherpesvirus 3] >gb|AEW88445.1| thymidine kinase [Human alphaherpesvirus 3] >gb|AEW88733.1| thymidine kinase [Human alphaherpesvirus 3] >emb|CAA27919.1| deoxypyrimidine kinase [Human herpesvirus 3 strain Dumas]
    Panjang   : 341
    E-value   : 0.0
    Bit score : 709.909
    Identity  : 100.00% (341/341)

Hit #2
  > ID        : sp|P0C0E6.1|
    Deskripsi : RecName: Full=Thymidine kinase [Human herpesvirus 3 strain Oka vaccine] >pdb|1OSN|A Chain A, Thymidine kinase [Human alphaherpesvirus 3] >pdb|1OSN|B Chain B, Thymidine kinase [Human alphaherpesvirus 3] >pdb|1OSN|C

Mencoba blastx (query nukleotida Seq1 diterjemahkan, lalu dicari pada database protein `nr`)

In [None]:
# Search seq1 with blastx against the "nr" database, then print the top hits.
# The label previously read "Seq1 with blastp" although the program run here is
# blastx, which mislabelled both the printed summary and the saved XML file name.
blast_seq3 = run_blast_for_sequence("Seq1_blastx", seq1, program="blastx", database="nr")
summarize_blast_result("Seq1_blastx", blast_seq3)

Menjalankan blastx untuk Seq1 with blastp (MN221386.1) ...
Hasil BLAST disimpan ke Seq1 with blastp_blast.xml

 Ringkasan hasil BLAST untuk Seq1 with blastp: Query_1005541
Total hits: 50

Hit #1
  > ID        : gb|QOL79194.1|
    Deskripsi : RNA polymerase beta subunit [Mycobacterium tuberculosis]
    Panjang   : 1172
    E-value   : 0.0
    Bit score : 2394.77
    Identity  : 99.91% (1171/1172)

Hit #2
  > ID        : pdb|5ZX2|C
    Deskripsi : Chain C, DNA-directed RNA polymerase subunit beta [Mycobacterium tuberculosis H37Rv] >pdb|5ZX3|C Chain C, DNA-directed RNA polymerase subunit beta [Mycobacterium tuberculosis H37Rv] >pdb|6JCX|C Chain C, DNA-directed RNA polymerase subunit beta [Mycobacterium tuberculosis H37Rv] >pdb|6JCY|C Chain C, DNA-directed RNA polymerase subunit beta [Mycobacterium tuberculosis H37Rv] >pdb|6KON|C Chain C, DNA-directed RNA polymerase subunit beta [Mycobacterium tuberculosis H37Rv] >pdb|6KOO|C Chain C, DNA-directed RNA polymerase subunit beta [Mycobacterium 