# Nucleotide BLAST from NCBI #

###### Open Anaconda Powershell to install Biopython, then launch a Jupyter notebook.
###### Versions:
###### Anaconda navigator 2.6.2
###### Conda 24.7.1
###### Biopython 1.78
###### Python 3.12.4

In [3]:
#Import modules from Biopython
from Bio.Blast import NCBIWWW
from Bio import SeqIO, SearchIO

In [None]:
# help(NCBIWWW)

In [5]:
#Define the sequence and header
header = ">Example_Nucleotide_Sequence"
sequence = """CTTCTAGCGCTCGGCACCGGCGGGCCAGGCGCGTCCTGCCTTCATTTATCCAGCAGCTTTTCGGAAAATG
CATTTGCTGTTCGGAGTTTAATCAGAAGAGGATTCCTGCCTCCGTCCCCGGCTCCTTCATCGTCCCCTCT
CCCCTGTCTCTCTCCTGGGGAGGCGTGAAGCGGTCCCGTGGATAGAGATTCATGCCTGTGCCCGCGCGTG
TGTGCGCGCGTGTAAATTGCCGAGAAGGGGAAAACATCACAGGACTTCTGCGAATACCGGACTGAAAATT
GTAATTCATCTGCCGCCGCCGCTGCCTTTTTTTTTTCTCGAGCTCTTGAGATCTCCGGTTGGGATTCCTG
CGGATTGACATTTCTGTGAAGCAGAAGTCTGGGAATCGATCTGGAAATCCTCCTAATTTTTACTCCCTCT
CCCCGCGACTCCTGATTCATTGGGAA"""

#Open a file in write mode
with open("nuc_seq.fasta", "w") as fasta_file:
    #Write the header and the sequence to the file
    fasta_file.write(header + "\n") #Write the header line
    fasta_file.write(sequence + "\n") #Write the sequence

In [7]:
#Print the directory path of the Jupyter notebook 
import os
print(os.getcwd())

#List files in directory
#print(os.listdir())

#Print full path of the fasta file created above
for root, dirs, files in os.walk(os.getcwd()):
    for file in files:
        if file.endswith(".fasta"):
            print(os.path.join(root, file))

C:\Users\marbj610\Desktop\Projects\Probabilities
C:\Users\marbj610\Desktop\Projects\Probabilities\nuc_seq.fasta


In [11]:
#Check whether the file exists and read the sequence
try:
    nuc_record = SeqIO.read("nuc_seq.fasta", format = "fasta")
    #Get the length of the nucleotide record
    length = len(nuc_record)
    print("The length of the sequence is {}.".format(length))
except FileNotFoundError:
    print("The file 'nuc_seq.fasta' was not found.")
except Exception as e:
    print("An error occured: {}".format(e))

The length of the sequence is 446.


In [13]:
#Define nuc_record by reading a sequence from a FASTA file
nuc_record = SeqIO.read(r"C:\Users\marbj610\Documents\Repository\nuc_seq.fasta", "fasta")

#Use nuc_record in the NCBIWWW function
result_handle = NCBIWWW.qblast("blastn", "nt", nuc_record.seq)
blast_result = SearchIO.read(result_handle, "blast-xml")

In [14]:
print(blast_result[0:2])

Program: blastn (2.16.0+)
  Query: No (446)
         definition line
 Target: nt
   Hits: ----  -----  ----------------------------------------------------------
            #  # HSP  ID + description
         ----  -----  ----------------------------------------------------------
            0      1  gi|1830949194|ref|NM_000657.3|  Homo sapiens BCL2 apopt...
            1      1  gi|1830949192|ref|NM_000633.3|  Homo sapiens BCL2 apopt...


In [15]:
Seq = blast_result[0]
print("Sequence ID: {}".format(Seq.id))
print("Sequence Description: {}".format(Seq.description))
details = Seq[0]
print("E-value: {}".format(details.evalue))

Sequence ID: gi|1830949194|ref|NM_000657.3|
Sequence Description: Homo sapiens BCL2 apoptosis regulator (BCL2), transcript variant beta, mRNA
E-value: 0.0


In [16]:
print("alignment:\n{}".format(details.aln))

alignment:
Alignment with 2 rows and 446 columns
CTTCTAGCGCTCGGCACCGGCGGGCCAGGCGCGTCCTGCCTTCA...GAA No
CTTCTAGCGCTCGGCACCGGCGGGCCAGGCGCGTCCTGCCTTCA...GAA gi|1830949194|ref|NM_000657.3|
