# Biopython

In [1]:
# Biopython is a library for computational biology and bioinformatics, designed to work with biological data in Python.
# It provides tools for DNA and protein sequence analysis, alignment, structural bioinformatics, etc.

In [6]:
# install library
!pip install biopython



In [16]:
# Example 1: Reading and Analyzing DNA Sequences

from Bio.Seq import Seq

# A short DNA fragment wrapped in a Seq object
dna_sequence = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG")

# GC content: the percentage of bases that are G or C
gc_bases = sum(dna_sequence.count(base) for base in ("G", "C"))
gc_content = 100 * gc_bases / len(dna_sequence)
print(f"GC Content: {gc_content}%")

# Transcription yields the RNA form of the same strand (every T becomes U)
rna_sequence = dna_sequence.transcribe()
print(f"RNA Sequence: {rna_sequence}")

# Summary: build a Seq from a DNA string, report its GC percentage, then print its RNA transcript.

GC Content: 56.41025641025641%
RNA Sequence: AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG


In [15]:
# Example 2: Sequence Alignment

from Bio.Align import PairwiseAligner
from Bio.Seq import Seq  # imported here so this cell does not rely on Example 1 having been run

# Creating a PairwiseAligner object; no mode or scores are set, so its defaults apply
aligner = PairwiseAligner()

# Two sequences to be aligned
sequence1 = Seq("ACCGT")
sequence2 = Seq("ACG")

# Perform alignment (the first argument is reported as "target", the second as "query")
alignments = aligner.align(sequence1, sequence2)

# Print the first of the returned alignments
best_alignment = alignments[0]
print(best_alignment)

# Here, we use the PairwiseAligner class from Bio.Align to align the two sequences.
# Because the aligner is left at its default settings, run print(aligner) to see the
# mode and the match/mismatch/gap scores in effect for the installed Biopython version.

target            0 ACCGT 5
                  0 ||-|- 5
query             0 AC-G- 3



In [17]:
# Example 3: Reading and Analyzing Sequences from a FASTA File

from Bio import SeqIO

# Path to the FASTA file (relative to the notebook's working directory)
fasta_file = "Ecoli_ complete_genome.fna"

# Reading and analyzing each sequence in the file
for seq_record in SeqIO.parse(fasta_file, "fasta"):
    sequence = seq_record.seq
    sequence_id = seq_record.id
    sequence_length = len(sequence)

    # Upper-case first so that soft-masked (lower-case) g/c bases are counted as well
    normalized = sequence.upper()
    gc_count = normalized.count("G") + normalized.count("C")

    # An empty record would raise ZeroDivisionError; report 0% for it instead
    gc_content = 100 * gc_count / sequence_length if sequence_length else 0.0

    print(f"Sequence ID: {sequence_id}")
    print(f"Sequence Length: {sequence_length}")
    print(f"GC Content: {gc_content:.2f}%\n")

# This example demonstrates how to handle multiple sequences from a standard file format in bioinformatics, providing a foundational skill for many bioinformatics analyses.

Sequence ID: U00096.3
Sequence Length: 4641652
GC Content: 50.79%

