<a href="https://colab.research.google.com/github/Sanarazaaa/Biopython-Projects/blob/main/Biopython_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m36.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


# Convert DNA to RNA


In [None]:
# Biopython is already installed by the setup cell at the top of the notebook,
# so this cell only imports what it needs instead of reinstalling the package.
from Bio.Seq import Seq

dna_seq = Seq("ATGCGA")  # wrap the DNA string in a Seq object
rna_seq = dna_seq.transcribe()  # DNA -> RNA transcription
print(rna_seq)  # expected output: AUGCGA

AUGCGA


# Pairwise Sequence Alignment
**Project: Perform sequence alignment between two sequences using pairwise2.**

In [None]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

# Example inputs: the two sequences to be aligned against each other.
seq1, seq2 = "GATTACA", "GCATGCU"

# Global alignment of the two sequences; the result is iterated below, one
# entry per alignment found.
# NOTE(review): Bio.pairwise2 is flagged as deprecated in recent Biopython
# releases in favour of Bio.Align.PairwiseAligner -- confirm and plan a
# migration before upgrading the library.
alignments = pairwise2.align.globalxx(seq1, seq2)

# Show every returned alignment in the human-readable layout produced by
# format_alignment (aligned rows, match markers and the score).
for alignment in alignments:
    rendered = format_alignment(*alignment)
    print(rendered)


G-ATTA-CA-
| | |  |  
GCA-T-GC-U
  Score=4

G-ATTA-CA-
| ||   |  
GCAT--GC-U
  Score=4

G-ATTACA-
| | |.|  
GCA-TGC-U
  Score=4

G-ATTACA-
| || .|  
GCAT-GC-U
  Score=4

G-ATTACA-
| ||. |  
GCATG-C-U
  Score=4

G-ATTA-CA
| | |  |.
GCA-T-GCU
  Score=4

G-ATTA-CA
| ||   |.
GCAT--GCU
  Score=4

G-ATTACA
| | |.|.
GCA-TGCU
  Score=4

G-ATTACA
| || .|.
GCAT-GCU
  Score=4

G-ATTACA
| ||. |.
GCATG-CU
  Score=4





# Phylogenetic Tree Construction
**Project: Create a simple phylogenetic tree from given DNA sequences.**

In [None]:
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

# Toy data set: four equal-length DNA sequences keyed by their taxon label.
taxon_sequences = {
    "A": "ATCGTACG",
    "B": "ATCGTACC",
    "C": "ATCGGACG",
    "D": "ATCGGACC",
}
seqs = [
    SeqRecord(Seq(bases), id=taxon)
    for taxon, bases in taxon_sequences.items()
]

# The records are wrapped directly as a multiple sequence alignment.
alignment = MultipleSeqAlignment(seqs)

# Pairwise distance matrix computed with the 'identity' model.
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(alignment)

# Build the tree from the distance matrix with the constructor's nj() method.
constructor = DistanceTreeConstructor()
tree = constructor.nj(dm)

# Render the resulting tree as ASCII art in the cell output.
Phylo.draw_ascii(tree)


                                        _____________________________________ A
  _____________________________________|
 |                                     |_____________________________________ B
_|
 |_____________________________________ C
 |
 |_____________________________________ D



# DNA Sequence Analysis (GC Content Calculation)

**Project: Analyze a given DNA sequence and calculate its GC content (the percentage of G and C nucleotides).**

In [None]:
from Bio.Seq import Seq

def gc_content(sequence):
    """Return the GC content of a nucleotide sequence as a percentage.

    Args:
        sequence: The sequence to analyse, given as a ``str``, as ``bytes``,
            or as any object whose ``str()`` is the sequence text.

    Returns:
        float: ``(G + C) / length * 100``. Bases are counted
        case-insensitively, and an empty sequence yields ``0.0`` rather than
        raising ``ZeroDivisionError``.

    Raises:
        ValueError: If ``sequence`` is ``None``.
    """
    if sequence is None:
        raise ValueError("sequence must not be None")
    if isinstance(sequence, (bytes, bytearray)):
        # str(b"ACGT") would yield "b'ACGT'", so decode raw bytes explicitly.
        sequence = sequence.decode("ascii")

    # Upper-case once so lower-case (soft-masked) bases are counted as well.
    bases = str(sequence).upper()
    if not bases:
        # No bases at all: report 0% instead of dividing by zero.
        return 0.0

    gc_total = bases.count("G") + bases.count("C")
    return gc_total / len(bases) * 100

# Demo: compute the GC percentage of a sample sequence and report it with two
# decimal places.
dna_sequence = "AGCTATAGCGTAGCTAGCGTAGCTA"
gc_percent = gc_content(dna_sequence)
print(f"GC Content: {gc_percent:.2f}%")


GC Content: 48.00%
