In [None]:
# %pip install biopython
# %pip install matplotlib

# Sequence Manipulation:
1. Display the total length of Sequence X and Y.
2. Display the index at which the codon (CGC) first appears in Sequence X and in Sequence Y.
3. Create a new sequence by following the steps below:
- Take the first 10 nucleotides from Sequence X and the last 5 nucleotides from Sequence X.
- Take the first 20 nucleotides from Sequence Y and the last 3 nucleotides from Sequence Y.
- Combine both of the strings above and reverse the result to form a new sequence, Sequence Z.

In [None]:
# Load sequence x and y from x.fasta and y.fasta and print their total lengths.

from Bio import SeqIO, Seq

# SeqIO.read expects exactly one record per file and raises ValueError otherwise.
# Every "N" character is removed before measuring, so the reported lengths
# count only the bases that remain after that filtering.
seq_x = SeqIO.read("x.fasta", "fasta").seq
seq_x = seq_x.replace("N", "")
seq_y = SeqIO.read("y.fasta", "fasta").seq
seq_y = seq_y.replace("N", "")

# Label each line so the two numbers can be told apart in the output.
print("Total length of Sequence X:", len(seq_x))
print("Total length of Sequence Y:", len(seq_y))

In [None]:
# Print the first index at which the codon CGC appears in sequence x and y.

from Bio.Seq import Seq

find_seq = Seq("CGC")

# find() returns the 0-based index of the first occurrence, or -1 when the
# pattern is absent. The search is positional: it is not restricted to a
# reading frame.
index_x = seq_x.find(find_seq)
index_y = seq_y.find(find_seq)

print("The first time codon CGC appears in sequence x:", index_x)
if index_x != -1:
    # Show the sequence from the match onwards as a visual check; slice from
    # the computed index rather than a number copied from an earlier run.
    print(seq_x[index_x:])
print("The first time codon CGC appears in sequence y:", index_y)
if index_y != -1:
    print(seq_y[index_y:])

In [None]:
# Create a new sequence x by concatenating the first 10 and last 5 bases of sequence x.
# The trailing colon matters: seq_x[-5] is a single base, seq_x[-5:] is the last five.
new_seq_x = seq_x[:10] + seq_x[-5:]
print("New sequence x:", new_seq_x)

# Create a new sequence y by concatenating the first 20 and last 3 bases of sequence y.
new_seq_y = seq_y[:20] + seq_y[-3:]
print("New sequence y:", new_seq_y)

# Concatenate the new sequences x and y to create a sequence z.
seq_z = new_seq_x + new_seq_y
print("Sequence z:", seq_z)

# Reverse sequence z (plain reversal of base order, not a reverse complement).
seq_z = seq_z[::-1]
print("Sequence z reversed:", seq_z)

# Sequence Analysis & Plotting
1. Display the frequency of the nucleotide bases A and C in Sequence Z as a chart using the matplotlib library. You are free to use either a bar chart or a pie chart.
2. Display GC and AT content percentage for Sequence Z.
3. Display melting temperature for Sequence Z using Wallace method.
4. Display molecular weight for Sequence Z.


In [None]:
# Plot (as a bar chart) the number of A and C bases in sequence z.

A_count = seq_z.count("A")
C_count = seq_z.count("C")

from matplotlib import pyplot as plt

# Use the explicit figure/axes interface and label everything so the chart
# can be read on its own, without the surrounding cell text.
fig, ax = plt.subplots()
ax.bar(["A", "C"], [A_count, C_count], color=["orange", "blue"])
ax.set_title("Frequency of A and C bases in sequence z")
ax.set_xlabel("Nucleotide base")
ax.set_ylabel("Count")
ax.grid(True)
plt.show()

# Alternative rendering as a pie chart (the task allows either):
# plt.pie([A_count, C_count], labels=["A", "C"], colors=["orange", "blue"])
# plt.show()


In [None]:
# Report the GC and AT content of sequence z as percentages.

# Original author's note: `from Bio.SeqUtils import GC` is the import used on
# the BINUS lab PCs (presumably an older Biopython release; confirm).
from Bio.SeqUtils import gc_fraction

# gc_fraction returns a fraction, not a percentage. AT is taken as the
# remainder, which assumes sequence z contains only A, C, G and T.
GC_content = gc_fraction(seq_z)
AT_content = 1 - GC_content

print(
    "Percentage of GC content in sequence z:",
    GC_content*100,
    "%",
)
print(
    "Percentage of AT content in sequence z:",
    AT_content*100,
    "%",
)

In [None]:
# Report the melting temperature of sequence z, estimated with the Wallace rule.

from Bio.SeqUtils import MeltingTemp as MT

# Biopython documents the Wallace rule as Tm = 4 * (G + C) + 2 * (A + T),
# in degrees Celsius.
MT_content = MT.Tm_Wallace(seq_z)

print(
    "Melting Temperature of sequence z using the Wallace method:",
    MT_content,
    "°C",
)

In [None]:
# Report the molecular weight of sequence z.

from Bio.SeqUtils import molecular_weight as MW

# Called with library defaults; no sequence type or strandedness is passed here.
MW_content = MW(seq_z)

print(
    "Molecular Weight of sequence z:",
    MW_content,
    "g/mol",
)

# DNA and mRNA Protein Synthesis:
1. Transcribe and display the DNA sequence Z into mRNA Sequence.
2. Translate and display the mRNA sequence Z into amino acids Sequence.

In [None]:
# Transcribe sequence z to mRNA and translate it to an amino acid sequence.

print("DNA sequence of sequence z:", seq_z)

# transcribe() treats seq_z as the coding strand and replaces T with U.
mRNA_seq = seq_z.transcribe()
print("mRNA sequence of sequence z:", mRNA_seq)

# Translate only whole codons. A trailing partial codon contributes no amino
# acid, and passing it to translate() triggers Biopython's partial-codon warning.
coding_length = len(mRNA_seq) - len(mRNA_seq) % 3
protein_seq = mRNA_seq[:coding_length].translate()
print("Amino acid sequence of sequence z:", protein_seq)

from Bio.SeqUtils import seq1, seq3

# translate() already yields one-letter codes, so print the result directly.
# seq1() converts three-letter codes TO one-letter codes; applying it to a
# one-letter string reads it in chunks of three and yields mostly "X".
print("Amino acid sequence of sequence z in 1-letter code:", str(protein_seq))
print("Amino acid sequence of sequence z in 3-letter code:", seq3(protein_seq))

# Sequence Alignment & Similarities:
1. Perform local alignment pairwise between sequence X and sequence Y and display the alignment score.
2. Perform global alignment pairwise between sequence Y and sequence Z and display the alignment score.
3. Find and display the Hamming and Levenshtein distance between Sequence X and Y
