In [1]:
# sequence1 = ATGCATAG
# sequence2 = ATGAATGG
# Count the exact matches between the two DNA sequences.
# identity = (number of exact matches / total positions) * 100
# identity = (6 / 8) * 100 = 75%

In [21]:
# Two DNA sequences to compare base by base.
sequence1 = "ATGCATAG"
sequence2 = "ATGAATGG"

# A position-wise comparison is only defined when both lengths agree.
if len(sequence1) != len(sequence2):
    raise ValueError("Sequences must be of equal length")

# Each True counts as 1, so the sum is the number of matching positions.
exact_matches = sum(
    base1 == base2 for base1, base2 in zip(sequence1, sequence2)
)

# Identity (%) = matching positions / total positions * 100.
total_positions = len(sequence1)
identity_percentage = 100 * (exact_matches / total_positions)

print(f"Exact matches: {exact_matches}")
print("Total position: ", total_positions)
print(f"Identity: {identity_percentage:.2f}%")


Exact matches: 6
Total position:  8
Identity: 75.00%


In [22]:
# Position-by-position comparison of two DNA sequences, printing the outcome
# for every position before the summary.
sequences1 = "ATGCATAG"
sequences2 = "ATGAATGG"

# Only the overlapping prefix is compared, so unequal lengths cannot raise
# IndexError. The length is taken from this cell's own sequences (not from
# `sequence1`/`sequence2` of the earlier cell) so the cell also runs on a
# fresh kernel and stays correct if the two pairs ever differ.
min_length = min(len(sequences1), len(sequences2))

exact_matches = 0
for i in range(min_length):
    if sequences1[i] == sequences2[i]:
        exact_matches += 1
        print(f"Position {i+1}: {sequences1[i]} == {sequences2[i]} true")
    else:
        print(f"Position {i+1}: {sequences1[i]} != {sequences2[i]} false")

# Identity (%) = matching positions / compared positions * 100.
identity_percentage = (exact_matches / min_length) * 100
print(f"\nExact matches: {exact_matches}")
print("Total position: ",min_length)
print(f"Identity: {identity_percentage:.2f}%")


Position 1: A == A true
Position 2: T == T true
Position 3: G == G true
Position 4: C != A false
Position 5: A == A true
Position 6: T == T true
Position 7: A != G false
Position 8: G == G true

Exact matches: 6
Total position:  8
Identity: 75.00%


In [28]:
# The two input DNA sequences used by the cells below.
DNA1, DNA2 = "ATGGCTGAACTG", "ATGGCCGAATTG"

# Build the complementary strand, transcribe it to mRNA and split it into codons (translation with the codon table happens in the next cell)
def complementary_strand(dna_seq):
    """Return the base-by-base complement of ``dna_seq``.

    Uppercase A<->T and G<->C are swapped; every other character is passed
    through unchanged. The order of the sequence is kept (no reversal).
    """
    pairing = {"A": "T", "T": "A", "G": "C", "C": "G"}
    return "".join(pairing.get(base, base) for base in dna_seq)


# Complement both input sequences in one pass.
complement1, complement2 = map(complementary_strand, (DNA1, DNA2))

#create mRNA sequence
def dna_to_mrna(dna_seq):
    """Return ``dna_seq`` with every uppercase "T" replaced by "U".

    All other characters, including lowercase "t", are left as they are.
    """
    thymine_to_uracil = str.maketrans("T", "U")
    return dna_seq.translate(thymine_to_uracil)

# Transcribe both complement strands (T -> U).
mrna1, mrna2 = map(dna_to_mrna, (complement1, complement2))

def split_into_codons(mrna_seq):
    """Split ``mrna_seq`` into consecutive three-character codons.

    Reading starts at index 0. A trailing fragment shorter than three
    characters is dropped, so every returned codon is complete.
    """
    # Length of the prefix that divides evenly into triplets.
    usable_length = len(mrna_seq) - len(mrna_seq) % 3
    triplets = []
    for start in range(0, usable_length, 3):
        triplets.append(mrna_seq[start:start + 3])
    return triplets

# Split both mRNA sequences into codons.
codons1, codons2 = map(split_into_codons, (mrna1, mrna2))


def _print_pipeline(dna, complement, mrna, codons):
    """Print every intermediate product obtained from one input sequence."""
    print("Original DNA sequence:       ", dna)
    print("Complement sequence:         ", complement)
    print("mRNA sequence:       ", mrna)
    print("Codons: ", codons)


_print_pipeline(DNA1, complement1, mrna1, codons1)

# Blank separator between the two reports.
print("\n")

_print_pipeline(DNA2, complement2, mrna2, codons2)

Original DNA sequence:        ATGGCTGAACTG
Complement sequence:          TACCGACTTGAC
mRNA sequence:        UACCGACUUGAC
Codons:  ['UAC', 'CGA', 'CUU', 'GAC']


Original DNA sequence:        ATGGCCGAATTG
Complement sequence:          TACCGGCTTAAC
mRNA sequence:        UACCGGCUUAAC
Codons:  ['UAC', 'CGG', 'CUU', 'AAC']


In [29]:
def translate_to_amino_acids(codons, table=None):
    """Translate a sequence of codons into a one-letter amino-acid string.

    Every codon contributes exactly one character to the result, so the
    returned string can be compared position by position with another
    translation:

    * a codon found in the table yields its table entry;
    * a codon whose table entry is ``"Stop"`` yields ``"*"`` instead of the
      four-character word, which would otherwise shift all later positions
      and whose leading "S" would be indistinguishable from serine;
    * a codon missing from the table yields ``"?"``.

    Args:
        codons: iterable of codon strings (keys of the lookup table).
        table: optional codon -> residue mapping. When omitted, the
            module-level ``codon_table`` is used (looked up at call time).

    Returns:
        str: one character per input codon; ``""`` for no codons.
    """
    if table is None:
        table = codon_table

    residues = []
    for codon in codons:
        residue = table.get(codon, "?")
        residues.append("*" if residue == "Stop" else residue)
    return "".join(residues)

# Lookup table from mRNA codon to one-letter amino-acid code.
# The three stop codons map to the word "Stop".
codon_table = {
    # Phenylalanine / Leucine
    "UUU": "F", "UUC": "F", "UUA": "L", "UUG": "L",
    "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    # Isoleucine / Methionine
    "AUU": "I", "AUC": "I", "AUA": "I", "AUG": "M",
    # Valine
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    # Serine
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S",
    "AGU": "S", "AGC": "S",
    # Proline
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    # Threonine
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    # Alanine
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    # Tyrosine
    "UAU": "Y", "UAC": "Y",
    # Histidine
    "CAU": "H", "CAC": "H",
    # Glutamine
    "CAA": "Q", "CAG": "Q",
    # Asparagine
    "AAU": "N", "AAC": "N",
    # Lysine
    "AAA": "K", "AAG": "K",
    # Aspartate
    "GAU": "D", "GAC": "D",
    # Glutamate
    "GAA": "E", "GAG": "E",
    # Cysteine
    "UGU": "C", "UGC": "C",
    # Tryptophan
    "UGG": "W",
    # Arginine
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R", "AGA": "R", "AGG": "R",
    # Glycine
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    # Stop codons
    "UAA": "Stop", "UAG": "Stop", "UGA": "Stop",
}

# Translate each codon list into its amino-acid string and show both results.
amino_acid_seq1, amino_acid_seq2 = map(translate_to_amino_acids, (codons1, codons2))
print("Amino acid sequence: ", amino_acid_seq1)
print("Amino acid sequence: ", amino_acid_seq2)

Amino acid sequence:  YRLD
Amino acid sequence:  YRLN


In [31]:
# Calculate the identity percentage between the two DNA sequences.
# zip() silently stops at the shorter input, so the length guard must check
# the same pair that is compared below (DNA1/DNA2), not derived sequences.
if len(DNA1) != len(DNA2):
    raise ValueError("Sequences must be of equal length")

# Number of positions at which both sequences carry the same base.
exact_matches = sum(1 for a, b in zip(DNA1, DNA2) if a == b)

# Identity (%) = matching positions / total positions * 100.
total_positions = len(DNA1)
identity_percentage = (exact_matches / total_positions) * 100

print(f"Exact matches: {exact_matches}")
print("Total position: ",total_positions)
print(f"Identity: {identity_percentage:.2f}%")

Exact matches: 10
Total position:  12
Identity: 83.33%


In [36]:
# Compare the two translated amino-acid sequences position by position.
# "Similarity" here is the share of positions holding the identical residue.

if len(amino_acid_seq1) != len(amino_acid_seq2):
    raise ValueError("Sequences must be of equal length")

# Each True counts as 1, so the sum is the number of matching positions.
exact_matches = sum(
    residue1 == residue2
    for residue1, residue2 in zip(amino_acid_seq1, amino_acid_seq2)
)

total_positions = len(amino_acid_seq1)
similarity_percentage = 100 * (exact_matches / total_positions)

print(f"Exact matches: {exact_matches}")
print("Total position: ", total_positions)
print(f"Similarity: {similarity_percentage:.2f}%")

Exact matches: 3
Total position:  4
Similarity: 75.00%
