In [2]:
# Identity & Similarity
# Identity: the share of positions with an exact match between two sequences.
# identity = (number of exact matches / total positions) * 100
# Example: identity = (6 / 8) * 100 = 75%

# Two equal-length DNA sequences that are compared position by position.
sequence1 = 'ATGCATAG'
sequence2 = 'ATGAATGG'

# zip() stops at the shorter input, so unequal lengths are rejected up front
# instead of being silently truncated.
if len(sequence1) != len(sequence2):
    raise ValueError("Sequences must be the same length")

# Number of positions at which both sequences hold the same character.
matches = sum(1 for a, b in zip(sequence1, sequence2) if a == b)

# Percent identity = exact matches / total positions * 100.
identity = (matches / len(sequence1)) * 100

# The labels are plain strings: an f-string is only needed where a value is
# interpolated (the last line).
print("Exact matches:", matches)
print("Total positions:", len(sequence1))
print(f"Identity: {identity:.2f}%")


Exact matches: 6
Total positions: 8
Identity: 75.00%


In [12]:
# The two DNA sequences that the following cells complement, transcribe,
# translate and compare.
seq1, seq2 = "ATGGCTGAACTG", "ATGGCCGAATTG"

def complementary_strand(dna_seq):
    """Return the base-wise complement of a DNA sequence.

    Every base is swapped for its pairing partner (A<->T, C<->G) position by
    position. The sequence is NOT reversed, so the result reads in the same
    left-to-right order as the input.

    Upper- and lower-case bases are both complemented and keep their case.
    Any other character (e.g. ``N`` or ``-``) is copied through unchanged.

    Args:
        dna_seq: DNA sequence as a string.

    Returns:
        A string of the same length as ``dna_seq`` holding the complement.
    """
    # Lower-case letters are mapped too; otherwise a lower-case sequence would
    # come back unchanged and look like a valid complement.
    complement_map = str.maketrans("ATCGatcg", "TAGCtagc")
    return dna_seq.translate(complement_map)

# Complement both input sequences; comp1/comp2 feed the transcription cell.
comp1 = complementary_strand(seq1)
comp2 = complementary_strand(seq2)

# Print each original next to its complement; the second heading starts with
# a newline so the two pairs are separated by a blank line.
for heading, original, complement in (
    ("Original DNA_seq1:", seq1, comp1),
    ("\nOriginal DNA_seq2:", seq2, comp2),
):
    print(heading, original)
    print("Complementary DNA:", complement)



Original DNA_seq1: ATGGCTGAACTG
Complementary DNA: TACCGACTTGAC

Original DNA_seq2: ATGGCCGAATTG
Complementary DNA: TACCGGCTTAAC


In [14]:
def dna_to_mrna(complement_seq):
    """Rewrite a DNA string in RNA letters by replacing thymine with uracil.

    Only the letter swap is performed: ``T`` becomes ``U`` and ``t`` becomes
    ``u``. Every other character is left as it is, and the sequence is neither
    complemented nor reversed here.

    NOTE(review): the parameter name suggests callers pass the complement of
    the strand of interest; the function itself works on any string.

    Args:
        complement_seq: DNA sequence as a string.

    Returns:
        The same sequence with every ``T``/``t`` replaced by ``U``/``u``.
    """
    # Handle both cases so a lower-case sequence is not returned with DNA
    # letters still in it.
    return complement_seq.replace("T", "U").replace("t", "u")

# Transcribe both complement strands into RNA letters.
mrna1, mrna2 = dna_to_mrna(comp1), dna_to_mrna(comp2)

print("mRNA_1:", mrna1)
print("mRNA_2:", mrna2)


mRNA_1: UACCGACUUGAC
mRNA_2: UACCGGCUUAAC


In [15]:
def split_into_codons(mrna_seq):
    """Cut a sequence into consecutive, non-overlapping triplets.

    Reading starts at index 0. A trailing remainder of one or two characters
    is dropped, so every returned item has exactly three characters.

    Args:
        mrna_seq: Sequence to split (a string in this notebook).

    Returns:
        List of three-character slices in their original order; empty when
        the input is shorter than three characters.
    """
    codons = []
    # Stopping at len - 2 means every start index still has three characters
    # after it, so no incomplete triplet is ever produced.
    for start in range(0, len(mrna_seq) - 2, 3):
        codons.append(mrna_seq[start:start + 3])
    return codons

# Break each mRNA string into triplets for the translation step.
codons1, codons2 = (split_into_codons(transcript) for transcript in (mrna1, mrna2))

print("Codons_1:", codons1)
print("Codons_2:", codons2)


Codons_1: ['UAC', 'CGA', 'CUU', 'GAC']
Codons_2: ['UAC', 'CGG', 'CUU', 'AAC']


In [16]:
# Lookup table from RNA codon (three letters over U/C/A/G) to a one-letter
# amino-acid code. All 64 possible triplets are present; entries are grouped
# by amino acid. UAA, UAG and UGA (the stop codons) map to "*".
codon_table = {
    "UUU": "F", "UUC": "F",
    "UUA": "L", "UUG": "L", "CUU": "L", "CUC": "L", "CUA": "L", "CUG": "L",
    "AUU": "I", "AUC": "I", "AUA": "I",
    "AUG": "M",
    "GUU": "V", "GUC": "V", "GUA": "V", "GUG": "V",
    "UCU": "S", "UCC": "S", "UCA": "S", "UCG": "S", "AGU": "S", "AGC": "S",
    "CCU": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "ACU": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "GCU": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "UAU": "Y", "UAC": "Y",
    "CAU": "H", "CAC": "H",
    "CAA": "Q", "CAG": "Q",
    "AAU": "N", "AAC": "N",
    "AAA": "K", "AAG": "K",
    "GAU": "D", "GAC": "D",
    "GAA": "E", "GAG": "E",
    "UGU": "C", "UGC": "C",
    "UGG": "W",
    "CGU": "R", "CGC": "R", "CGA": "R", "CGG": "R", "AGA": "R", "AGG": "R",
    "GGU": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    "UAA": "*", "UAG": "*", "UGA": "*"
}

def trancelate_to_amino_acids(codons):
    """Translate codons into a string of one-letter amino-acid codes.

    Each codon is looked up in the module-level ``codon_table``. A codon that
    is not a key of that table (wrong length, lower case, unknown letters)
    contributes ``"?"``. Translation does not stop at a ``"*"`` entry; every
    codon in the input is translated.

    Args:
        codons: Iterable of codon strings.

    Returns:
        One string with one character per input codon, in input order.
    """
    residues = []
    for codon in codons:
        residues.append(codon_table.get(codon, "?"))
    return "".join(residues)

# Translate both codon lists into amino-acid strings.
amino_acid_seq_1, amino_acid_seq_2 = (
    trancelate_to_amino_acids(codon_list) for codon_list in (codons1, codons2)
)

print("Amino acid sequence_1:", amino_acid_seq_1)
print("Amino acid sequence_2:", amino_acid_seq_2)

Amino acid sequence_1: YRLD
Amino acid sequence_2: YRLN


In [17]:
# Identity at the nucleotide level: percentage of positions at which seq1 and
# seq2 hold the same base.
if len(seq1) != len(seq2):
    # zip() below would silently ignore the surplus of the longer sequence.
    raise ValueError("Sequences must be the same length")

# Each comparison yields True/False; summing them counts the matches.
matches = sum(a == b for a, b in zip(seq1, seq2))
identity = matches / len(seq1) * 100

print("Exact matches:", matches)
print("Total positions:", len(seq1))
print(f"Identity: {identity:.2f}%")

Exact matches: 10
Total positions: 12
Identity: 83.33%


In [18]:
# Similarity, computed here as the percentage of positions at which the two
# translated amino-acid sequences hold exactly the same residue.
# NOTE(review): this is the same exact-match calculation as the identity cell,
# applied to the protein strings. If "similarity" is meant to also count
# chemically similar substitutions, a scoring scheme is needed - confirm intent.
if len(amino_acid_seq_1) != len(amino_acid_seq_2):
    # zip() below would silently ignore the surplus of the longer sequence.
    raise ValueError("Sequences must be the same length")

# Each comparison yields True/False; summing them counts the matches.
matches = sum(a == b for a, b in zip(amino_acid_seq_1, amino_acid_seq_2))
similarity = matches / len(amino_acid_seq_1) * 100

print("Exact matches:", matches)
print("Total positions:", len(amino_acid_seq_1))
print(f"Similarity: {similarity:.2f}%")


Exact matches: 3
Total positions: 4
Similarity: 75.00%
