# Counting DNA Nucleotides

In [2]:
def countDNANucleotides(DNAString):
    """Count the occurrences of each DNA base in a strand.

    Parameters:
        DNAString: iterable of single-character symbols drawn from
            "A", "C", "G" and "T".

    Returns:
        A 4-tuple with the counts of "A", "C", "G" and "T", in that order.

    Raises:
        KeyError: if the strand contains any other symbol.
    """
    bases = "ACGT"
    tally = dict.fromkeys(bases, 0)
    for base in DNAString:
        # Indexing (rather than .get) keeps unknown symbols a hard error.
        tally[base] = tally[base] + 1
    return tuple(tally[base] for base in bases)


In [3]:
sampleDNAString = "GTAGTACCACTCTAATTTTCTGTAGCGCATTGGTGGATAATAGGAGCTCCCTGATGCCATGGAAGCATTCCAAAGCCCCTCGTCCCATAAGTGGACAAGTCAGAATGTCCGGAGCATACTTCGCTGTTCAATGGCCGACAAGTCCGCCGAAGTCTGTTTAAGTCTCGTCACCAACCCCTTAGGACAAGCCCCGGTTCAATACTACCGGTCAGATCCAGAATCTCGACCTCCCGACCTCGCGTACACCCTGTATCTAGGTATCGTACCGCCGCCGGGCACGTTCTTTCCGGATGGGGACGAATGTATGAACATAAACGTCAGATCTAGGAAAATTGTACGATTAGGGCGACTTAATGCGAATCAATAGTTAGGCCCGCACTGCGTTTAAACTTCATTATCTATGTGACGTCATTATCGTTCCTTACGGATCGTAAACGTTTACACGCGGGCCGTGGGTCATAATGTAGCACGTCAAGCGCGAAGTTCTGTTATAAGGGAAACTACCCGTATGACTTTTTACGATGAAGCCTAAAAACCTTTTGGATCGGATGTAGGAGGTAGTTGGCATCCCCGAACAGACCTGGTTGCCCAAGACTACGCGACAATGGTAATCTGAGGTGACTGCTTATGGCATGGGACTTCGGTTTAGGTTTGACATTCAAACTTTACTAATTACTTAACCCGACCGGACTAACACAATCCATTAACGGCCCACCATGATTCCGGGGTTTTCACCTTACTGCCAAGAGATCCAATAGGTGTACGGTTATCGAATAGTTTACAAATCCGTTGTGCGGCTGGTCCGCGTACTGGCTGTTCCTGCTTGATTCTAAGCTGCCATGGAACTTTCGCCGCCTATGTTTCGC"
# Expected counts are given in A, C, G, T order.
assert countDNANucleotides(sampleDNAString) == (215, 219, 201, 231)

# Transcribing DNA into RNA

In [6]:
def DNAtoRNA(DNAString):
    """Transcribe a DNA strand into RNA by replacing every "T" with "U".

    Parameters:
        DNAString: iterable of single-character strings (normally a str).

    Returns:
        A new string in which each "T" has become "U"; every other symbol
        is copied through unchanged.
    """
    return "".join("U" if base == "T" else base for base in DNAString)

In [7]:
# Transcription check on a short sample strand.
sampleDNAString = "GATGGAACTTGACTACGTAAATT"
assert DNAtoRNA(sampleDNAString) == "GAUGGAACUUGACUACGUAAAUU"

# Complementing a Strand of DNA 

In [8]:
def complementDNAStrand(DNAString):
    """Return the reverse complement of a DNA strand.

    Each base is swapped with its partner (A<->T, C<->G) and the resulting
    sequence is reversed.

    Parameters:
        DNAString: iterable of the symbols "A", "C", "G" and "T".

    Returns:
        The reverse-complemented strand as a str.

    Raises:
        KeyError: if the strand contains any other symbol.
    """
    pairing = {"A": "T", "T": "A", "C": "G", "G": "C"}
    # Complement front-to-back first, then flip the whole strand.
    complemented = [pairing[base] for base in DNAString]
    complemented.reverse()
    return "".join(complemented)

In [9]:
assert complementDNAStrand("AAAACCCGGT") == "ACCGGGTTTT"

# Computing GC Content

In [17]:
def computeGCContentPercentage(NAString):
    """Return the percentage of symbols in a sequence that are "C" or "G".

    Parameters:
        NAString: a sized iterable of single-character symbols (for example
            a str). Only upper-case "C" and "G" are counted.

    Returns:
        The GC share as a float between 0 and 100. An empty sequence yields
        0.0 instead of dividing by zero.
    """
    length = len(NAString)
    if length == 0:
        # GC content of nothing is reported as zero rather than raising
        # ZeroDivisionError.
        return 0.0

    GCcounter = sum(1 for nucleicAcid in NAString if nucleicAcid in ("C", "G"))
    # Divide before scaling so results stay bit-identical to the previous
    # implementation.
    return GCcounter / length * 100

In [22]:
# Compare with a tolerance: exact equality on a float percentage breaks on
# any harmless change in rounding order.
assert abs(
    computeGCContentPercentage("CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGACTGGGAACCTGCGGGCAGTAGGTGGAAT")
    - 60.91954022988506
) < 1e-9

In [23]:
# The next cell needs Biopython. If it is not installed, uncomment the line below and run it first.
# %pip install biopython

def highestGCContent(file):
    """Find the FASTA record with the highest GC content.

    Parameters:
        file: passed unchanged to ``Bio.SeqIO.parse`` with the "fasta" format.

    Returns:
        A ``(name, gcPercentage)`` tuple for the record with the largest
        value from ``computeGCContentPercentage``. On a tie the earliest
        record wins. If the file contains no records, ``(None, 0)`` is
        returned.
    """
    from Bio import SeqIO

    # Start with no winner so the first record always seeds the result.
    # Previously `name` stayed unbound when the file was empty or when no
    # record exceeded 0% GC, and the final return raised UnboundLocalError.
    name = None
    maxGCContent = 0
    for record in SeqIO.parse(file, "fasta"):
        recordGC = computeGCContentPercentage(record.seq)
        if name is None or recordGC > maxGCContent:
            name = record.name
            maxGCContent = recordGC

    return name, maxGCContent


In [25]:
# Needs the FASTA file "f.fasta" in the working directory.
assert highestGCContent("f.fasta") == ("Rosalind_0808", 60.91954022988506)

# Rabbits and Recurrence Relations

In [51]:
def rabbits(n, k):
    """Return term ``n`` of the recurrence F(i) = F(i-1) + k * F(i-2).

    The sequence starts with F(1) = F(2) = 1.

    Parameters:
        n: 1-based index of the term to compute.
        k: multiplier applied to the term two steps back.

    Returns:
        The n-th term. Any ``n`` below 3 yields 1.
    """
    if n == 1 or n == 2:
        return 1

    # Only the two latest terms are needed to produce the next one.
    twoBack, oneBack = 1, 1
    for _ in range(2, n):
        twoBack, oneBack = oneBack, twoBack * k + oneBack
    return oneBack


In [52]:
assert 66507086889 == rabbits(28, 4)

# Counting Point Mutations

In [2]:
def calculateHammingDistance(DNAString1, DNAString2):
    """Count the positions at which two strands differ.

    Positions are taken from ``DNAString1``. Any symbols of ``DNAString2``
    beyond the length of ``DNAString1`` are ignored.

    Parameters:
        DNAString1: the strand whose positions are walked.
        DNAString2: the strand compared against; must be indexable.

    Returns:
        The number of mismatching positions as an int.

    Raises:
        IndexError: if ``DNAString2`` is shorter than ``DNAString1``.
    """
    return sum(
        1
        for position, base in enumerate(DNAString1)
        if base != DNAString2[position]
    )

In [6]:
assert 7 == calculateHammingDistance("GAGCCTACTAACGGGAT", "CATCGTAATGACGGCCT")

# Mendel's First Law

In [20]:
def dominantAlleleProbabilityFromHomozygousDominant_Heterozygous_HomozygousRecessive(k, m, n):
    """Probability that a randomly mated pair yields a dominant-allele child.

    Following the order in the function name, ``k`` is the number of
    homozygous dominant individuals, ``m`` the heterozygous ones and ``n``
    the homozygous recessive ones. Two distinct individuals are drawn, so
    the weighted count of ordered pairs is divided by
    ``total * (total - 1)``.

    Returns:
        The probability as a float.

    Raises:
        ZeroDivisionError: if ``k + m + n`` is 0 or 1.
    """
    population = k + m + n

    # Ordered pairs per genotype combination, each weighted by the chance
    # that the offspring carries a dominant allele. Recessive x recessive
    # pairs contribute nothing and are therefore omitted.
    dominantWithDominant = k * (k - 1)
    dominantWithHeterozygous = 2 * k * m
    dominantWithRecessive = 2 * k * n
    heterozygousWithHeterozygous = m * (m - 1) * .75
    heterozygousWithRecessive = 2 * m * n * .5

    weightedPairs = (
        dominantWithDominant
        + dominantWithHeterozygous
        + dominantWithRecessive
        + heterozygousWithHeterozygous
        + heterozygousWithRecessive
    )
    return weightedPairs / population / (population - 1)
        

In [23]:
# Compare with a tolerance: exact equality on a float probability breaks on
# any harmless change in rounding order.
assert abs(
    dominantAlleleProbabilityFromHomozygousDominant_Heterozygous_HomozygousRecessive(26, 29, 24)
    - 0.7645245050308341
) < 1e-9

# Translating RNA into Protein

In [9]:
 table = {
    # Maps each three-letter RNA codon (over U, C, A, G) to the string that
    # is emitted for it: a one-letter amino-acid code for most entries.
    # NOTE(review): the stop codons are encoded inconsistently. 'UAA' and
    # 'UAG' map to the literal 'Stop', while 'UGA' maps to the empty string.
    # Confirm which marker consumers of this table expect.
    'UUU': 'F', 'CUU': 'L', 'AUU': 'I', 'GUU': 'V',
    'UUC': 'F', 'CUC': 'L', 'AUC': 'I', 'GUC': 'V',
    'UUA': 'L', 'CUA': 'L', 'AUA': 'I', 'GUA': 'V',
    'UUG': 'L', 'CUG': 'L', 'AUG': 'M', 'GUG': 'V',
    'UCU': 'S', 'CCU': 'P', 'ACU': 'T', 'GCU': 'A',
    'UCC': 'S', 'CCC': 'P', 'ACC': 'T', 'GCC': 'A',
    'UCA': 'S', 'CCA': 'P', 'ACA': 'T', 'GCA': 'A',
    'UCG': 'S', 'CCG': 'P', 'ACG': 'T', 'GCG': 'A',
    'UAU': 'Y', 'CAU': 'H', 'AAU': 'N', 'GAU': 'D',
    'UAC': 'Y', 'CAC': 'H', 'AAC': 'N', 'GAC': 'D',
    'UAA': 'Stop', 'CAA': 'Q', 'AAA': 'K', 'GAA': 'E',
    'UAG': 'Stop', 'CAG': 'Q', 'AAG': 'K', 'GAG': 'E',
    'UGU': 'C', 'CGU': 'R', 'AGU': 'S', 'GGU': 'G',
    'UGC': 'C', 'CGC': 'R', 'AGC': 'S', 'GGC': 'G',
    'UGA': '', 'CGA': 'R', 'AGA': 'R', 'GGA': 'G',
    'UGG': 'W', 'CGG': 'R', 'AGG': 'R', 'GGG': 'G'
}

def mRNAtoAminoAcids(mRNAChain):
    """Translate an mRNA string into its protein string.

    The chain is read three bases at a time and each codon is looked up in
    the module-level ``table``. Translation ends at the first stop codon
    (UAA, UAG or UGA); the stop codon adds nothing to the result and
    anything after it is ignored.

    Parameters:
        mRNAChain: whitespace-free string of RNA bases starting with "AUG".

    Returns:
        The one-letter amino-acid codes joined into a single str.

    Raises:
        AssertionError: if the first codon is not the start codon "AUG".
        IndexError: if ``mRNAChain`` is empty.
        KeyError: if a codon before the first stop codon is not in ``table``
            (for example a trailing fragment shorter than three bases).
    """
    from textwrap import wrap

    # Stop codons are listed here, not read from `table`, because the table
    # marks them inconsistently ('Stop' for UAA/UAG, '' for UGA). Joining
    # those values used to leak the literal text 'Stop' into the protein.
    stopCodons = ('UAA', 'UAG', 'UGA')

    # wrap() cuts the unbroken base string into consecutive 3-character pieces.
    codons = wrap(mRNAChain, 3)
    assert codons[0] == 'AUG', "mRNA chain must begin with the start codon AUG"

    aminoAcids = []
    for codon in codons:
        if codon in stopCodons:
            break
        aminoAcids.append(table[codon])
    return ''.join(aminoAcids)


In [11]:
# `assert(a, b)` asserts a two-element tuple, which is always truthy, so the
# old form could never fail. Compare the values explicitly instead.
assert 'MAMAPRTEINSTRING' == mRNAtoAminoAcids('AUGGCCAUGGCGCCCAGAACUGAGAUCAAUAGUACCCGUAUUAACGGGUGA')

# Finding a Motif in DNA

In [35]:
def motifLocationFinder(DNAString, motif):
    """List every 1-based position at which ``motif`` occurs in ``DNAString``.

    Overlapping occurrences are all reported.

    Parameters:
        DNAString: the sequence to search.
        motif: the substring to look for.

    Returns:
        The start positions in ascending order, separated by single spaces,
        or an empty string when there is no occurrence.
    """
    width = len(motif)
    lastStart = len(DNAString) - width
    hits = [
        str(start + 1)  # positions are reported 1-based
        for start in range(lastStart + 1)
        if DNAString[start:start + width] == motif
    ]
    return ' '.join(hits)

In [37]:
assert '2 4 10' == motifLocationFinder('GATATATGCATATA', 'ATAT')

# Consensus and Profile

In [None]:
def mostLikelyCommonAncestorFinder(DNAStrings):
    