In [6]:
#IN-CHAPTER EXERCISES: WRITING OUR OWN FUNCTIONS

#This is how you define a function to calculate AT content in a DNA sequence:
def get_at_content(dna):
    """Return the fraction of characters in ``dna`` that are A or T.

    Counting is case-insensitive. Every character of the input counts
    towards the total length, whatever it is.

    Args:
        dna: DNA sequence as a string (upper, lower or mixed case).

    Returns:
        The AT fraction as a float between 0 and 1, or 0.0 for an empty
        sequence.
    """
    length = len(dna)
    if length == 0:
        # An empty sequence has no bases: report 0.0 instead of failing
        # with ZeroDivisionError.
        return 0.0
    dna_upper = dna.upper()  # normalise the case once, not once per count
    return (dna_upper.count('A') + dna_upper.count('T')) / length
# Demo: upper- and lower-case input are both accepted; one result is printed per line.
print(
    get_at_content('ACGTAGTCGTAGTCGATGCTAGTCGTAGCT'),
    get_at_content('atcgatgctgatcgtagctgatgtgatcgtcgat'),
    sep='\n',
)
#Unlike a loop (whose variables are still visible after the loop ends), any variables defined within a function only exist there, not outside it in the rest of the code
#We can also include other inputs, such as the number of decimal places to round the result to (named sig_figs here):
def get_at_content(dna, sig_figs = 2):
    """Return the rounded fraction of characters in ``dna`` that are A or T.

    Counting is case-insensitive. Every character of the input counts
    towards the total length, whatever it is.

    Args:
        dna: DNA sequence as a string (upper, lower or mixed case).
        sig_figs: Passed straight to ``round()`` as its second argument, so
            despite the name it is a number of decimal places rather than
            a number of significant figures. Defaults to 2.

    Returns:
        The rounded AT fraction; an empty sequence is treated as 0.0.
    """
    length = len(dna)
    if length == 0:
        # An empty sequence has no bases: use 0.0 instead of failing with
        # ZeroDivisionError.
        at_content = 0.0
    else:
        dna_upper = dna.upper()  # normalise the case once, not once per count
        at_content = (dna_upper.count('A') + dna_upper.count('T')) / length
    return round(at_content, sig_figs)
print(get_at_content('ACGTAGTCGTAGTCGATGCTAGTCGTAGCT'))  # sig_figs omitted, so the default of 2 is used
print(get_at_content('ACGTAGTCGTAGTCGATGCTAGTCGTAGCT', 5))  # sig_figs given positionally as 5
#Arguments can also be passed by name (keyword arguments), like in R:
print(get_at_content(dna = 'ACGTAGTCGTAGTCGATGCTAGTCGTAGCT', sig_figs = 3))
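#Since we made sig_figs equal 2 in the function definition, this makes a DEFAULT of 2,
#which is what the function will round to if we don't pass a value for sig_figs, as in the
#first call above; the other two calls override the default with 5 and 3.
#Note that round() works in decimal places, not true significant figures.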
#Functions don't always have to return a value, they can just print something. But that makes
#them less versatile:
def print_at_content(dna):
    """Print the fraction of characters in ``dna`` that are A or T.

    Counting is case-insensitive and the value is rounded to 2 decimal
    places before printing. Nothing is returned, so the result cannot be
    reused by the caller.

    Args:
        dna: DNA sequence as a string (upper, lower or mixed case).
    """
    length = len(dna)
    if length == 0:
        # An empty sequence has no bases: print 0.0 instead of failing with
        # ZeroDivisionError.
        at_content = 0.0
    else:
        dna_upper = dna.upper()  # normalise the case once, not once per count
        at_content = (dna_upper.count('A') + dna_upper.count('T')) / length
    # print() already converts its argument with str(), so no explicit str().
    print(round(at_content, 2))
print_at_content('ATGCTAGTCGTAGTCGATGTCGAT')  # prints the value itself; there is no return value to pass to print()
#This is how you test a function: assert checks that a condition is True
assert get_at_content('ACGTAGTCGTAGTCGATGCTAGTCGTAGCT', 5) == 0.5
#It won't produce output if correct, but it will give you an error message (AssertionError) if incorrect.
#Uncomment the next line to see the failure:
#assert get_at_content('ACGTAGTCGTAGTCGATGCTAGTCGTAGCT', 5) == 0.2


0.5
0.5294117647058824
0.5
0.5
0.5
0.54


In [7]:
#END-OF-CHAPTER EXERCISES

In [8]:
#Percentage of amino acid residues, part one
def my_function(protein_sequence, amino_acid_residue):
    """Return the percentage of ``protein_sequence`` made up of one residue.

    The comparison is case-insensitive for both arguments.

    Args:
        protein_sequence: Protein sequence as a string of one-letter codes.
        amino_acid_residue: The one-letter residue code to count.

    Returns:
        The percentage (0-100) as a float. Returns 0.0 when the sequence or
        the residue is an empty string.
    """
    length = len(protein_sequence)
    if length == 0 or not amino_acid_residue:
        # Empty sequence: avoid ZeroDivisionError. Empty residue:
        # str.count('') returns len + 1, which would report more than 100%.
        return 0.0
    letter_count = protein_sequence.upper().count(amino_acid_residue.upper())
    # Multiply before dividing so that exact percentages stay exact
    # (7 / 100 * 100 gives 7.000000000000001, while 100 * 7 / 100 gives 7.0).
    return 100 * letter_count / length
# Demo call: show the percentage of methionine (M) in the example sequence.
print(my_function(protein_sequence="MSRSLLLRFLLFLLLLPPLP", amino_acid_residue="M"))
# Table-driven checks: each pair is (residue, expected percentage).
assert all(
    my_function("MSRSLLLRFLLFLLLLPPLP", residue) == expected_percent
    for residue, expected_percent in (("M", 5), ("r", 10), ("L", 50), ("Y", 0))
)

5.0


In [9]:
#Percentage of amino acid residues, part two
def my_function2(protein_sequence, amino_acid_residue_list = ['A', 'I', 'L', 'M', 'F', 'W', 'Y', 'V']):
    """Return the percentage of ``protein_sequence`` made up of the given residues.

    The comparison is case-insensitive, and each distinct residue is counted
    once even if it appears several times (or in both cases) in the list.

    Args:
        protein_sequence: Protein sequence as a string of one-letter codes.
        amino_acid_residue_list: Iterable of one-letter residue codes to
            count. The default list is only read, never modified.

    Returns:
        The combined percentage rounded to 0 decimal places, as a float.
        Returns 0.0 for an empty sequence.
    """
    length = len(protein_sequence)
    if length == 0:
        # Avoid ZeroDivisionError on an empty sequence.
        return 0.0
    protein_sequence = protein_sequence.upper()
    # Upper-case and de-duplicate first, so ['L', 'l'] counts leucine once
    # instead of twice (which could push the result past 100%).
    unique_residues = {letter.upper() for letter in amino_acid_residue_list}
    # str.count('') returns len + 1, so an empty entry must not be counted.
    unique_residues.discard('')
    sum_amino = sum(protein_sequence.count(letter) for letter in unique_residues)
    # Multiply before dividing to keep exact percentages exact.
    return round(100 * sum_amino / length, 0)
# Demo call: combined percentage of methionine (M) and leucine (L).
print(my_function2(protein_sequence="MSRSLLLRFLLFLLLLPPLP", amino_acid_residue_list=['M', 'L']))
# Table-driven checks: each pair is (residue list, expected percentage).
assert all(
    my_function2("MSRSLLLRFLLFLLLLPPLP", residues) == expected_percent
    for residues, expected_percent in ((["M"], 5), (['M', 'L'], 55), (['F', 'S', 'L'], 70))
)
# With no list given, the function's default residue list is used.
assert my_function2("MSRSLLLRFLLFLLLLPPLP") == 65

55.0
