In [90]:
def avg_phred(qual_scores):
    """
    Return the average quality score of a string of quality characters.

    Leading and trailing whitespace (e.g. a trailing newline) is stripped
    first. Each remaining character is decoded as ``ord(char) - 33`` and the
    decoded scores are averaged.

    INPUT:  qual_scores = STRING of quality characters
    OUTPUT: average score <float>
    RAISES: ValueError if no characters remain after stripping whitespace
    """
    qual_scores = qual_scores.strip()
    if not qual_scores:
        # Without this guard the division below would fail with an
        # uninformative ZeroDivisionError.
        raise ValueError("avg_phred needs at least one quality character.")
    total = sum(ord(char) - 33 for char in qual_scores)
    return total / len(qual_scores)



In [102]:
# Sanity checks for avg_phred: each quality string lines up with the
# expected average in the list on the right-hand side.
assert [
    avg_phred(quals)
    for quals in ("!!!!!!!!", "--------", "IIII!!!!", "IIIIIIII")
] == [0, 12, 20, 40]


In [103]:
def reverse_complement(sequence):
    """
    Takes a sequence of nucleotides and returns the reverse complement
    sequence.

    A<->T and C<->G are swapped and the ambiguous base N maps to itself.
    Lower-case bases are accepted and keep their case in the output.

    INPUT:  sequence = STRING of nucleotides (A, C, G, T, N; either case)
    OUTPUT: reverse complement <str>
    RAISES: KeyError if the sequence contains any other character
    """
    comp_dict = {
        "A": "T", "C": "G", "G": "C", "T": "A", "N": "N",
        "a": "t", "c": "g", "g": "c", "t": "a", "n": "n",
    }
    # Look every base up explicitly (rather than str.translate) so that an
    # unexpected character still fails loudly instead of passing through.
    return "".join(comp_dict[nuc] for nuc in reversed(sequence))



In [107]:
# Sanity checks for reverse_complement: each input lines up with the
# expected reverse complement in the list on the right-hand side.
assert [
    reverse_complement(seq)
    for seq in ("AAAAAAAA", "ATCGATCG", "TATATATA", "ATGATGAT")
] == ["TTTTTTTT", "CGATCGAT", "TATATATA", "ATCATCAT"]

In [108]:
def hammax(a, b):
    """
    description: hammax takes 2 strings of equal length and returns
    their hamming distance (the number of positions at which the two
    strings differ). The comparison is case-sensitive.
    INPUT: 2 STRINGS
    OUTPUT: Hamming Distance <int>
    RAISES: TypeError if either input is not a string
            ValueError if the strings differ in length
    """
    # TypeError and ValueError are both subclasses of Exception, so callers
    # that catch Exception keep working.
    if not isinstance(a, str):
        raise TypeError("hammax takes STRINGS as input. Your 1st input was a {}.".format(type(a)))
    if not isinstance(b, str):
        raise TypeError("hammax takes STRINGS as input. Your 2nd input was a {}.".format(type(b)))
    if len(a) != len(b):
        raise ValueError("{} and {} must be of equal length".format(a, b))
    # Each mismatching pair contributes True (== 1) to the sum.
    return sum(char_a != char_b for char_a, char_b in zip(a, b))



In [111]:
# Sanity checks for hammax: each (a, b) pair lines up with the expected
# distance in the list on the right-hand side. Note the comparison is
# case-sensitive ("aBc" vs "AbC" differ everywhere).
assert [
    hammax(left, right)
    for left, right in (
        ("aBc", "AbC"),
        ("AGT", "AGT"),
        ("TAT", "AAA"),
        ("ATT", "ATG"),
    )
] == [3, 0, 2, 1]

In [120]:
def keepORnot(seq, a_set):
    """
    Find a member of a_set that is at most one mismatch away from seq.

    INPUT:  seq=A STRING
            a_set=A SET (or frozenset) OF DISTINCT STRINGS. Every member is
                  compared to seq with hammax, which raises if a member's
                  length differs from seq's.
    OUTPUT: RETURNS the first member encountered whose hamming distance to
            seq is less than 2, or "" if no member qualifies. Sets are
            unordered, so if several members qualify, which one is returned
            is not specified.
    RAISES: TypeError if seq is not a string or a_set is not a set/frozenset
    """
    if not isinstance(seq, str):
        # The original referenced an undefined name here, which raised
        # NameError instead of reporting the offending type.
        raise TypeError("keepORnot takes a STRING as the first input. Your input was a {}.".format(type(seq)))
    if not isinstance(a_set, (set, frozenset)):
        raise TypeError("keepORnot takes a SET as the second input. Your input was a {}.".format(type(a_set)))
    for candidate in a_set:
        if hammax(seq, candidate) < 2:
            return candidate
    return ""


In [121]:
# Reference set of known sequences used by the keepORnot checks.
setobj = {"ACTGATGC", "CCCCGGGG", "IMTIRED!"}

In [122]:
# Sanity checks for keepORnot: each query is one mismatch away from exactly
# one member of setobj, listed in the same order on the right-hand side.
assert [
    keepORnot(query, setobj)
    for query in ("ACTGATGT", "CCCCGCGG", "IMTIREE!")
] == ["ACTGATGC", "CCCCGGGG", "IMTIRED!"]