# Programming Exercises 10.26.22
<br>

## Exercise 1.
Consider 100 randomly mutated sequences. Index them from 0 to 99.<br>
Select only those with at least **2 deletions** and **GC% > 0.45** using higher order functions.<br><br>
*Return their index and the GC% value.*<br><br><br>

In [1]:
# Importing required functions

import random as r


def random_human_dna():
    '''This function takes no arguments.
    When called, it returns a random human DNA string of length between 5 and 50 nt'''
    random_dna_string = "".join(r.choices(["A", "C", "G", "T"],
                                            weights = [29, 21, 21, 29],
                                            k = r.randint(5, 50)))
    return random_dna_string


def mut(seq):
    '''This function takes as input a DNA sequence `seq`.
    It returns the DNA string containing three random mutations in three random positions:
    either substitution, insertion (shown in lowercase letters) or deletion (marked with a dash)'''
    for i in range(3):
        # generating a single mutation
        mut_pos = r.randint(0, len(seq) - 1)
        mut_type = r.randint(0, 2)
        if mut_type == 0:
            # substitution
            replacement = r.choice(["a", "c", "g", "t"])
            seq = seq[:mut_pos] + replacement + seq[mut_pos + 1:]
        elif mut_type == 1:
            # insertion
            inserted_elem = r.choice(["a", "c", "g", "t"])
            seq= seq[:mut_pos] + inserted_elem + seq[mut_pos:]
        else:
            # deletion
            seq = seq[:mut_pos] + "-" + seq[mut_pos + 1:]
    return seq

<br><br><br>

In [2]:
# Defining some functions to check whether a DNA sequence has at least 2 deletions and a GC% higher than 45%


def dels(seq):
    '''This function takes as input a DNA sequence `seq`.
    
    It returns the same DNA sequence if it contains at least two deletions (dashes);
    Otherwise, it returns an empty string
    '''
    count = 0
    for nuc in seq:
        if nuc == '-':
            count += 1
    if count >= 2:
        return seq
    else:
        return ''

def gc(seq):
    '''This function takes as input a DNA sequence `seq`.
    If it is non-empty, it returns the GC% content of the sequence.
    '''
    if len(seq) > 0:
        gc_count = 0
        dash_count = 0
        for c in seq:
            if c == 'G' or c == 'C':
                gc_count += 1
            elif c == '-':
                dash_count += 1
        return round(gc_count/(len(seq) - dash_count), 3)
    
def checkgc(seq):
    '''This function takes as input a DNA sequence `seq`.
    If calls the `gc` function in order to compute its GC%.
    If the GC% is higher than 45%, it returns the same DNA sequence.
    Otherwise, it returns an empty string.
    '''
    if len(seq) > 0 and gc(seq) > 0.45:
        return seq
    else:
        return ''

<br><br><br>

In [3]:
# Performing the task

l = []  
for i in range(100):
    dna_seq = random_human_dna()
    mut_sequence = mut(dna_seq)
    l.append(mut_sequence)
    l = list(map(dels, l))
    l = list(map(checkgc, l))
    if len(l[i]) > 0:
        print(f'''\nDNA Sequence:\t\t{dna_seq}
Mutated Sequence:\t{mut_sequence}\n
\t\t\tIndex: {i}\tGC%: {gc(mut_sequence)}\n''', '─' * 75)


DNA Sequence:		ACTGGAACTAGACGAAGTCGGATGGGGGG
Mutated Sequence:	ACTGGAACTAGA-GAAGTCGGA-GGGGG-

			Index: 37	GC%: 0.577
 ───────────────────────────────────────────────────────────────────────────

DNA Sequence:		GAGGTAACAGGTATCCAGTCACAG
Mutated Sequence:	G-GGTAACAGGTATtCAGT-ACAG

			Index: 39	GC%: 0.455
 ───────────────────────────────────────────────────────────────────────────

DNA Sequence:		TATAAGAGACCGTTTACGCGGGATTGTGAGATCGCCG
Mutated Sequence:	TATAA-AGACCGTTTACGC-GGATTGTGAGATCcGCCG

			Index: 64	GC%: 0.472
 ───────────────────────────────────────────────────────────────────────────

DNA Sequence:		GGGGCTTTCGTTCGGGAGACCAGGGCGCATCGC
Mutated Sequence:	-GGGCTTTCGcTCGGGAGACCAGGGC-CATCGC

			Index: 89	GC%: 0.677
 ───────────────────────────────────────────────────────────────────────────
