# Functions

In [None]:
import random

def some_func():
    """Print one nucleotide letter chosen at random from A, C, G and T."""
    nucleotides = ['A', 'C', 'G', 'T']
    picked = random.choice(nucleotides)
    print(picked)


# Print five random letters, one per line.
for _ in range(5):
    some_func()

In [None]:
def f(a):
    """Return `a` raised to the power of two."""
    squared = pow(a, 2)
    return squared


b = f(5.5)
print(b)

In [None]:
def func(arg1, arg2):
    """Return (arg1 + 2 * arg2) / 3, i.e. a mean in which arg2 counts twice."""
    weighted_total = arg1 + 2 * arg2
    return weighted_total / 3


print(func(3, 6))
print(func(13, 13))

In [None]:
# Providing argument values by position vs. by keyword
print(func(1, arg2 = 2))
print(func(arg2 = 2, arg1 = 1))

In [None]:
# SyntaxError if uncommented: a positional argument cannot follow a keyword argument
#print(func( arg1 = 13, 13))

In [None]:
# Default value

def duplicate(string, number=2):
    """Return `string` repeated `number` times (twice when `number` is omitted)."""
    repeated = string * number
    return repeated


# Omitting the argument, passing it by position and passing it by keyword
# all reach the same parameter.
print(duplicate('foo'))
print(duplicate('foo', 2))
print(duplicate('foo', number=2))
print(duplicate('foo', 5))

In [None]:
def g(a, b=2, *, c):
    """Return the sum a + b + c.

    In the positional part of a signature, a parameter without a default may
    not follow one that has a default; `def g(a, b = 2, c)` is rejected with a
    SyntaxError. Declaring `c` keyword-only (after the bare `*`) keeps the
    parameter order and the default of `b` while giving a valid definition.

    Args:
        a: First addend.
        b: Second addend; defaults to 2.
        c: Third addend; required and must be passed by keyword.

    Returns:
        The value of ``a + b + c``.
    """
    return a + b + c

In [None]:
# In Python, every function returns something: if no return statement is reached, it returns None.
# Returning None explicitly is clearer, but relying on the implicit None works as well...

def calc(x, y):
    """Return x - y when x is greater than y; otherwise return None implicitly."""
    if x > y:
        difference = x - y
        return difference
    # No `return` is reached on this path, so the call evaluates to None.


print(calc(5, 3))
print(calc(3, 5))

In [None]:
# Like everything else, functions are objects
f = calc
print(f(5, 3))
print(type(f))

In [None]:
# Functions can also be arguments of other functions

def convert_seq(seq, convert_letter_function):
    """Convert a sequence letter by letter and return the joined result.

    Args:
        seq: Iterable of letters (for example a string).
        convert_letter_function: Callable applied to each letter; it must
            return a string.

    Returns:
        A string made of the converted letters, in their original order.
    """
    converted_letters = [convert_letter_function(symbol) for symbol in seq]
    return ''.join(converted_letters)
    
def dna_nt_to_rna_nt(nt):
    """Map a DNA nucleotide to RNA: 'T' becomes 'U', anything else is returned unchanged."""
    return 'U' if nt == 'T' else nt
    
def rna_nt_to_dna_nt(nt):
    """Map an RNA nucleotide to DNA: 'U' becomes 'T', anything else is returned unchanged."""
    # Guard clause: everything that is not uracil passes straight through.
    if nt != 'U':
        return nt
    return 'T'
    
print(convert_seq('ATTCGA', dna_nt_to_rna_nt))
print(convert_seq('UUUAGU', rna_nt_to_dna_nt))

In [None]:
# Can have functions within functions
# Helper function is only available within the context of the outer function

def dna_to_rna(dna_seq):
    """Transcribe a DNA sequence to RNA by turning every 'T' into 'U'.

    Args:
        dna_seq: Iterable of nucleotide letters (for example a string).

    Returns:
        A string with the converted letters in their original order.
    """

    def convert_nt(nt):
        """Return 'U' for 'T' and every other letter unchanged."""
        return 'U' if nt == 'T' else nt

    # str.join assembles the result in one go instead of extending a string
    # with `+=` on every iteration.
    return ''.join(convert_nt(nt) for nt in dna_seq)


print(dna_to_rna('AAAGAGAGAATGTTGC'))

In [None]:
# Global vs. local variables

x = 5  # module-level (global) x

def f():
    """Return 4, computed from a local x; the global x is not touched."""
    # Assigning to x inside the function creates a local variable that shadows
    # the global x for the whole function body.
    x = 3
    # Try commenting out the assignment above: f() then reads the global x and returns 6.
    return x + 1

print(f())  # 4 -- computed from the local x
print(x)    # 5 -- the global x is unchanged

In [None]:
# Inside a function, a name that is assigned to anywhere in the body is local
# for the whole body. `var1 += var2` means `var1 = var1 + var2`, so var1 is local
# in f and is read before it has been given a value.
var1, var2 = 5, 6

def f():
    """Show the error raised when a local name is read before it is assigned."""
    var1 += var2  # raises UnboundLocalError: the local var1 has no value yet

f()  # fails with UnboundLocalError -- this is what the cell demonstrates

In [None]:
# it is not recommended for functions to change global variables, but you can still do it

def f2():
    """Add the global var2 onto the global var1, rebinding var1 at module level."""
    # Only the name that gets rebound needs the declaration; var2 is merely read.
    global var1
    var1 += var2


f2()
print(var1)

In [None]:
# 'global' is required only when a function rebinds a global name (plain or
# augmented assignment).
# x.append(5) mutates the existing list in place: the name x is never rebound and
# id(x) stays the same, so no declaration is needed.
# x += [6] is an assignment to x; without 'global x' Python treats x as a local
# variable and the statement fails.
# Declaring 'global x' in f1 as well is optional, but it makes the intent clearer.

x = []

def f1():
    """Append 5 to the global list x (in-place mutation, no rebinding)."""
    x.append(5)

def f2():
    """Extend the global list x with 6 through augmented assignment."""
    # x must not be a parameter here: a parameter would shadow the global list
    # (calling f2() with a None default would evaluate `None += [6]`), and
    # `global x` on a parameter name is a SyntaxError.
    global x
    x += [6]

print(x)
f1()
print(x)
f2()
print(x)

# Builtin functions

In [None]:
print(sum(range(5)))
print(max([1, 5, 6, 2]))
print(min(1, 5, 6, 2))
print(abs(-5))

In [None]:
# dir will give you all of the attributes associated with this object type

s = 'string'
dir(s)

In [None]:
# dir alone gives you everything defined within the scope of the current environment

dir()

In [None]:
# dir() on the builtins module lists every built-in name available in the current
# environment. The module is called `builtins` in Python 3; `__builtin__` was its
# Python 2 name and is not defined in a plain Python 3 interpreter (some
# interactive shells such as IPython provide it as an alias).
import builtins

dir(builtins)

In [None]:
# Print every built-in name that contains "set" (case-insensitive).
# `builtins` is the Python 3 module name; `__builtin__` only exists in Python 2
# (and as an alias in some interactive shells).
import builtins

for builtin_name in dir(builtins):
    if 'set' in builtin_name.lower():
        print(builtin_name)

In [None]:
# you can find info about any function...

help(reversed)

In [None]:
sum?

In [None]:
str.lower?

# Modules

In [None]:
import random
print(random.random())

In [None]:
# like everything else in Python, a module is also an object

rrr1 = random
print(rrr1.random())

In [None]:
# this is the recommended syntax:

import random as rrr2
print(rrr2.random())

In [None]:
from random import randint
print(randint(0, 10))

from random import randint, choice as c
print(c('acde'))

from random import * # Not recommended!
print(random())

In [None]:
import sys
print(sys.version)
print('*' * 20)
print(sys.path) # a list of directories which Python looks at (in this order) when importing stuff

In [None]:
import math
print(math.exp(2))
print(math.e ** 2)
print(math.pi)
print(math.sin(math.pi / 6))
print(math.log(100))
print(math.log10(100))
print(math.log2(100))
print(math.log(81) / math.log(3))

# Useful bound functions

In [None]:
string = 'The sea was wet as wet could be, the sands were dry as dry.'

print(string.upper())
print(string.lower())

In [None]:
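# .find and .index both return the position of the first match; when the substring is missing,
# .find returns -1 while .index raises ValueError. Which one to use depends on your needs.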

print(string.find('the'))
print(string.index('the'))

print(string.find('seas'))
print(string.index('seas'))

In [None]:
print(string.startswith('The sea'))
print(string.startswith('The seas'))
print(string.endswith('dry'))

In [None]:
print(string.replace('as', 'more than'))
print(string.replace(' as ', ' more than '))

In [None]:
print(string.split(' '))
print(string.split())
print(string.split(' as '))

In [None]:
long_string = \
'''The sea was wet as wet could be,
The sands were dry as dry.
You could not see a cloud, because
No cloud was in the sky:
No birds were flying overhead--
There were no birds to fly.'''

print(long_string.splitlines())
print(long_string.split('\n'))

In [None]:
print('123456'.isdigit()) # True
print('A123456'.isdigit()) # False
print('123 456'.isdigit()) # False

print('AaBbCc'.isalpha()) # True
print('A123456'.isalpha()) # False
print('A '.isalpha()) # False

print('123 abc !'.islower()) # True
print('123 abc A'.islower()) # False

print('123 ABC !'.isupper()) # True
print('123 ABC a'.isupper()) # False

print(' \t\r\n'.isspace()) # True
print(' \t\r\n1'.isspace()) # False

In [None]:
# strip removes characters only from both ends of the string, never from the middle

print('   GAAGACT   \t\r\n'.strip())
print('NNGATGCGNNAGATGGGTNNNN'.strip('N'))

In [None]:
codons = ['ATG', 'TGT', 'AGG', 'GAA', 'TGT', 'ATG', 'ATC', 'TAG']
print(codons)

print(codons.count('TGT'))

print(codons.pop())
print(codons)

print(codons.pop(1))
print(codons)

In [None]:
codons = ['ATG', 'TGT', 'AGG', 'GAA', 'TGT', 'ATG', 'ATC', 'TAG']
aa_seq = 'MCRECMI*'

codon_and_aa = list(zip(codons, aa_seq))
print(codon_and_aa)

codon_table = dict(codon_and_aa)
print(codon_table)

In [None]:
print(list(zip(codons, aa_seq[:-1])))
print(list(zip(range(5), ['zero', 'one', 'two', 'three', 'four'], [None, None, True, True, False])))

In [None]:
print(list(enumerate(codons)))

In [None]:
for i, codon in enumerate(codons):
    print('Codon %d: %s' % (i, codon))

In [None]:
for i, codon_and_aa in enumerate(zip(codons, aa_seq)):
    codon, aa = codon_and_aa
    print('Codon %d: %s (%s)' % (i, codon, aa))

In [None]:
a, (b, c) = [1, [2, 3]]
print('a = %d, b = %d, c = %d' % (a, b, c))

In [None]:
for i, (codon, aa) in enumerate(zip(codons, aa_seq)):
    print('Codon %d: %s (%s)' % (i, codon, aa))

# Sorting

In [None]:
values = [1.0, 3.0, -1.2, 5.61, 2.11, 0.002, -2]
print(sorted(values))
print(sorted(values, reverse = True))

In [None]:
print(values)
values.sort()
print(values)

In [None]:
print(sorted(values, key = abs))
print(sorted(values, key = abs, reverse = True))

In [None]:
tuples = [(1, 2), (-1, 3), (0, 2), (5, 0), (3, 4), (0, -1)]
print(sorted(tuples))

In [None]:
def get_value(pair):
    """Sort key: return the second element of `pair`."""
    second = pair[1]
    return second
    
print(sorted(tuples, key = get_value))

In [None]:
# if we want to sort it according to the first item, when the second ones are the same
def get_value(pair):
    """Sort key: the second element first, then the first element as a tie-breaker."""
    primary, tie_breaker = pair[1], pair[0]
    return (primary, tie_breaker)
    
print(sorted(tuples, key = get_value))

In [None]:
animals = ['dog', 'cat', 'mouse', 'elephant']
print(sorted(animals))
print(sorted(animals, key = len))

# Files

In [None]:
f = open(r'C:\Users\mirza\OneDrive\Desktop\Fakultet\Biological Data Analysis with Python\Code\2 BRCA1.txt', 'r')
print(f)
print('*' * 20)

content = f.read()
print(content)

In [None]:
print(f.read())

In [None]:
# Never forget to close a file! (Opening it with `with open(...) as f:` closes it automatically.)
f.close()

In [None]:
# Parsing is a common programming routine

# Parse `content` as lines of the form "key<TAB>value".
# Lines whose key is 'exon' carry a "start..end" range and are collected in
# `exons` as (start, end) integer tuples; every other key goes into `meta`.
meta = {}
exons = []

for line in content.splitlines():

    # Skip blank lines: they contain no tab, so the two-value unpacking below
    # would fail with ValueError.
    if not line.strip():
        continue

    key, value = line.split('\t')

    if key == 'exon':
        start, end = value.split('..')
        exons.append((int(start), int(end)))
    else:
        meta[key] = value


# Print the parsed data to see we got it right

for key, value in meta.items():
    print('%s: %s' % (key, value))

print('*' * 20)
print('Exons:')
print(exons)

In [None]:
# The "+ 1" counts both the start and the end position as part of the exon
# (inclusive coordinates): a range start..end spans end - start + 1 positions.
exon_lengths = [end - start + 1 for start, end in exons]

print('Exons: %d' % len(exons))
print('Min exon length: %d' % min(exon_lengths))
print('Max exon length: %d' % max(exon_lengths))
print('Average exon length: %.2f' % (sum(exon_lengths) / len(exons)))

In [None]:
# Write the exon lengths as one comma-separated line; str(list)[1:-1] drops the
# surrounding brackets. The `with` block closes the file even if the write raises.
with open(r'c://downloads/exon_lengths.txt', 'w') as f:
    f.write(str(exon_lengths)[1:-1])

# More useful modules

In [None]:
import os

print(os.path.join(r'C://temp/dir/', 'something/file.txt'))
print(os.listdir(r'C://'))

In [None]:
from collections import Counter, defaultdict

In [None]:
aa_seq = 'MQAEQTRCAAARGSAEMESLWHAAPGDEEIPLHPPPTPGAMSLESDSSLDTLAEKIECDLMDLLGDMGPPCDIDEEEDQLFAEALPPLYS'
aa_count = Counter()

print(aa_count)

for aa in aa_seq:
    aa_count[aa] += 1
    
print(aa_count)

In [None]:
print(Counter(aa_seq))

In [None]:
aa_count['A'] += 1
print(aa_count)

aa_count['A'] += 3
print(aa_count)

In [None]:
print(aa_count['W'])
print(aa_count['*']) # Doesn't exist in the counter.

In [None]:
# Ask the Counter directly for its five most frequent entries instead of
# slicing the full most_common() list.
for aa, count in aa_count.most_common(5):
    print('%s: %d' % (aa, count))

In [None]:
aa_count = Counter(aa_seq)
print(aa_count)
aa_count.update('AALLL')
print(aa_count)

In [None]:
print(dict(aa_count))

In [None]:
print(Counter('the sea was wet as wet could be the sands were dry as dry'.split()))

In [None]:
aa_positions = defaultdict(list)

for i, aa in enumerate(aa_seq):
    aa_positions[aa].append(i)
    
print(aa_positions)
print('*' * 20)
print(aa_positions['Z'])

In [None]:
print(dict(aa_positions))

In [None]:
# mini homework: go through the code and understand it...

def create_empty_counter():
    """Return a new, empty Counter."""
    fresh_counter = Counter()
    return fresh_counter

# Missing keys get a fresh Counter from create_empty_counter on first access.
next_aa_counter = defaultdict(create_empty_counter)

# Walk over adjacent pairs: every residue together with the one that follows it.
for aa, next_aa in zip(aa_seq, aa_seq[1:]):
    next_aa_counter[aa][next_aa] += 1

print(next_aa_counter)
print('*' * 20)
print(next_aa_counter['A'])
print(next_aa_counter['A']['E'])
# Looking up the missing key 'Z' inserts an empty Counter for it, which is
# visible in the final print below.
print(next_aa_counter['Z']['E'])
print('*' * 20)
print(next_aa_counter)