In [None]:
import os

# Folder that holds the data files read by this notebook ('viperdb.csv' is loaded from it below).
DIR = r'c://downloads'


# Recap

In [None]:
import csv

# Read the whole CSV inside a 'with' block so the file is closed even if parsing fails part-way.
# newline = '' is what the csv module asks for, so that newlines embedded in quoted fields are handled correctly.
with open(os.path.join(DIR, 'viperdb.csv'), 'r', newline = '') as f:
    csv_reader = csv.reader(f)

    # The first row holds the column names; every remaining row becomes a {column name: value} dictionary.
    viperdb_headers = next(csv_reader)
    viperdb_records = [dict(zip(viperdb_headers, line)) for line in csv_reader]

# Show only the first few records rather than dumping the whole table.
print(viperdb_records[:5])

In [None]:
# Mean of the 'Outer Radius' column. The values read from the CSV are strings, so each is converted to int first.
outer_radii = (int(record['Outer Radius']) for record in viperdb_records)
avg_outer_radius = sum(outer_radii) / len(viperdb_records)

# %d shows only the integer part of the average.
print('Avg. outer radius = %d Angstrom' % avg_outer_radius)

# operator.itemgetter

In [None]:
from operator import itemgetter

In [None]:
# itemgetter(i) builds a callable that returns obj[i] for whatever obj it is given,
# i.e. calling it is equivalent to using the [i] syntax.
get_2nd_item = itemgetter(1)
get_2nd_item('abc')

In [None]:
# Sort the pairs by their second element. The lambda and the itemgetter are interchangeable as key functions,
# so both lines print the same result.
pairs = [(1, 6), (4, 2), (8, 1), (7, 6)]
print(sorted(pairs, key = lambda pair: pair[1]))
print(sorted(pairs, key = get_2nd_item))

In [None]:
# The [] syntax works with any object that supports indexing (lists, tuples, strings, dicts, ...), and so does itemgetter.
# Here the index is a string (a dictionary key) instead of an integer position:

sorted_viperdb_records1 = sorted(viperdb_records, key = lambda record: record['Genome'])
sorted_viperdb_records2 = sorted(viperdb_records, key = itemgetter('Genome'))
# Both key functions read record['Genome'], so the two sorted lists are equal and this prints True.
print(sorted_viperdb_records1 == sorted_viperdb_records2)

The `operator` module contains function versions of Python's built-in operators. For example, `operator.lt(x, y)` is equivalent to `x < y`.

# itertools

In [None]:
# The 'itertools' module contains a lot of goodies for working with sequences and iterating over them that can be
# surprisingly useful.

from itertools import repeat, chain, cycle, product, permutations, groupby

In [None]:
# repeat('A', 10) is a lazy iterator that yields 'A' ten times; printing it shows the iterator object, not its items.
A_10_times = repeat('A', 10)
print(A_10_times)
# next() pulls a single item out of the iterator.
print(next(A_10_times))

In [None]:
# Wrapping the iterator in list() materializes all 10 items at once.
print(list(repeat('A', 10)))

In [None]:
# chain() walks through several iterables one after the other, as if they were a single sequence.
print(list(chain([1, 2, 3], [4, 5, 6], [7, 8])))

In [None]:
# cycle() repeats its input endlessly, so the counter wraps around: 0, 1, 2, 0, 1, 2, ...
cyclic_counter = cycle(range(3))

# The iterator never ends on its own, so take a fixed number of items with next().
for _ in range(10):
    print(next(cyclic_counter))

In [None]:
# product() yields every ordered combination that takes one item from each input (the Cartesian product).
print(list(product('AB', 'CD')))

In [None]:
BASES = 'ACGT' # The convention is upper-case names for constants
# All ordered triplets of bases, each one as a tuple of 3 single-character strings.
print(list(product(BASES, BASES, BASES)))

In [None]:
# Join each triplet of bases into a 3-letter string, giving every possible codon (4 ** 3 = 64 of them).
all_codons = [''.join(codon) for codon in product(BASES, BASES, BASES)]
print(all_codons)

In [None]:
# permutations() yields ordered selections of 3 different positions, so (unlike product) no base repeats within a tuple.
print(list(permutations(BASES, 3)))

In [None]:
# groupby() bundles consecutive items that share the same key - here, the first two letters of each codon.
# all_codons is already ordered by those letters (product() emits its results in that order), so each prefix
# appears exactly once.
for codon_prefix, codon_group in groupby(all_codons, lambda codon: codon[:2]):
    print(codon_prefix, list(codon_group))

In [None]:
# When using groupby, it's important to have the data sorted by the grouping key first!
# groupby only merges *adjacent* items with equal keys. all_codons is not ordered by GC count,
# so the same count shows up below as many separate groups.

def count_gc(codon):
    """Number of 'C' and 'G' letters in the codon."""
    return codon.count('C') + codon.count('G')

for gc_count, codon_group in groupby(all_codons, count_gc):
    print(gc_count, list(codon_group))

In [None]:
def genome_type_of(record):
    """Return the first whitespace-separated word of the record's 'Genome' field."""
    return record['Genome'].split()[0]

# groupby only merges *adjacent* items, so the records are sorted by the very same key that is used for grouping.
# The full 'Genome' string is kept as a tie-breaker, so records of one genome type stay ordered by it.
sorted_viperdb_records = sorted(viperdb_records, key = lambda record: (genome_type_of(record), record['Genome']))

for genome_type, genome_group in groupby(sorted_viperdb_records, genome_type_of):
    # De-duplicate the family names with a set, then sort them: set order is arbitrary, and sorting keeps
    # the printed listing identical from run to run.
    families = sorted({record['Family'] for record in genome_group})
    print('%s (%d families):' % (genome_type, len(families)))
    print('\t' + '\n\t'.join(families))

# More one-liners

In [None]:
# The straightforward version: a full if/else statement with one print per branch.
for i in [1, 2, 3, 5, 6, 9, 11]:
    if i % 2 == 0:
        print('%d is even' % i)
    else:
        print('%d is odd' % i)

In [None]:
# Same output, using a conditional expression ('X if condition else Y') to choose the label in one line.
for i in [1, 2, 3, 5, 6, 9, 11]:
    label = 'even' if i % 2 == 0 else 'odd'
    print('%d is %s' % (i, label))

In [None]:
# The whole loop can even be squeezed into a single line (legal Python, but harder to read):
for i in [1, 2, 3, 5, 6, 9, 11]: print('%d is %s' % (i, 'even' if i % 2 == 0 else 'odd'))

Please don't do this, though. Always start a new line after the loop header (the `for ...:` line).

In [None]:
dna_seq = 'AATGCGATGCAGTGAGTAAGTCAAAAGTAA'

# Transcribe DNA to RNA one nucleotide at a time: every 'T' becomes 'U', anything else is kept as it is.
rna_seq = ''.join(nt if nt != 'T' else 'U' for nt in dna_seq)
print(rna_seq)

In [None]:
# Several statements can share one line when separated by semicolons.
# It's considered bad practice to do that in regular code.
a = 1; b = 2; print(a + b)

# However, it can be useful when running from the command line, e.g.: python -c "import sys; print(sys.version)"

# Do not abuse one-liners

In [None]:
# One-liners can often make your code more compact and readable, but sometimes it's better to use more than one line.
# The line below prints every n in range(2, 100) that has no divisor between 2 and sqrt(n), i.e. the primes
# below 100. It is correct, but hard to decipher at a glance.
print([n for n in range(2, 100) if len([i for i in range(2, int(n ** 0.5) + 1) if n % i == 0]) == 0])

# \*args and \*\*kwargs

In [None]:
# Let's take a look at Python's max() function.
# It accepts either a single iterable of values or several separate values:
print(max([1, 2, 3]))
print(max(1, 2))
print(max(1, 2, 3, 4, 5))

In [None]:
# It seems to take a variable number of arguments - can we do that in our own functions?

def f(*args):
    """Print the tuple of positional arguments the function was called with."""
    # 'args' will be a tuple containing all the non-named arguments in order.
    print(args)

f(1, 2)
f(1, 2, 3)
f([1, 2, 3])  # a single argument: 'args' is a 1-tuple holding the list
f()  # no arguments: 'args' is the empty tuple

In [None]:
# 'args' is just a tuple, so we can check its length, iterate over it and get values from it.

def my_max(*args):
    """
    A simplified re-implementation of max().

    Called with a single argument, that argument is treated as an iterable of candidates;
    called with any other number of arguments, the arguments themselves are the candidates.
    Returns the largest candidate according to '<', or None when there is nothing to compare
    (no arguments at all, or an empty iterable).
    """
    candidates = args[0] if len(args) == 1 else args

    largest = None

    for candidate in candidates:
        # Skip anything that does not beat the current maximum
        # (while 'largest' is still None, nothing has been recorded yet).
        if largest is not None and not (largest < candidate):
            continue
        largest = candidate

    return largest

print(my_max([1, 2, 3]))
print(my_max(1, 2))
print(my_max(1, 2, 3, 4, 5))

In [None]:
# *args can come after regular arguments. Here f takes 2 or more arguments:

def f(a, b, *args):
    """Apply the linear map x -> a * x + b to every argument given after a and b, and return the results as a list."""
    transformed = []
    for x in args:
        transformed.append(a * x + b)
    return transformed

print(f(2, 5, 10, 11, 12))

In [None]:
# *args should normally be the last positional parameter. In Python 3, any parameter that comes after
# *args is keyword-only: *args swallows every remaining positional value, so 'b' can never be
# filled positionally.

def f(a, *args, b):
    """Return [a * x + b] for every extra positional argument x. 'b' must be passed by keyword."""
    return [a * x + b for x in args]

# Passing everything positionally fails. Catch the error and show it instead of aborting the notebook.
try:
    f(1, 2, 3, 4, 5)
except TypeError as error:
    print('TypeError: %s' % error)

# Passing b by keyword works.
print(f(1, 2, 3, 4, b = 5))

In [None]:
# There is also an equivalent for keyword arguments. Consider the function str.format(),
# which accepts arbitrary keyword names and substitutes them into the matching {placeholders}:
print('{who} expects the {nation} Inquisition!'.format(who = 'Nobody', nation = 'Spanish'))

In [None]:
# 'kwargs' will be a dictionary collecting all of the undeclared keyword arguments passed to the function:

def f(**kwargs):
    """Print the dictionary of keyword arguments the function was called with."""
    # Keys are the argument names (as strings); values are whatever was passed for them.
    print(kwargs)

f(white = 'black', true = 'false', answer = 42)
f()  # no keyword arguments: 'kwargs' is an empty dictionary

In [None]:
# Option 1: the caller builds a dictionary and passes it as a single argument.

def f(dictionary):
    """Render the dictionary as 'key = value' entries, ordered by key and joined with ', '."""
    entries = []
    for item in sorted(dictionary.items()):
        # 'item' is a (key, value) pair, which fills the two %s placeholders directly.
        entries.append('%s = %s' % item)
    return ', '.join(entries)

print(f({'a': 1, 'b': 3}))


# Option 2: the caller passes keyword arguments, which Python collects into the 'kwargs' dictionary.

def f(**kwargs):
    """Render the keyword arguments as 'name = value' entries, ordered by name and joined with ', '."""
    ordered_items = sorted(kwargs.items())
    return ', '.join('%s = %s' % (key, value) for key, value in ordered_items)

print(f(b = 3, a = 1))

In [None]:
# Also a legitimate way to define dictionaries: each keyword argument name becomes a string key.
dict(a = 1, b = 3)

In [None]:
def f(a, b = 0, **kwargs):
    """Return a dictionary mapping every keyword argument name to a * value + b (b defaults to 0)."""
    scaled = {}
    for key, value in kwargs.items():
        scaled[key] = a * value + b
    return scaled

print(f(3, x = 5, y = 6))

In [None]:
# *args and **kwargs can be combined
def f(*args, **kwargs):
    """Return the positional arguments (a tuple) and the keyword arguments (a dictionary) as a pair."""
    return (args, kwargs)
    
print(f(1, 7, '8', aaa = 'bbb', ccc = 'ddd'))

In [None]:
# It also works the other way! At the call site, * unpacks a sequence into positional arguments
# and ** unpacks a dictionary into keyword arguments.

def f(a, b, c):
    """Return a * b + c."""
    return a * b + c
    
abc_args = [2, 3, 4]
print(f(*abc_args))  # same as f(2, 3, 4)

abc_kwargs = dict(a = 2, b = 3, c = 4)
print(f(**abc_kwargs))  # same as f(a = 2, b = 3, c = 4)

In [None]:
def f(a, b, *args, **kwargs):
    """
    Apply x -> a * x + b to every extra positional argument and to every keyword argument value.

    Returns a pair: the list of transformed positional arguments, and a dictionary mapping each
    keyword argument name to its transformed value.
    """
    def transform(x):
        return a * x + b

    positional_results = [transform(x) for x in args]
    keyword_results = {key: transform(value) for key, value in kwargs.items()}
    return positional_results, keyword_results

# The list is unpacked into positional arguments (so b = 1 and args = (5, 8, 10)),
# and the dictionary is unpacked into keyword arguments.
print(f(10, *[1, 5, 8, 10], **{'cat': 6, 'dog': -6}))

In [None]:
# zip(*zipped) is the inverse of zip(): unpacking the zipped pairs back into zip() recovers the original sequences.

x = ['a', 'b', 'c']
y = [0, 1, 2]
print(x, y)

zipped_xy = [pair for pair in zip(x, y)]
print(zipped_xy)

# Each recovered sequence comes back as a tuple...
unzipped_xy = [column for column in zip(*zipped_xy)]
print(unzipped_xy)

# ...so convert the tuples to lists before comparing them with the original lists.
unzipped_x, unzipped_y = (list(column) for column in zip(*zipped_xy))
print(unzipped_x == x, unzipped_y == y)