# Code reorganization
Using notebooks has lots of good sides. But notebooks [have downsides as well](https://docs.google.com/presentation/d/1n2RlMdmv1p25Xy5thJUhkKGvjtV-dkAIsUXP-AL4ffI/edit). In short:
- Encourage bad coding practices (copy-paste, global variables, low reusability)
- Hard to manage (hidden state & global variables)
- Hard to debug

**DRY** - **D**on't **R**epeat **Y**ourself  
Rule of three -- [when similar code is used three times, it should be extracted into a new procedure](https://en.wikipedia.org/wiki/Rule_of_three_(computer_programming))

What to do:
- Write functions for frequently used functionality (e.g., computing statistics, data transforms, models, etc.)
- Put those functions into a file (e.g., `utils.py`)
- Use the functions, improve and update them

In [1]:
%load_ext autoreload
%autoreload 2

### Function

In [2]:
def sqr(x):
    """Return ``x`` multiplied by itself."""
    squared = x * x
    return squared


# Python integers have arbitrary precision, so the large input does not overflow.
print("10 ^ 2 =", sqr(10))
print("1234567890 ^ 2 =", sqr(1234567890))

10 ^ 2 = 100
1234567890 ^ 2 = 1524157875019052100


In [3]:
import math

def sqrt(x):
    """Return the square root of ``x``, or ``None`` when ``x >= 0`` does not hold."""
    return math.sqrt(x) if x >= 0 else None


print("sqrt(10) =", sqrt(10))
print("sqrt(-1) =", sqrt(-1))
# For comparison, calling math.sqrt(-1) directly raises ValueError (math domain error):
# print("sqrt(-1) =", math.sqrt(-1))

sqrt(10) = 3.1622776601683795
sqrt(-1) = None


In [4]:
def mean(xs):
    """Return the arithmetic mean of the numbers in ``xs``.

    Args:
        xs: Any iterable of numbers (list, tuple, generator, ...).

    Returns:
        The sum of the items divided by the number of items.

    Raises:
        ZeroDivisionError: If ``xs`` yields no items.
    """
    total = 0
    count = 0
    # Count while summing so iterables without len() (e.g. generators) work too.
    for x in xs:
        total += x
        count += 1
    if count == 0:
        # Same exception type as the implicit division by zero, with a clearer message.
        raise ZeroDivisionError("mean() of an empty iterable is undefined")
    return total / count

mean([1,2,3,4,5])

3.0

In [5]:
import random

def generate_gaussian_vector(size, mu, sigma):
    """Return a list of ``size`` samples, each drawn with ``random.gauss(mu, sigma)``."""
    samples = []
    for _ in range(size):
        samples.append(random.gauss(mu, sigma))
    return samples


# Fix the seed so the vector shown below is the same on every run.
random.seed(42)
generate_gaussian_vector(10, 0, 1)

[-0.14409032957792836,
 -0.1729036003315193,
 -0.11131586156766247,
 0.7019837250988631,
 -0.12758828378288709,
 -1.4973534143409575,
 0.33231834406771527,
 -0.2673374784971682,
 -0.21695868414519504,
 0.11588478670085507]

In [6]:
def generate_gaussian_vector(size, mu=0, sigma=1):
    """Return a list of ``size`` samples, each drawn with ``random.gauss(mu, sigma)``.

    ``mu`` and ``sigma`` default to 0 and 1, so callers may pass only ``size``.
    """
    samples = []
    for _ in range(size):
        samples.append(random.gauss(mu, sigma))
    return samples


# Same seed as before: relying on the defaults gives the same vector as passing 0 and 1.
random.seed(42)
generate_gaussian_vector(10)

[-0.14409032957792836,
 -0.1729036003315193,
 -0.11131586156766247,
 0.7019837250988631,
 -0.12758828378288709,
 -1.4973534143409575,
 0.33231834406771527,
 -0.2673374784971682,
 -0.21695868414519504,
 0.11588478670085507]

### Import file

In [7]:
import utils

In [8]:
# Reseed before generating so the printed vector is reproducible.
random.seed(42)
# Call the helpers through the imported `utils` module
# (presumably the same functions defined earlier in this notebook — verify against utils.py).
print("12 ^ 2 =", utils.sqr(12))
print("random gaussian vector:", utils.generate_gaussian_vector(10))

12 ^ 2 = 144
random gaussian vector: [-0.14409032957792836, -0.1729036003315193, -0.11131586156766247, 0.7019837250988631, -0.12758828378288709, -1.4973534143409575, 0.33231834406771527, -0.2673374784971682, -0.21695868414519504, 0.11588478670085507]


### List comprehensions

In [9]:
# "old" way to generate array of random numbers from previous lecture:
# pre-allocate a list of zeros, then overwrite each slot by index.
random.seed(42)
size = 10
xs = [0] * size
for k in range(size):
    xs[k] = random.random()
xs

[0.6394267984578837,
 0.025010755222666936,
 0.27502931836911926,
 0.22321073814882275,
 0.7364712141640124,
 0.6766994874229113,
 0.8921795677048454,
 0.08693883262941615,
 0.4219218196852704,
 0.029797219438070344]

In [10]:
# with comprehension: builds the list in a single expression
# (reseeded with 42 again, so the values match the loop version)
random.seed(42)
size=10
xs = [random.random() for _ in range(size)]
xs

[0.6394267984578837,
 0.025010755222666936,
 0.27502931836911926,
 0.22321073814882275,
 0.7364712141640124,
 0.6766994874229113,
 0.8921795677048454,
 0.08693883262941615,
 0.4219218196852704,
 0.029797219438070344]

In [11]:
# make random categorical variable: one randomly chosen label per element.
# Reuses `size` from the previous cell; the RNG is not reseeded here, so the
# labels depend on the random state left by the cells above.
cats = [random.choice(["a", "b", "c", "d"]) for _ in range(size)]
cats

['b', 'b', 'a', 'b', 'd', 'b', 'd', 'c', 'a', 'b']

In [12]:
# Add one to every element of range(10);
# compare with list(range(1, 11))
[k+1 for k in range(10)]

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [13]:
# Same values as the comprehension above, using range's start/stop arguments.
list(range(1, 11))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [14]:
# Keep only the even numbers with an `if` filter;
# compare with list(range(0, 10, 2))
[k for k in range(10) if k % 2 == 0]

[0, 2, 4, 6, 8]

In [15]:
# Same values as the filtered comprehension above, using range's step argument.
list(range(0, 10, 2))

[0, 2, 4, 6, 8]

In [16]:
# Reseed so both vectors are reproducible; xs1 is drawn before xs2.
random.seed(42)

size = 5
xs1 = [random.random() for _ in range(size)]
xs2 = [random.random() for _ in range(size)]
# zip pairs up the elements at the same position in both lists.
diffs = [left - right for left, right in zip(xs1, xs2)]

for label, values in (("xs1:", xs1), ("xs2:", xs2), ("diffs:", diffs)):
    print(label, values)

xs1: [0.6394267984578837, 0.025010755222666936, 0.27502931836911926, 0.22321073814882275, 0.7364712141640124]
xs2: [0.6766994874229113, 0.8921795677048454, 0.08693883262941615, 0.4219218196852704, 0.029797219438070344]
diffs: [-0.037272688965027556, -0.8671688124821785, 0.1880904857397031, -0.19871108153644768, 0.7066739947259421]


In [17]:
# Index-based equivalent; relies on xs1 and xs2 both having `size` elements.
[xs1[k] - xs2[k] for k in range(size)]  # same as using zip

[-0.037272688965027556,
 -0.8671688124821785,
 0.1880904857397031,
 -0.19871108153644768,
 0.7066739947259421]

In [18]:
# nicer output: format every number with 4 decimal places
# (each list now holds strings, hence the quotes in the printout)
print("xs1:", ["{:.4f}".format(x1) for x1 in xs1])
print("xs2:", ["{:.4f}".format(x2) for x2 in xs2])
print("diffs:", ["{:.4f}".format(diff) for diff in diffs])

xs1: ['0.6394', '0.0250', '0.2750', '0.2232', '0.7365']
xs2: ['0.6767', '0.8922', '0.0869', '0.4219', '0.0298']
diffs: ['-0.0373', '-0.8672', '0.1881', '-0.1987', '0.7067']


In [19]:
# conditional comprehension: keep (and format) x1 only for the pairs where x1 < x2
x1_less_than_x2 = ["{:.4f}".format(x1) for x1, x2 in zip(xs1, xs2) if x1 < x2]
x1_less_than_x2

['0.6394', '0.0250', '0.2232']

In [20]:
# ex. write list comprehension to compute mean values of the vector and compare with utils.mean
# NOTE(review): no reseed in this cell — if utils.generate_gaussian_vector draws from the
# global `random` state, the values depend on what earlier cells consumed; confirm.
xs = utils.generate_gaussian_vector(10)

# The comprehension copies xs element by element before it is averaged.
print(mean([x for x in xs]))
print(utils.mean(xs))

0.19027727304636768
0.19027727304636768


In [21]:
# Exercise: compute the mean squared error with a list comprehension.
random.seed(42)
size = 10

# Labels are drawn first and predictions second, so the RNG sequence is fixed.
gt = [random.choice([0, 1]) for _ in range(size)]
pred = [random.random() for _ in range(size)]

# list comprehension
squared_errors = [(truth - guess) ** 2 for truth, guess in zip(gt, pred)]
mse = mean(squared_errors)
print(mse)

# iteration
mse = 0
for truth, guess in zip(gt, pred):
    mse += (truth - guess) ** 2
mse /= size
print(mse)

0.192538980857274
0.192538980857274


In [22]:
# Exercise: compute the mean absolute error with a list comprehension.

# list comprehension
absolute_errors = [abs(truth - guess) for truth, guess in zip(gt, pred)]
mae = mean(absolute_errors)
print(mae)

# iteration
mae = 0
for truth, guess in zip(gt, pred):
    mae += abs(truth - guess)
mae /= size
print(mae)

0.3603884839734178
0.3603884839734178
