In [None]:
# Squares of the offsets -5..4, i.e. (i - 5)^2 for i in 0..9.
data = [offset * offset for offset in range(-5, 5)]
print(data, " max: ", max(data), ", min: ", min(data))

# Must Know about Python!
- use collections!
- use lambdas!
- use functional tools (`map`, `filter`)!

In [None]:
# A range is lazy: it describes the sequence without storing every element.
iterable = range(10)  # Creates an iterable that goes from 0 to 9
print(iterable)  # Prints the range object itself, not its elements.
# dir(iterable)  # Shows what you can do with it.

In [None]:
# Build three different collections from the same iterable.
some_list = list(iterable)  # Converts to a list. Python does not use proper 'arrays' naturally.
print(some_list)
some_set = set(iterable)  # Converts to a set.
print(some_set)
some_dict = {i: i*i for i in iterable}  # This is the "Dict Comprehension" syntax: each i is mapped to its square.
print(some_dict)

In [None]:
# Lambdas - Functions can be treated as objects
# The lambda is never called here; print shows the function object itself.
print(lambda some_object: some_object)

In [None]:
x = lambda number: number + 1  # x names a function that adds one to its argument.
y = x  # y is a second name for the same function object (nothing is copied).
print(y(1))  # Calling through y runs the function bound to x.

In [None]:
z = lambda number: x(x(number))  # Composition: apply x twice.
print(z(1))

In [None]:
# map applies z to every element of iterable; list() collects the results for printing.
print(list(map(z, iterable)))

In [None]:
# filter and is_divisible
def is_divisible_by(factor):
    """Build a predicate that tests divisibility by ``factor``.

    Args:
        factor: the divisor captured by the returned function.

    Returns:
        A one-argument function; called with ``number`` it evaluates
        ``number % factor == 0``.
    """
    def divides_evenly(number):
        return number % factor == 0

    return divides_evenly

# is_divisible_by returns a function, so `even` is itself callable.
even = is_divisible_by(2)
print(list(filter(even, iterable)))  # filter keeps the elements for which even(...) is true.
# odd is defined in terms of even: it negates whatever even returns.
odd = lambda number: not(even(number))
print(list(filter(odd, iterable)))

In [None]:
# map and multiply_by
def multiply_by(factor_to_multiply_by):
    """Build a function that multiplies its argument by a fixed factor.

    Args:
        factor_to_multiply_by: the left-hand operand captured by the
            returned function.

    Returns:
        A one-argument function; called with ``input_number`` it evaluates
        ``factor_to_multiply_by * input_number``.
    """
    def scale(input_number):
        return factor_to_multiply_by * input_number

    return scale

# Each call to multiply_by produces an independent function with its own factor.
doubler = multiply_by(2)
print(list(map(doubler, some_list)))  # map applies doubler to every element of some_list.
tripler = multiply_by(3)
print(list(map(tripler, some_list)))

In [None]:
# (arg)min
# `key` scores each candidate; min returns the candidate whose score is smallest
# (here: the integer in 0..9 with the smallest squared distance to 5.2).
print(min(range(10), key=lambda value: (value-5.2)*(value-5.2)))
# Python allows named (keyword) parameters, like 'key', which helps when a function has a long list of optional parameters.
# The lambda object created here is passed as the parameter called "key". See below for a clearer example.

In [None]:
# Note: this rebinds the name `data` to a new list.
data = [5, 0, 4, 7, -1]
print(min(data))  # The smallest value itself.

In [None]:
# argmin: search over the indices of data, but compare them by the value stored at each index,
# so the result is the position of the smallest value rather than the value.
print(min(range(len(data)), key=(lambda index: data[index])))

___
# Numpy
Time to start thinking in terms of matrices and parallel data.  
There are a few ways to create matrices in Numpy:

In [None]:
import numpy as np

In [None]:
# Create an array directly from a Python list.
arr1 = np.array([1, 8, 2, 7])
print(arr1.shape)  # .shape is a tuple with one entry per dimension.
print(arr1)

In [None]:
# np.arange is the array counterpart of Python's range.
arr2 = np.arange(4)  # Note that this is "a. range", not "arrange". Short for "Array Range".
print(arr2.shape)
print(arr2)

In [None]:
# Lay the integers 0..19 out as a matrix with 4 rows and 5 columns.
arr3 = np.arange(20).reshape(4, 5)
print(arr3.shape)
print(arr3)

In [None]:
# A 4-row, 5-column matrix filled with zeros.
arr4 = np.zeros(shape=(4, 5))
print(arr4.shape)
print(arr4)

In [None]:
# Arithmetic with a scalar is applied to every element and yields a new array.
arr4_1 = arr4 + 0.5
print(arr4_1)
arr4_2 = 2 * arr4_1
print(arr4_2)

In [None]:
# A 5-row, 4-column matrix filled with ones.
arr5 = np.ones(shape=(5, 4))
print(arr5.shape)
print(arr5)

In [None]:
# The 4x4 identity matrix.
arr6 = np.identity(4)
print(arr6.shape)
print(arr6)

In [None]:
# Indexing is [row, column]; this modifies arr6 in place.
arr6[1,0] = 42 # We can do assignment to any indexed/sliced submatrix
print(arr6)

# Slicing
Numpy extends the indexing syntax of arrays to allow selection of contiguous submatrices, or even conditional selection.
Numpy also allows appending of matrices.

In [None]:
from sklearn import datasets
from matplotlib import pyplot as plt

In [None]:
# Load the iris dataset from scikit-learn and separate the measurements from the labels.
iris = datasets.load_iris()
iris_data, iris_labels = iris.data, iris.target
print("Data: ", iris_data.shape)
print("Data Transposed: ", iris_data.T.shape)  # .T gives the transpose.
print("Labels: ", iris_labels.shape)

In [None]:
# https://docs.scipy.org/doc/numpy-1.13.0/user/basics.indexing.html

# Select rows 50-99 and the columns from index 2 onward
# (the last two features, assuming iris_data has four feature columns):
subset_data = iris_data[50:100, 2:]
print(subset_data.shape)

In [None]:
# Plot the two selected features against each other, coloured by the labels of the same rows.
plt.scatter(x=subset_data[:, 0], y=subset_data[:, 1], c=iris_labels[50:100])
plt.show()

(For those with too much time on your hands, you may notice only 36 points, which is not the 50 we sliced. In the iris dataset, measurements were only made to the nearest tenth of a centimeter, leading to some points having identical coordinates in this 2D subspace.)

In [None]:
# make_moons is random; a fixed random_state makes the generated points,
# and every plot and filter built from them, identical on each run.
moons_data, moons_labels = datasets.make_moons(n_samples=200, noise=.05, random_state=0)
print(moons_labels[:10])  # The points from the different classes are all mixed up.
plt.scatter(moons_data[:,0], moons_data[:,1], c=moons_labels)
plt.show()

In [None]:
# Let's filter for only one half-circle.  Start by appending labels to data.
print(moons_data.shape)
print(moons_labels.shape)
# Appending along axis 1 requires both arrays to have the same number of
# dimensions. While the labels are still one-dimensional this raises
# ValueError; the error is caught and shown so the notebook keeps running.
try:
    combined = np.append(moons_data, moons_labels, 1)  # 1 is the dimension along which the appending happens.  0 for rows, 1 for columns.
    print(combined.shape)
except ValueError as error:
    print("np.append failed:", error)

In [None]:
print(moons_data.shape)
# Turn the labels into a single-column matrix so they have as many dimensions as the data.
moons_labels = moons_labels.reshape((-1, 1))
print(moons_labels.shape)
# axis=1 appends the labels as an extra column; axis=0 would append rows.
combined = np.append(moons_data, moons_labels, axis=1)
print(combined.shape)

In [None]:
# combined[:,2] is the label column; comparing it with 0 gives one boolean per row.
condition = combined[:,2]==0  # This is subtle. First, we select the final column.  Then, an element-wise boolean operation is performed.
circle_0 = combined[condition]  # Boolean indexing keeps only the rows where condition is True.
plt.scatter(circle_0[:,0], circle_0[:,1], c=circle_0[:,2])
plt.show()

# Matrix Multiplication!

In [None]:
# 2x2 integer matrix whose powers are used below to compute Fibonacci numbers.
fib_mat = np.array([[1, 1], [1, 0]])
print(fib_mat)

In [None]:
# `@` is the matrix-multiplication operator (not element-wise `*`).
print(fib_mat @ fib_mat)  # second power
print(fib_mat @ fib_mat @ fib_mat)  # third power
print(fib_mat @ fib_mat @ fib_mat @ fib_mat)  # fourth power

In [None]:
# matrix_power computes the repeated product directly: here fib_mat to the 5th power.
print(np.linalg.matrix_power(fib_mat, 5))

# Question:
If we were to write our own Fibonacci-computing function, what are some things we could do to make it as fast as possible?

In [None]:
def numpy_fib(n):
    """Return entry [0, 0] of ``fib_mat`` raised to the power ``n - 1``.

    The power is computed with ``np.linalg.matrix_power`` on the module-level
    ``fib_mat`` array.

    NOTE(review): ``fib_mat`` is a fixed-width integer array, so results
    presumably wrap around once the true value no longer fits -- confirm
    before trusting values for large ``n``.
    """
    powered = np.linalg.matrix_power(fib_mat, n - 1)
    return powered[0, 0]

def by_hand_mat_mult(two_by_two_matrix, two_by_two_matrix_2):
    """Multiply two 2x2 matrices using plain indexing and arithmetic.

    Args:
        two_by_two_matrix: left operand, indexable as ``m[row][col]``.
        two_by_two_matrix_2: right operand, indexable the same way.

    Returns:
        The product as a nested list ``[[r00, r01], [r10, r11]]``.
    """
    left = two_by_two_matrix
    right = two_by_two_matrix_2
    # Entry (row, col) is the dot product of left's row with right's column.
    return [
        [left[row][0] * right[0][col] + left[row][1] * right[1][col] for col in range(2)]
        for row in range(2)
    ]

def by_hand_mat_exp(matr, n):
    """Raise a 2x2 matrix to the ``n``-th power by repeated squaring.

    Args:
        matr: 2x2 matrix, indexable as ``matr[row][col]``.
        n: exponent. For ``n <= 1`` the input matrix is returned unchanged
            (so ``n == 0`` does not yield the identity matrix).

    Returns:
        ``matr`` itself when ``n <= 1``; otherwise the nested list produced
        by ``by_hand_mat_mult``.
    """
    if n <= 1:
        return matr
    if n % 2 == 0:
        # Floor division keeps the exponent an exact integer. True division
        # (n / 2) would turn it into a float, which loses precision for very
        # large exponents and makes the parity test unreliable.
        half_power = by_hand_mat_exp(matr, n // 2)
        return by_hand_mat_mult(half_power, half_power)
    # Odd exponent: peel off one factor so the remainder is even.
    return by_hand_mat_mult(matr, by_hand_mat_exp(matr, n - 1))

def by_hand_fib(n):
    """Return entry [0][0] of ``fib_mat`` raised to the power ``n - 1``.

    Uses the hand-written ``by_hand_mat_exp`` instead of NumPy's
    ``matrix_power``; the input matrix is still the module-level ``fib_mat``.
    """
    powered = by_hand_mat_exp(fib_mat, n - 1)
    first_row = powered[0]
    return first_row[0]

In [None]:
# Sanity check: both implementations should print the same value.
print(numpy_fib(6))
print(by_hand_fib(6))

In [None]:
# Ignore this - just some timing and utilities:

# Series Statistics: Utilities
def mean_sd_se(list_of_values):
    """Compute the mean, standard deviation and standard error of a list.

    The standard deviation is the population form (the sum of squared
    deviations is divided by the number of values, not by one less).

    Args:
        list_of_values: a list of numbers; may be empty.

    Returns:
        A tuple ``(mean, std_dev, std_err)``. For an empty list all three
        are zero, because the count is clamped to at least 1.
    """
    # Imported here so the function does not depend on another cell having
    # imported math first.
    import math

    count = max(1, len(list_of_values))  # clamp to avoid dividing by zero
    mean = sum(list_of_values) / count
    var = sum((value - mean) * (value - mean) for value in list_of_values) / count
    std_dev = math.sqrt(var)
    std_err = std_dev / math.sqrt(count)
    return (mean, std_dev, std_err)

def series_statistics(list_of_lists):
    """Summarise several series at once.

    Args:
        list_of_lists: a list in which every element is a list of numbers.

    Returns:
        A tuple ``(means, std_devs, std_errs)`` of three lists, each holding
        one entry per input series, in input order, as computed by
        ``mean_sd_se``.
    """
    per_series = [mean_sd_se(series) for series in list_of_lists]
    means = [summary[0] for summary in per_series]
    std_devs = [summary[1] for summary in per_series]
    std_errs = [summary[2] for summary in per_series]
    return (means, std_devs, std_errs)

import timeit

In [None]:
# An experiment to compare the runtime of our implementation vs. the one-line numpy solution.

import math  # Unused in this cell, but keeps `math` available to mean_sd_se, which calls math.sqrt.

N_REPEATS = 50000  # timing samples collected per Fibonacci index
MAX_EXPONENT = 20  # the largest index timed is 2**MAX_EXPONENT


def _time_call(func, argument):
    """Return the seconds elapsed during a single call of func(argument)."""
    start_time = timeit.default_timer()
    func(argument)
    return timeit.default_timer() - start_time


# Powers of two, computed with exact integer arithmetic.
indices = [2 ** exponent for exponent in range(1, MAX_EXPONENT + 1)]
timings = []    # one list of samples per index, numpy implementation
timings_2 = []  # one list of samples per index, by-hand implementation
for index in indices:
    print('index: ', index)
    times = []
    times_2 = []
    for _ in range(N_REPEATS):
        # The two implementations are timed alternately within each repeat.
        times.append(_time_call(numpy_fib, index))
        times_2.append(_time_call(by_hand_fib, index))
    timings.append(times)
    timings_2.append(times_2)

# Error bars show the standard error of the mean for each index.
(means, sds, ses) = series_statistics(timings)
(means_2, sds_2, ses_2) = series_statistics(timings_2)
plt.errorbar(indices, means, yerr=ses, label='numpy')
plt.errorbar(indices, means_2, yerr=ses_2, label='by_hand')
plt.legend()
plt.title('Mean time per call')
plt.ylabel('seconds')
plt.xlabel('fibonacci index')
plt.show()

# Final Note
By default, Python only imports a module once; after that, further imports of the same module have no effect. This can catch you off guard if you are actively modifying a custom module that you import into a notebook. An easy fix is to restart the kernel whenever you modify an imported file. Alternatively, run `import importlib` and then `importlib.reload(<package_name>)`. By doing most of your development in notebooks and then putting finished work in imported modules, you can have the best of both environments.

Another common issue is modifying one collection and having those changes reflected in a different one. This can happen when you have code like the _bottom_ cell below. This can be resolved by using the `copy` library from python (with `copy` or `deepcopy` functions), or by using `np.copy`.

For more common Python "gotchas", check out http://docs.python-guide.org/en/latest/writing/gotchas/.

In [None]:
import importlib
# Re-executes the already-imported module; `np` keeps referring to the same module object.
# NOTE(review): numpy is used here only because it is already imported; in practice
# you would presumably reload your own module after editing it -- confirm this is the intended example.
importlib.reload(np)

In [None]:
# Demonstrates which operations share data with npmat and which make an independent copy.
npmat = np.arange(100).reshape((10,10)) + 1  # Creates table 1-100
npmat_same = npmat  # Plain assignment: a second name for the same array, nothing is copied.
npmat_different = np.copy(npmat)  # An independent copy, made before the modification below.
npmat_extended_different = np.append(npmat, npmat[:,0].reshape((len(npmat), 1)), 1)  # Appending (more columns here), creates a new matrix.
npmat[5:9,5:9]=-1  # In-place change to the original: overwrites a 4x4 block with -1.
print(npmat, "original")
print(npmat_same, "assignment")  # Shows the -1 block too, because it is the same array.
print(npmat_different, "np.copy")  # Unchanged.
print(npmat_extended_different, "np.append")  # Unchanged, with the first column repeated at the end.