LINEAR ALGEBRA

In [80]:
import numpy as np
from typing import List 
import math
Vector = List[float]
# Element-wise sum of two vectors of the same length.
def add(v: Vector, w: Vector) -> Vector:
    """Return the vector whose i-th entry is ``v[i] + w[i]``.

    Raises AssertionError when the two vectors differ in length.
    """
    assert len(v) == len(w)
    total = []
    for left, right in zip(v, w):
        total.append(left + right)
    return total
# Element-wise difference of two vectors of the same length.
def subtract(v: Vector, w: Vector) -> Vector:
    """Return the vector whose i-th entry is ``v[i] - w[i]``.

    Raises AssertionError when the two vectors differ in length.
    """
    assert len(v) == len(w)
    return [v[i] - w[i] for i in range(len(v))]
def vector_sum(vectors: List[Vector]) -> Vector:
    """Sum a non-empty list of equal-length vectors component by component.

    Raises AssertionError if ``vectors`` is empty or the vectors do not
    all have the same length.
    """
    assert vectors
    width = len(vectors[0])
    assert all(len(v) == width for v in vectors), "different sizes!"
    # zip(*vectors) walks the vectors column by column, so each tuple holds
    # the i-th component of every vector.
    return [sum(column) for column in zip(*vectors)]
# Multiply every component of a vector by a scalar.
def scalar_multiply(c: float, v: Vector) -> Vector:
    """Return the vector whose i-th entry is ``c * v[i]``."""
    scaled = []
    for component in v:
        scaled.append(c * component)
    return scaled
# Component-wise average of a list of vectors.
def vector_mean(vectors: List[Vector]) -> Vector:
    """Return the component-wise mean of ``vectors``.

    The component sums from ``vector_sum`` are scaled by ``1 / len(vectors)``,
    so an empty list fails with ZeroDivisionError.
    """
    weight = 1 / len(vectors)
    totals = vector_sum(vectors)
    return scalar_multiply(weight, totals)
# Dot product: the sum of the component-wise products of two vectors.
def dot(v: Vector, w: Vector) -> float:
    """Return ``v[0] * w[0] + ... + v[n-1] * w[n-1]``.

    Raises AssertionError when the two vectors differ in length.
    """
    assert len(v) == len(w)
    products = [v[i] * w[i] for i in range(len(v))]
    return sum(products)
# Sum of the squared components of a vector.
def sum_of_squares(v: Vector) -> float:
    """Return ``v[0]**2 + ... + v[n-1]**2``, computed as ``dot(v, v)``."""
    squares_total = dot(v, v)
    return squares_total
#sqrt of sum of squares
import math
def magnitude(v: Vector) -> float:
    """Return the length of ``v``: the square root of its sum of squares."""
    squared_length = sum_of_squares(v)
    return math.sqrt(squared_length)
# Squared distance between two vectors.
def squared_distance(v: Vector, w: Vector) -> float:
    """Return the sum of squares of the component-wise difference ``v - w``."""
    difference = subtract(v, w)
    return sum_of_squares(difference)
def distance(v: Vector, w: Vector) -> float:
    """Return the distance between ``v`` and ``w``: the square root of their squared distance."""
    gap_squared = squared_distance(v, w)
    return math.sqrt(gap_squared)

# Sample inputs: two 3-vectors, a list of 2-vectors and a scalar.
v, w = [1, 2, 3], [4, 5, 6]
z = [[1, 2], [3, 4], [5, 6], [7, 8]]
c = 3

# Pair each label with the helper result it describes, then print one per line.
for label, result in [
    ("Addition :", add(v, w)),
    ("Subtraction :", subtract(v, w)),
    ("vector Sum :", vector_sum(z)),
    ("Scalar multiply :", scalar_multiply(c, v)),
    ("vector mean : ", vector_mean(z)),
    ("sum of product of two vectors : ", dot(v, w)),
    ("Magnitude :", magnitude(v)),
    ("sum of squares :", sum_of_squares(v)),
    ("squared distance :", squared_distance(v, w)),
    ("distance between two vectors : ", distance(v, w)),
]:
    print(label, result)


Addition : [5, 7, 9]
Subtraction : [-3, -3, -3]
vector Sum : [16, 20]
Scalar multiply : [3, 6, 9]
vector mean :  [4.0, 5.0]
sum of product of two vectors :  32
Magnitude : 3.7416573867739413
sum of squares : 14
squared distance : 27
distance between two vectors :  5.196152422706632


In [81]:
# Sample inputs: two 3-vectors, a list of 2-vectors and a scalar.
v = [1, 2, 3]
w = [4, 5, 6]
z = [[1, 2], [3, 4], [5, 6], [7, 8]]
c = 3

# Intermediates reused by several of the prints below.
stacked = np.vstack(z)            # one row per vector in z
difference = np.subtract(v, w)    # v - w, element-wise

print("Addition :", np.add(v, w).tolist())
print("Subtraction :", difference.tolist())
print("vector Sum :", stacked.sum(axis=0).tolist())
# Scalar multiplication shown two ways: np.dot with a scalar, and np.multiply.
print("Scalar multiply :", np.dot(c, v).tolist())
print("Scalar multiply :", np.multiply(c, v).tolist())
print("vector mean : ", stacked.mean(axis=0).tolist())
print("sum of product of two vectors : ", np.dot(v, w))
print("Magnitude :", np.linalg.norm(v))
print("sum of squares :", np.dot(v, v))
print("squared distance :", np.dot(difference, difference))
print("distance between two vectors : ", np.linalg.norm(difference))


Addition : [5, 7, 9]
Subtraction : [-3, -3, -3]
vector Sum : [16, 20]
Scalar multiply : [3, 6, 9]
Scalar multiply : [3, 6, 9]
vector mean :  [4.0, 5.0]
sum of product of two vectors :  32
Magnitude : 3.7416573867739413
sum of squares : 14
squared distance : 27
distance between two vectors :  5.196152422706632


STATISTICS

In [88]:
from collections import Counter
#Central tendency 
def mean(xs: List[float]) -> float:
    """Return the arithmetic mean of ``xs``.

    An empty list raises ZeroDivisionError.
    """
    total = sum(xs)
    count = len(xs)
    return total / count
def _median_odd(xs: List[float]) -> float:
    return sorted(xs)[len(xs) // 2]
def _median_even(xs: List[float]) -> float:
    sorted_xs = sorted(xs)
    hi_midpoint = len(xs) // 2  # e.g. length 4 => hi_midpoint 2
    return (sorted_xs[hi_midpoint - 1] + sorted_xs[hi_midpoint]) / 2
def median(v: List[float]) -> float:
    return _median_even(v) if len(v) % 2 == 0 else _median_odd(v)
def quantile(xs: List[float], p: float) -> float:
    """Return the value at the ``p``-th quantile of ``xs``.

    The values are sorted and the element at index ``int(p * len(xs))`` is
    returned; ``p == 1`` yields the largest element.

    Args:
        xs: the data values (need not be sorted).
        p: the quantile as a fraction between 0 and 1 inclusive.

    Raises:
        ValueError: if ``p`` is outside ``[0, 1]``.
        IndexError: if ``xs`` is empty.
    """
    if not 0 <= p <= 1:
        raise ValueError("p must be between 0 and 1, got {!r}".format(p))
    ordered = sorted(xs)
    # int(p * len) equals len when p == 1, which is one past the end;
    # clamp so the top quantile maps to the last element.
    p_index = min(int(p * len(ordered)), len(ordered) - 1)
    return ordered[p_index]
def mode(x: List[float]) -> List[float]:
    """Return every value that occurs the maximum number of times in ``x``.

    More than one value is returned when several tie for the highest count.
    """
    tally = Counter(x)
    highest = max(tally.values())
    modes = []
    for value, occurrences in tally.items():
        if occurrences == highest:
            modes.append(value)
    return modes

#Measure of dispersion
def data_range(xs: List[float]) -> float:
    """Return the spread of ``xs``: its largest value minus its smallest."""
    largest = max(xs)
    smallest = min(xs)
    return largest - smallest
def de_mean(xs: List[float]) -> List[float]:
    """Return ``xs`` with its mean subtracted from every element."""
    centre = mean(xs)
    shifted = []
    for value in xs:
        shifted.append(value - centre)
    return shifted
def variance(xs: List[float]) -> float:
    """Return the sample variance of ``xs`` (squared deviations divided by ``n - 1``).

    Raises AssertionError when ``xs`` has fewer than two elements.
    """
    assert len(xs) >= 2
    squared_deviation_total = sum_of_squares(de_mean(xs))
    return squared_deviation_total / (len(xs) - 1)
def standard_deviation(xs: List[float]) -> float:
    """Return the square root of the variance of ``xs``."""
    spread_squared = variance(xs)
    return math.sqrt(spread_squared)
def interquartile_range(xs: List[float]) -> float:
    """Return the difference between the 0.75 and 0.25 quantiles of ``xs``."""
    upper_quartile = quantile(xs, 0.75)
    lower_quartile = quantile(xs, 0.25)
    return upper_quartile - lower_quartile

# Sample covariance of two equal-length data series.
def covariance(xs: List[float], ys: List[float]) -> float:
    """Return the dot product of the de-meaned series divided by ``n - 1``.

    Raises AssertionError when ``xs`` and ``ys`` differ in length.
    """
    assert len(xs) == len(ys)
    x_deviations = de_mean(xs)
    y_deviations = de_mean(ys)
    return dot(x_deviations, y_deviations) / (len(xs) - 1)

# Correlation: covariance scaled by both standard deviations.
def correlation(xs: List[float], ys: List[float]) -> float:
    """Return the covariance of ``xs`` and ``ys`` divided by both standard deviations.

    Returns 0 when either series has no variation.
    """
    stdev_x = standard_deviation(xs)
    stdev_y = standard_deviation(ys)
    if not (stdev_x > 0 and stdev_y > 0):
        return 0    # if no variation, correlation is zero
    return covariance(xs, ys) / stdev_x / stdev_y

# Two sample data series of equal length.
v = [1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9]
w = [4, 5, 6, 10, 20, 1, 3, 2, 1, 4, 9]
p = 0.75

# Pair each label with the statistic it describes, then print one per line.
for label, result in [
    ("Mean :", mean(v)),
    ("Median :", median(v)),
    ("Mode :", set(mode(v))),
    ("quantile :", quantile(v, 0.9)),
    ("range: ", data_range(v)),
    ("Variance : ", variance(v)),
    ("Standard Deviation :", standard_deviation(v)),
    ("Interquartile range : ", interquartile_range(v)),
    ("Covariance :", covariance(v, w)),
    ("Correlation :", correlation(v, w)),
]:
    print(label, result)


Mean : 5.636363636363637
Median : 6
Mode : {8, 9}
quantile : 9
range:  8
Variance :  8.054545454545455
Standard Deviation : 2.8380531098880892
Interquartile range :  5
Covariance : -2.6363636363636362
Correlation : -0.16822851365035074


In [98]:
# Two sample data series of equal length.
v = [1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9]
w = [4, 5, 6, 10, 20, 1, 3, 2, 1, 4, 9]
p = 0.75

# Mode, quantile, range and interquartile range are not shown in this NumPy
# cell (their print calls were commented out).
# ddof=1 selects the (n - 1) divisor for variance and standard deviation.
for label, result in [
    ("Mean :", np.mean(v)),
    ("Median :", np.median(v)),
    ("Variance : ", np.var(v, ddof=1)),
    ("Standard Deviation :", np.std(v, ddof=1)),
    ("Covariance :", np.cov(v, w)),
    ("Correlation :", np.corrcoef(v, w)),
]:
    print(label, result)


Mean : 5.636363636363637
Median : 6.0
Variance :  8.054545454545455
Standard Deviation : 2.8380531098880892
Covariance : [[ 8.05454545 -2.63636364]
 [-2.63636364 30.49090909]]
Correlation : [[ 1.         -0.16822851]
 [-0.16822851  1.        ]]


PROBABILITY

In [101]:
# CDF of the uniform distribution: P(X <= x) for a uniform random variable X.
def uniform_cdf(x: float) -> float:
    """Return the probability that a uniform random variable is <= x."""
    if x < 0:
        return 0    # the variable is never below 0
    if x < 1:
        return x    # e.g. P(X <= 0.4) = 0.4
    return 1        # the variable is always less than 1
import random
# Seed the `random` module's global generator with a fixed value so that
# later draws from it repeat from run to run (given the same sequence of calls).
random.seed(100)
# Normal distribution PDF
SQRT_TWO_PI = math.sqrt(2 * math.pi)
def normal_pdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return the normal density at ``x`` for mean ``mu`` and standard deviation ``sigma``.

    Computes ``exp(-(x - mu)^2 / (2 sigma^2)) / (sqrt(2 pi) * sigma)``.
    """
    squared_offset = (x - mu) ** 2
    exponent = -squared_offset / 2 / sigma ** 2
    normaliser = SQRT_TWO_PI * sigma
    return math.exp(exponent) / normaliser

# Normal distribution CDF
def normal_cdf(x: float, mu: float = 0, sigma: float = 1) -> float:
    """Return P(X <= x) for a normal variable with mean ``mu`` and standard deviation ``sigma``.

    Evaluated with the error function: ``(1 + erf((x - mu) / (sqrt(2) * sigma))) / 2``.
    """
    erf_argument = (x - mu) / math.sqrt(2) / sigma
    return (1 + math.erf(erf_argument)) / 2

#Inverse normal distribution using binary search
def inverse_normal_cdf(p: float,
                       mu: float = 0,
                       sigma: float = 1,
                       tolerance: float = 0.00001) -> float:
    """Find an approximate inverse of ``normal_cdf`` using binary search.

    For the standard normal, z is searched in [-10, 10] by repeatedly
    halving the bracket until it is no wider than ``tolerance``; the last
    midpoint examined is returned. For any other ``mu``/``sigma`` the
    standard result is rescaled as ``mu + sigma * z``.

    Args:
        p: target probability.
        mu: mean of the distribution.
        sigma: standard deviation of the distribution.
        tolerance: bracket width at which the search stops.

    Returns:
        An approximation of the z for which ``normal_cdf(z, mu, sigma) == p``.
    """
    # if not standard, compute standard and rescale
    if mu != 0 or sigma != 1:
        return mu + sigma * inverse_normal_cdf(p, tolerance=tolerance)

    low_z = -10.0                      # normal_cdf(-10) is (very close to) 0
    hi_z = 10.0                        # normal_cdf(10)  is (very close to) 1
    # Start from the centre of the bracket so that a tolerance wider than
    # the bracket (loop body never runs) still returns a value instead of
    # failing on an unassigned name.
    mid_z = (low_z + hi_z) / 2
    while hi_z - low_z > tolerance:
        mid_z = (low_z + hi_z) / 2     # Consider the midpoint
        if mid_z <= low_z or mid_z >= hi_z:
            # The bounds are adjacent floats, so the bracket cannot shrink
            # further; stop here so a zero, negative or sub-resolution
            # tolerance cannot loop forever.
            break
        mid_p = normal_cdf(mid_z)      # and the cdf's value there
        if mid_p < p:
            low_z = mid_z              # Midpoint too low, search above it
        else:
            hi_z = mid_z               # Midpoint too high, search below it

    return mid_z


GRADIENT DESCENT 

In [114]:
#GRADIENT DESCENT
#Compute gradient function 
def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:
    """Return ``v + step_size * gradient``, computed component by component.

    Raises AssertionError when ``v`` and ``gradient`` differ in length.
    """
    assert len(v) == len(gradient)
    return add(v, scalar_multiply(step_size, gradient))

def sum_of_squares_gradient(v: Vector) -> Vector:
    """Return the gradient of the sum-of-squares function at ``v``: each component doubled."""
    doubled = []
    for component in v:
        doubled.append(2 * component)
    return doubled

# Sample point, gradient and step size.
v = [1, 2, 3]
gradient = [2, 3, 4]
step_size = 1

# Pair each label with the result it describes, then print one per line.
for label, result in [
    ('Gradient step : ', gradient_step(v, gradient, step_size)),
    ('Sum of Square Gradient : ', sum_of_squares_gradient(v)),
]:
    print(label, result)


Gradient step :  [3, 5, 7]
Sum of Square Gradient :  [2, 4, 6]


In [118]:
# Sample point, gradient and step size.
v = [1, 2, 3]
gradient = [2, 3, 4]
step_size = 1

# v + step_size * gradient, converted back to a plain list for printing.
stepped = np.add(v, np.dot(step_size, gradient))
print('Gradient step : ', stepped.tolist())
# Printed as a NumPy array (no list conversion).
print('Sum of Square Gradient : ', np.dot(2, v))


Gradient step :  [3, 5, 7]
Sum of Square Gradient :  [2 4 6]


In [123]:
#MACHINE LEARNING
import random
from typing import TypeVar, List, Tuple
X = TypeVar('X')  # generic type to represent a data point

def split_data(data: List[X], prob: float) -> Tuple[List[X], List[X]]:
    """Shuffle a copy of ``data`` and split it into fractions [prob, 1 - prob].

    The caller's list is left untouched. The first returned list holds the
    first ``int(len(data) * prob)`` shuffled items, the second the rest.
    """
    shuffled = data[:]                       # shallow copy, since shuffle works in place
    random.shuffle(shuffled)
    boundary = int(len(shuffled) * prob)     # index where the first part ends
    first_part = shuffled[:boundary]
    second_part = shuffled[boundary:]
    return first_part, second_part

# Split the integers 0..999 into a 75% / 25% pair of lists.
data = list(range(1000))
train, test = split_data(data, 0.75)

Y = TypeVar('Y')  # generic type to represent output variables

def train_test_split(xs: List[X],
                     ys: List[Y],
                     test_pct: float) -> Tuple[List[X], List[X], List[Y], List[Y]]:
    """Split paired inputs and outputs into train and test portions.

    The positions ``0 .. len(xs) - 1`` are split by ``split_data`` with
    fraction ``1 - test_pct`` going to training, and the same positions are
    then used to pick from both ``xs`` and ``ys`` so pairs stay aligned.

    Args:
        xs: input data points.
        ys: output values, one per element of ``xs``.
        test_pct: fraction of the pairs to place in the test portion.

    Returns:
        ``(x_train, x_test, y_train, y_test)``.

    Raises:
        AssertionError: if ``xs`` and ``ys`` differ in length.
    """
    # Indices are generated from xs only, so a shorter ys would fail part-way
    # through and a longer ys would be silently truncated; reject both up front.
    assert len(xs) == len(ys), "xs and ys must have the same length"

    # Generate the indices and split them.
    idxs = list(range(len(xs)))
    train_idxs, test_idxs = split_data(idxs, 1 - test_pct)

    return ([xs[i] for i in train_idxs],  # x_train
            [xs[i] for i in test_idxs],   # x_test
            [ys[i] for i in train_idxs],  # y_train
            [ys[i] for i in test_idxs])   # y_test