# Workshop PSIML 2018
## Numpy workshop

NumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object, various derived objects (such as masked arrays and matrices), and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier transforms, basic linear algebra, basic statistical operations, random simulation and much more.

In [None]:
import numpy as np

## Arrays

In [None]:
# Define a one-dimensional array (a vector) from a Python list
a = np.array([1,2,3])

# Some basic properties: the values, the shape tuple and the element data type
print("Array a: ", a)
print("\nShape of array a: ", a.shape)
print("\nData type of array a: ", a.dtype)

In [None]:
# Define a matrix (two-dimensional array) from a list of equally long rows
b = np.array([[1, 2, 3], [4, 5, 6]])

# Some basic properties: b has 2 rows and 3 columns, so its shape is (2, 3)
print("Matrix b: \n", b)
print("\nShape of matrix b: ", b.shape)
print("\nData type of matrix b: ", b.dtype)

In [None]:
# Multidimensional arrays - a tensor built from nested lists.
# Here: 2 blocks, each holding 2 rows of 3 values, so the shape is (2, 2, 3)
c = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])

# Some basic properties
print("Tensor c: \n", c)
print("\nShape of tensor c: ", c.shape)
print("\nData type of tensor c: ", c.dtype)

## Initiation functions

In [None]:
# All zeros (the argument is the shape tuple of the array to create)
print("All zeros: \n", np.zeros((2,2)))

# All ones
print("\nAll ones: \n", np.ones((2,2)))

# All same value: a 2x2 array in which every element is the given constant (here 2)
print("\nAll same value: \n", np.full((2,2), 2))

# All random
# Setting a random seed is important for reproducibility of the code.
# It is good practice to use it in ML before moving to actual training as it makes debugging a lot easier.
np.random.seed(5)
print("\nAll random: \n", np.random.random((2,2)))

# Identity matrix of size 3x3
print("\nIdentity matrix: \n", np.eye(3))

## Array indexing

Indexing starts at 0 for the first element. Negative indexes count from the end of the array (for example, -1 is the last element).

In [None]:
# The last element is printed twice on purpose: once through its positive index (2)
# and once through the equivalent negative index (-1).
print("Array a: ", a)
print("First element of a: ", a[0])
print("Last element of a: ", a[2])
print("Last element of a: ", a[-1])

Indexing in matrix and tensor is the same and we can index any column, row etc.

In [None]:
print("Tensor c: \n", c)
# A single index selects along the first axis: c[0] is the first block
print("\nValue of c[0]: \n", c[0])
# Negative indexes count from the end; c was built from two blocks, so c[-2] is the first block again
print("\nValue of c[-2]: \n", c[-2])
# Chained indexing: second row of the first block
print("\nValue of c[0][1]: ", c[0][1])
# Chained indexes and a comma-separated index address the same element
print("Value of c[0][0][0]: ", c[0][0][0])
print("Value of c[0, 0, 0]: ", c[0, 0, 0])
# Slicing: first block, all of its rows, columns 0 and 1
print("\nValue of c[0, :, 0:2]: \n", c[0, :, 0:2])

## Basic operations

In [None]:
# Two 2x2 matrices with an explicitly requested 64-bit floating-point element type
x = np.array([[1, 2], [3, 4]], dtype=np.float64)
y = np.array([[5, 6], [7, 8]], dtype=np.float64)

print("Matrix x: \n", x)
print("\nMatrix y: \n", y)

In [None]:
# The arithmetic operators (+, -, *, /) and np.sqrt / np.exp all work element by element
print("Addition:\n", x + y)
print("Subtraction:\n", y - x)
print("Elementwise multiplication:\n", x * y)
# np.matmul is the matrix product, unlike the element-wise * above
print("Multiplication:\n", np.matmul(x, y))
print("Division:\n", x / y)
print("Square root:\n", np.sqrt(x))
print("Exp:\n", np.exp(x))
# x[1] and y[0] are 1-D rows, so np.dot returns their inner product (a single number)
print("Dot product:\n", np.dot(x[1], y[0]))
print("Transpose:\n", x.T)

## Broadcasting

Broadcasting is one of the most important NumPy features. The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations. Subject to certain constraints, the smaller array is “broadcast” across the larger array so that they have compatible shapes. Broadcasting provides a means of vectorizing array operations so that looping occurs in C instead of Python. It does this without making needless copies of data and usually leads to efficient algorithm implementations.

In [None]:
# Same shape on both sides: plain element-wise multiplication, no broadcasting needed
a = np.array([1.0, 2.0, 3.0])
b = np.array([2.0, 2.0, 2.0])
print("a * b, a as vector, b as vector:", a * b)

# b is now a one-element array; broadcasting stretches it across a, so it acts like a scalar
b = np.array([2])
print("a * b, a as vector, b as scalar:", a * b)

In [None]:
# a has shape (2, 3) and b has shape (3,): b is broadcast across both rows of a
a = np.array([[1,2,3], [4,5,6]])
b = np.array([2,4,6])

print("a + b, a as matrix, b as vector:\n", a + b)
print("a * b, a as matrix, b as vector:\n", a * b)
# np.dot of a matrix and a vector is the matrix-vector product: one value per row of a
print("Dot product of a and b:\n", np.dot(a, b))

## Important ML functions:
### Sigmoid function:

\begin{equation*}
S(x) = \frac{1}{1 + e^{-x}}
\end{equation*}

You can find more at *https://en.wikipedia.org/wiki/Sigmoid_function*

In [None]:
def sigmoid(x):
    """Exercise stub for the sigmoid function S(x) = 1 / (1 + e^(-x)).

    The notebook calls this with a scalar and with NumPy arrays. The body is
    left for the workshop participant; until it is implemented the function
    returns the placeholder value 0.
    """
    # [TODO] Implement sigmoid computation
    return 0

In [None]:
# Self-check for sigmoid: each result should match the "Expected value" line printed after it.
# The function is exercised with a scalar and with an array.
print("Sigmoid of \"0\":", sigmoid(0))
print("Expected value: 0.5")
testArray = np.array([1,5])
print("Sigmoid of [1,5]:", sigmoid(testArray))
print("Expected value: [0.73105858 0.99330715]")

### Plotting Sigmoid

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline 

# Sample x from -10 (inclusive) up to 10 (exclusive) in steps of 0.2 and plot sigmoid over it
x = np.arange(-10., 10., 0.2)
y = sigmoid(x)
plt.plot(x,y)
plt.show()

### ReLu function:

\begin{equation*}
f(x)  = \begin{cases}
    x & \mbox{if } x > 0 \\
    0 & \mbox{otherwise}
\end{cases}
\end{equation*}

You can find more at *https://en.wikipedia.org/wiki/Rectifier_(neural_networks)*

In [None]:
def relu(x):
    """Exercise stub for the ReLU function: f(x) = x if x > 0, otherwise 0.

    The notebook calls this with scalars and with NumPy arrays. The body is
    left for the workshop participant; until it is implemented the function
    returns the placeholder value 0.
    """
    # [TODO] Implement ReLu function
    return 0

In [None]:
# Self-check for relu: a negative scalar, a positive scalar and a mixed array.
# Each result should match the "Expected value" line printed after it.
print("Relu of \"-5\":", relu(-5))
print("Expected value: 0")

print("Relu of \"5\":", relu(5))
print("Expected value: 5")

testArray = np.array([3,0,-1,2,5,-2])
print("Relu of [3,0,-1,2,5,-2]:", relu(testArray))
print("Expected value: [3 0 0 2 5 0]")

### Plotting ReLu

In [None]:
# Same sampling as for the sigmoid plot: -10 (inclusive) to 10 (exclusive), step 0.2
x = np.arange(-10., 10., 0.2)
rel = relu(x)
plt.plot(x,rel)
plt.show()

### Softmax:

\begin{equation*}
P(y=i\mid \mathbf{x}) = \frac{e^{\mathbf{x}_i}}{\sum_{k=1}^K e^{\mathbf{x}_k}}
\end{equation*}

You can find more at *https://en.wikipedia.org/wiki/Softmax_function*

In [None]:
def softmax(x):
    """Exercise stub for softmax: softmax(x)_i = e^(x_i) / sum_k e^(x_k).

    The notebook calls this with a 1-D NumPy array. The body is left for the
    workshop participant; until it is implemented the function returns the
    placeholder value 0.
    """
    # [TODO] Implement softmax function
    return 0

In [None]:
# Self-check for softmax: the result should match the "Expected value" line printed after it
testArray = np.array([-1,0.1899,0.4449,0.98990])
print("Softmax of [-1,0.1899,0.4449,0.98990]: ", softmax(testArray))
print("Expected value: [0.06311943 0.20745794 0.26771651 0.46170613]")

## LR workshop

### Data

Problem and data taken from *https://www.kaggle.com/c/titanic*

## Goal 

Based on the provided information about a passenger, predict whether that person survived the sinking of the Titanic.

## Feature explanation

| Variable | Definition | Key |
| ------------- | ------------- | ------------- |
| survival | Survival | 0 = No, 1 = Yes |
| pclass | Ticket class | 1 = 1st, 2 = 2nd, 3 = 3rd |
| sex | Sex | |
| Age | Age in years | |
| sibsp | # of siblings / spouses aboard the Titanic | |
| parch | # of parents / children aboard the Titanic | |
| ticket | Ticket number | |
| fare | Passenger fare | |
| cabin | Cabin number | |
| embarked | Port of Embarkation | C = Cherbourg, Q = Queenstown, S = Southampton |

### Variable Notes

**pclass**: A proxy for socio-economic status (SES)  
1st = Upper  
2nd = Middle  
3rd = Lower  


**age**: Age is fractional if less than 1. If the age is estimated, it is in the form of xx.5  

**sibsp**: The dataset defines family relations in this way...  
Sibling = brother, sister, stepbrother, stepsister  
Spouse = husband, wife (mistresses and fiancés were ignored)  

**parch**: The dataset defines family relations in this way...  
Parent = mother, father  
Child = daughter, son, stepdaughter, stepson  

Some children travelled only with a nanny, therefore parch=0 for them.  

## Reading the dataset

In [None]:
import numpy as np
import csv

# Relative path to the training CSV. Forward slashes are used because they are accepted
# on Windows as well as on Linux and macOS, whereas a backslash only works on Windows.
dataPath = 'Data/train.csv'

def readCSVasNumpy(dataPath):
    """Read a comma-separated file into a NumPy array of strings.

    Fields may be wrapped in double quotes, so commas and line breaks inside
    a quoted field stay part of that field. No row is skipped and no value is
    converted: a header row, if present, is returned as row 0.

    Parameters:
        dataPath: path of the CSV file to read.

    Returns:
        The parsed rows as returned by ``np.asarray``; a 2-D array when every
        row has the same number of fields.
    """
    # newline='' hands line-ending handling to the csv module, as its documentation
    # requires; otherwise line breaks inside quoted fields are altered while reading.
    with open(dataPath, 'r', newline='') as csv_file:
        rows = list(csv.reader(csv_file, delimiter=',', quotechar='"'))
    return np.asarray(rows)

# Load the whole file; every cell is kept as a string at this point
data = readCSVasNumpy(dataPath)

## Exploring the dataset - Feature extraction

In [None]:
# Look at the raw table as loaded (row 0 is presumably the CSV header; later cells skip it with data[1:])
print(data)

In [None]:
# Take column 1 of every row except row 0 and convert it to integers.
# NOTE(review): presumably column 1 is the survival flag (0/1) - confirm against the printed header.
labels = (data[1:,1]).astype(int)

print(labels)

In [None]:
# Show the first two rows to see which column holds which field
print(data[0:2])

In [None]:
# Column indexes of the raw table that are used as model inputs.
# NOTE(review): presumably class, sex, age, sibsp, parch and fare - confirm against the header row.
important_fields = [2, 4, 5, 6, 7, 9]
# Skip row 0 and keep only the selected columns (values are still strings)
features = data[1:, important_fields]
print(features)

In [None]:
# Encode feature column 1 numerically: 1.0 where the value is "male", 0.0 otherwise
features[:,1] = (features[:,1]=="male").astype(float)
print(features)

In [None]:
# Feature column 2 is treated as the age; a missing age shows up as an empty string
ageFeatures = features[:,2]
age_is_present_mask = np.where(ageFeatures != "")
# Mean over the known ages only
averageAge = np.mean(ageFeatures[age_is_present_mask].astype(float))

# Replace every missing age with that mean (written as text, since features is still a string array)
features[np.where(ageFeatures == ""),2] = str(averageAge)

# Convert the whole feature table from strings to floats
features = features.astype(float)
print(features)

## Train/Test set split

In [None]:
# Draw 70% of the row indexes at random, without repetition, and sort them: these rows form the training set
trainIndexes = np.sort(np.random.choice(features.shape[0], int(features.shape[0]*0.7), replace=False))

train_features = features[trainIndexes]
# np.delete removes the training rows, so the remaining rows form the test set
test_features = np.delete(features, trainIndexes, axis=0)

# Labels are split with the same indexes so they stay aligned with their feature rows
train_labels = labels[trainIndexes]
test_labels = np.delete(labels, trainIndexes, axis=0)

## Feature normalization

In [None]:
# Min-max scaling per column. The minimum and maximum are taken from the training set only
# and reused for the test set, so the test data does not influence the preprocessing.
# NOTE(review): a column whose training values are all equal would divide by zero here.
maxFeatures = np.max(train_features, axis=0)
minFeatures = np.min(train_features, axis=0)

train_features = (train_features - minFeatures) / (maxFeatures - minFeatures)
test_features = (test_features - minFeatures) / (maxFeatures - minFeatures)

# Prepend a column of ones to both sets
train_features = np.append(np.ones((train_features.shape[0], 1)), train_features, axis=1)
test_features = np.append(np.ones((test_features.shape[0], 1)), test_features, axis=1)
# Show the result of this cell: the normalized training features with the leading ones column
print(train_features)

## Model

### Hypothesis:

\begin{equation*}
h(x) = \frac {1}{1+e^{-\sum_{i=0}^K{w_i x_i}}}
\end{equation*}


In [None]:
class LRmodel:
    """Logistic-regression model: a weight vector ``w`` plus a separate bias ``b``.

    Python keeps only the last ``__init__`` defined in a class body, so the
    class has a single constructor (by feature-space size). Building a model
    from existing parameters is offered through ``fromParams``.
    """

    def __init__(self, feature_space_size):
        """Create a model whose weights and bias are all ones.

        Parameters:
            feature_space_size: number of feature columns including the
                leading bias column, so ``feature_space_size - 1`` weights
                are created.
        """
        self.w = np.ones(feature_space_size-1)
        self.b = np.ones(1)

    @classmethod
    def fromParams(cls, weights, bias):
        """Alternate constructor: build a model from given weights and bias."""
        model = cls(np.size(weights) + 1)
        model.w = weights
        model.b = bias
        return model

    def evaluate(self, features):
        """Return sigmoid(features[:, 1:] . w + b), one value per row.

        Column 0 of ``features`` is skipped because the bias is stored
        separately in ``self.b``.
        """
        return sigmoid(np.dot(features[:,1:], self.w) + self.b)

    def getModelParams(self):
        """Return all parameters as one vector: the bias first, then the weights."""
        return np.append(self.b, self.w)

    def setModelParams(self, params):
        """Set the parameters from one vector laid out as in ``getModelParams``."""
        self.b = params[0]
        self.w = params[1:]


# Build a model sized to the training features (which include the leading ones column)
model = LRmodel(train_features.shape[1])

# Self-checks: each printed value should match the "Expected values" line after it.
# NOTE(review): the expected evaluations presumably assume an implemented sigmoid and the
# random seed set earlier in the notebook - confirm.
print("Model weights: ", model.w)
print("Expected values: [1. 1. 1. 1. 1. 1.]")

print("Feature vector shape: ", train_features[:,1:].shape)
print("Expected values: (623, 6)")

print("First 3 model evaluations: ", model.evaluate(train_features)[0:3])
print("Expected values: [0.96802565 0.84066383 0.96923841]")

## Trainer

### Cost function:

\begin{equation*}
J = \frac {1} {n} \sum_{i=1}^n{cost(h(X_i),Y_i)} 
\end{equation*}
\begin{equation*}
J = \frac {1} {n} \sum_{i=1}^n{[-y \log(h(x)) - (1-y) \log(1 - h(x))]} 
\end{equation*}

### Gradient descent:

\begin{equation*}
w_i = w_i - \alpha \frac{1}{n} \sum_{j=1}^n {(h(X^{(j)}) - y^{(j)}) x_i^{(j)}}
\end{equation*}


In [None]:
class Trainer:
    """Skeleton of a gradient-descent trainer for the model (workshop exercise).

    The methods marked [TODO] are placeholders to be filled in by the
    participant. Until then they return 0 or do nothing, so the notebook
    still runs from top to bottom.
    """

    def __init__(self, model):
        # The model whose parameters this trainer reads and updates
        self.model = model

    def calculateCost(self, features, labels):
        """Return the cost of the model on the given examples (placeholder: 0)."""
        # [TODO] Implement cost function based on the formulas above
        return 0

    def calculateGradients(self, features, labels):
        """Return the gradient of the cost for the given examples (placeholder: 0)."""
        # [TODO] Implement gradients function based on the formulas above
        return 0

    def updateModel(self, gradient, learningRate):
        """Apply one gradient step to the model (placeholder: does nothing)."""
        # [TODO] Implement model update based on the gradients
        # A method needs at least one statement; with only a comment the class does not parse.
        pass


## Training without regularization

In [None]:
# Start from a fresh model so earlier cells do not influence this run
model = LRmodel(train_features.shape[1])
trainer = Trainer(model)

learning_rate = 0.1

print("Starting cost training: ", trainer.calculateCost(train_features, train_labels))
print("Starting cost test: ", trainer.calculateCost(test_features, test_labels))

# One [train_cost, test_cost] pair is collected per iteration for the plots below
costFunctionValues = []
# Iteration numbers 1..1999, also used as the x axis of the plots
iters = np.arange(1, 2000, 1)

for i in iters:
    #print("Itteration: ", i)

    # [TODO] Implement one iteration of training

    train_cost = trainer.calculateCost(train_features, train_labels)
    test_cost = trainer.calculateCost(test_features, test_labels)

    costFunctionValues.append([train_cost, test_cost])

    #print("Training cost: ", train_cost)
    #print("Test cost: ", test_cost)

# Convert to an array so the two cost columns can be sliced
costFunctionValues = np.array(costFunctionValues)
print("End cost training: ", costFunctionValues[-1][0])
print("End cost test: ", costFunctionValues[-1][1])

# Upper plot: training cost per iteration; lower plot: test cost per iteration
plt.figure(1, figsize=(20, 15))
plt.subplot(211)
plt.plot(iters, costFunctionValues[:,0])
plt.subplot(212)
plt.plot(iters, costFunctionValues[:,1])
plt.show()

## Evaluation

In [None]:
class Evaluator:
    """Evaluates a model's predictions with precision, recall and accuracy."""

    def __init__(self, model):
        # The model must provide evaluate(features)
        self.model = model

    def evaluate(self, features):
        """Return the model's raw predictions for ``features``."""
        predictions = self.model.evaluate(features)
        return predictions

    def calculateAPR(self, features, labels, threshold):
        """Return ``(Precision, Recall, Accuracy)`` at the given threshold.

        A prediction counts as positive when it is strictly greater than
        ``threshold``. The metric computation itself is a workshop exercise:
        until the [TODO] below is implemented, ``Precision``, ``Recall`` and
        ``Accuracy`` are undefined and calling this method raises NameError.
        """
        predictions = self.evaluate(features)

        numExamples = predictions.shape[0]
        binaryPredictions = (predictions > threshold).astype(int)

        positivePredictions = np.where(binaryPredictions == 1)
        negativePredictions = np.where(binaryPredictions == 0)

        # [TODO] Implement calculation of TP, FP, TN, FN, Precision, Recall and Accuracy

        # TP - Count of examples that were correctly predicted as positive examples

        # FP - Count of examples that were incorrectly predicted as positive examples

        # FN - Count of examples that were incorrectly predicted as negative examples

        # TN - Count of examples that were correctly predicted as negative examples

        return Precision, Recall, Accuracy

    def plotAPR(self, resultsTest, resultsTrain, ranges):
        """Plot the three metrics against ``ranges``: training set on top, test set below.

        ``resultsTrain`` and ``resultsTest`` hold one ``calculateAPR`` result
        per entry of ``ranges``; columns 0, 1 and 2 are drawn as three curves.
        """
        # Plain 2-D arrays instead of np.matrix, which NumPy no longer recommends;
        # each list is converted once rather than once per curve.
        train = np.asarray(resultsTrain)
        test = np.asarray(resultsTest)

        plt.figure(1, figsize=(20, 15))
        plt.subplot(211)
        plt.plot(ranges, train[:, 0], ranges, train[:, 1], ranges, train[:, 2])
        plt.subplot(212)
        plt.plot(ranges, test[:, 0], ranges, test[:, 1], ranges, test[:, 2])
        plt.show()

## Evaluation of the model

In [None]:
evaluator = Evaluator(model)
# Sweep the decision threshold from 0 (inclusive) to 1 (exclusive) in steps of 0.001
t = np.arange(0., 1., 0.001)
resultsTest = []
resultsTrain = []
for i in t:
    # One (Precision, Recall, Accuracy) result per threshold and per data set
    resultsTest.append(evaluator.calculateAPR(test_features, test_labels, i))
    resultsTrain.append(evaluator.calculateAPR(train_features, train_labels, i))

evaluator.plotAPR(resultsTest, resultsTrain, t)

print("Model w: ", model.w, "\nModel b: ", model.b)

## Regularization

### Regularization cost: 
\begin{equation*}
J = \frac {1} {n} \sum_{i=1}^n{cost(h(X_i),Y_i)} + \frac {μ}{2}\sum_{i=1}^m{w_i^2}
\end{equation*}
\begin{equation*}
J = \frac {1} {n} \sum_{i=1}^n{[-y log(h(x)) - (1-y) log(1 - h(x))]} + \frac {μ}{2}\sum_{i=1}^m{w_i^2}
\end{equation*}

### Regularization gradient: 

\begin{equation*}
w_0 = w_0 - \alpha [\frac{1}{n} \sum_{j=1}^n {(h(X^{(j)}) - y^{(j)}) x_0^{(j)}}]
\end{equation*}
\begin{equation*}
w_i = w_i - \alpha [\frac{1}{n} \big\langle\sum_{j=1}^n {(h(X^{(j)}) - y^{(j)}) x_i^{(j)}}\big\rangle + {μ} w_i]
\end{equation*}


In [None]:
class TrainerReg:
    """Skeleton of a regularized gradient-descent trainer (workshop exercise).

    Same layout as the unregularized trainer, with an extra ``regParameter``
    argument for the cost and the gradients. The methods marked [TODO] are
    placeholders to be filled in by the participant. Until then they return 0
    or do nothing, so the notebook still runs from top to bottom.
    """

    def __init__(self, model):
        # The model whose parameters this trainer reads and updates
        self.model = model

    def calculateCost(self, features, labels, regParameter):
        """Return the regularized cost on the given examples (placeholder: 0)."""
        # [TODO] Implement cost function based on the formulas above
        return 0

    def calculateGradients(self, features, labels, regParameter):
        """Return the gradient of the regularized cost (placeholder: 0)."""
        # [TODO] Implement gradients function based on the formulas above
        return 0

    def updateModel(self, gradient, learningRate):
        """Apply one gradient step to the model (placeholder: does nothing)."""
        # [TODO] Implement model update based on the gradients
        # A method needs at least one statement; with only a comment the class does not parse.
        pass


## Training with regularization

In [None]:
# Start from a fresh model so the unregularized run does not influence this one
model = LRmodel(train_features.shape[1])
trainer = TrainerReg(model)

regularization_param = 0.01
learning_rate = 0.1

print("Starting cost training: ", trainer.calculateCost(train_features, train_labels, regularization_param))
print("Starting cost test: ", trainer.calculateCost(test_features, test_labels, regularization_param))

# One [train_cost, test_cost] pair is collected per iteration for the plots below
costFunctionValues = []
# Iteration numbers 1..1999, also used as the x axis of the plots
iters = np.arange(1, 2000, 1)

for i in iters:
    #print("Itteration: ", i)

    # [TODO] Implement one iteration of training

    train_cost = trainer.calculateCost(train_features, train_labels,regularization_param)
    test_cost = trainer.calculateCost(test_features, test_labels,regularization_param)

    costFunctionValues.append([train_cost, test_cost])

    #print("Training cost: ", train_cost)
    #print("Test cost: ", test_cost)

# Convert to an array so the two cost columns can be sliced
costFunctionValues = np.array(costFunctionValues)
print("End cost training: ", costFunctionValues[-1][0])
print("End cost test: ", costFunctionValues[-1][1])

# Upper plot: training cost per iteration; lower plot: test cost per iteration
plt.figure(1, figsize=(20, 15))
plt.subplot(211)
plt.plot(iters, costFunctionValues[:,0])
plt.subplot(212)
plt.plot(iters, costFunctionValues[:,1])
plt.show()

## Evaluation of trained model

In [None]:
evaluator = Evaluator(model)
# Sweep the decision threshold from 0 (inclusive) to 1 (exclusive) in steps of 0.001
t = np.arange(0., 1., 0.001)
resultsTest = []
resultsTrain = []
for i in t:
    # One (Precision, Recall, Accuracy) result per threshold and per data set
    resultsTest.append(evaluator.calculateAPR(test_features, test_labels, i))
    resultsTrain.append(evaluator.calculateAPR(train_features, train_labels, i))

evaluator.plotAPR(resultsTest, resultsTrain, t)


# NOTE(review): plotAPR already calls plt.show(), so this second call looks redundant
plt.show()
print("Model w: ", model.w, "\nModel b: ", model.b)