# Exercises

There are three exercises in this notebook:

1. Use the cross-validation method to test the linear regression with different $\alpha$ values, at least three.
2. Implement a SGD method that will train the Lasso regression for 10 epochs.
3. Extend the Fisher's classifier to work with two features. Use the class as the $y$.

## 1. Cross-validation linear regression

You need to change the variable ``alpha`` to be a list of alphas. Next do a loop and finally compare the results.

In [26]:
from sklearn.model_selection import cross_validate
import numpy as np

def reg_predict(inputs, w, b):
    results = []
    for inp in inputs:
        results.append(inp*w+b)
    return results

x = np.array([188, 181, 197, 168, 167, 187, 178, 194, 140, 176, 168, 192, 173, 142, 176]).reshape(-1, 1).reshape(15,1)
y = np.array([141, 106, 149, 59, 79, 136, 65, 136, 52, 87, 115, 140, 82, 69, 121]).reshape(-1, 1).reshape(15,1)
temp = x
x = np.asmatrix(np.c_[np.ones((15,1)),x])

I = np.identity(2)
alpha = 0.1 # change here
alphas = np.linspace(-1, 1, 100, endpoint=False)
coeffs = []

# add 1-3 line of code here
for alpha in alphas:
    w = np.linalg.inv(x.T*x + alpha * I)*x.T*y
    w=w.ravel()
    w=w.tolist()[0]
    coeffs.append(w)
    
from sklearn.metrics import mean_squared_error
# add 1-3 lines to compare the results
min_val = 1000000
min_index = 0

for i, coeff in enumerate(coeffs):
    error = mean_squared_error(y, reg_predict(temp.flatten(), coeff[1], coeff[0]) )
    if min_val > abs(error):
        min_val = abs(error)
        min_index = i
print("optimal alpha is ", alphas[i], min_val)
    




optimal alpha is  0.98 372.3312921517967


## 2. Implement based on the Ridge regression example, the Lasso regression.

Please implement the SGD method and compare the results with the sklearn Lasso regression results. 

In [27]:
"""
def E(w,x, y):
    # E(a,b) = (y - (a*x +b))^2 
    return (y-(w[1]*x+w[0]))**2


# This is the gradient of the Error function used to update the equation's coefficients (a and b) for SGD
def gradientE(w,x, y):
    
    gradientW1 = -2*x*(y-(w[1]*x+w[0]))
    gradientW0 = -2*(y-(x*w[1]+w[0]))

    return np.array([gradientW0.item(0),gradientW1.item(0)])
def stochasticGradientDescent(x_data, y_data, learningRate):
    coefHistory = [] # Used to save the value of a and b at each iteration
    lossHistory = [] # Used to save the value of the loss at each iteration
    gradientHistory = [] # Used to save the value of the gradient at each iteration
    w = np.array([1.0, 1.0]) #starting coeficients
    nbIteration = 400
    for _ in range((int)(nbIteration/len(y_data))):
         for i in range(len(y_data)):
            loss = E(w,x_data[i, 1], y_data[i].item(0)) # Compute the loss for one data and the coefficients a and b
            grad = gradientE(w,x_data[i, 1], y_data[i]) # Compute the gradient for one data

            # Save the coefficients, loss and gradient of the current data in the lists
            coefHistory.append(w)
            lossHistory.append(loss)
            gradientHistory.append(grad)

            w = w-learningRate*grad # Update the coefficients a and b 
            
    return lossHistory, w"""




from sklearn.metrics import mean_squared_error

def sgd(x_data, y_data, alpha):
    norma = np.linalg.norm(x_data, axis=0)
    R1 = 1 / (norma[1] * norma[1])
    R0 = 1 / (norma[0] * norma[0])
    def apply_penalty(w, x, y_delta):
        if w > 0:
            dW1 = (-x.T.dot(y_delta) * 2 + alpha) * R1
        else:
            dW1 = (-x.T.dot(y_delta) * 2 - alpha) * R1
        dW0 = (-np.sum(y_delta) * 2) * R0
        return (dW1, dW0)
    def update_weights(dW1, dW0, w):
        w[0] = w[0] - alpha * dW0
        w[1] = w[1] - alpha * dW1
    
    w = np.array([1.0, 1.0])
 
    epochs = 710
    for i in range(epochs):
        x = x_data[:, 1].reshape(-1, 1)
        y_hat = x * w[1] + w[0]
        y_delta = y - y_hat
        
        dW1, dW0 = apply_penalty(w[1], x, y_delta)
        update_weights(dW1, dW0, w)
        
        
    return w


max_values_x = x.max(0)
max_values_y = y.max(0)

#stochasticGradientDescent(np.divide(x,max_values_x), np.divide(y,max_values_y), 0.1)
sgd(x, y, 0.1)

array([-101.92592152,    1.16900386])

In [28]:
import pandas as pd
x = np.array([188, 181, 197, 168, 167, 187, 178, 194, 140, 176, 168, 192, 173, 142, 176]).reshape(-1, 1).reshape(15,1)
y = np.array([141, 106, 149, 59, 79, 136, 65, 136, 52, 87, 115, 140, 82, 69, 121]).reshape(-1, 1).reshape(15,1)

x = np.asmatrix(np.c_[np.ones((15,1)),x])
"""
max_values_x = x.max(0)
max_values_y = y.max(0)
x = np.divide(x,max_values_x)
y = np.divide(y,max_values_y)
"""

I = np.identity(2)
alpha = 0.1 

sgd_coef = sgd(x, y, alpha)
sgd_coef = sgd_coef.ravel()

w = np.linalg.inv(x.T*x + alpha * I)*x.T*y # update this line
w = w.ravel()

compare_coef = np.asarray([sgd_coef[0], sgd_coef[1], w.item(0), w.item(1)])
compare_coef = compare_coef.flatten()

final_result = pd.DataFrame(data=compare_coef.reshape(2, 2), index=['sgd', 'lasso'], columns=["bias", "w[1]"])
final_result


Unnamed: 0,bias,w[1]
sgd,-101.925922,1.169004
lasso,-101.723971,1.169788


## 3. Extend the Fisher's classifier

Please extend the targets of the ``iris_data`` variable and use it as the $y$.

In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris

iris_data = load_iris()
iris_df = pd.DataFrame(iris_data.data,columns=iris_data.feature_names)
iris_df.head()

x = iris_df[['sepal width (cm)','sepal length (cm)']].values # change here
y = iris_data.target.reshape(-1, 1)# change here

dataset_size = np.size(y)

mean_x, mean_y = np.mean(x), np.mean(y)

SS_xy = np.sum(y * x) - dataset_size * mean_y * mean_x
SS_xx = np.sum(x * x) - dataset_size * mean_x * mean_x

a = SS_xy / SS_xx
b = mean_y - a * mean_x


y_pred = a * x + b


0.8132192495093759
