<a href="https://colab.research.google.com/github/FabriDeCastelli/ML-Regression-Assignment/blob/main/deliverable/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 1
Student: Fabrizio De Castelli

--- 
# IMPORTANT: all the submitted code should be in 2 cells
1) How you trained, evaluated and saved your model
2) How to load your model from a file, load the data and evaluate the model. Cell 2 must run independently (even if cell 1 has not been run)

In [13]:
# Import libraries
import io
import pandas as pd
import requests
import numpy as np
import matplotlib.pyplot as plt
import pickle

# Load data
url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'
# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() reports HTTP errors here instead of letting np.load fail
# later with an unrelated message while trying to parse an error page.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = np.load(io.BytesIO(response.content))

# Alternatively you can load the data from file
# data_path = '../data/data.npz' # path to the .npz file storing the data
# data = np.load(data_path)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x

# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# Feature extraction for the task. The design matrix X has the columns
# [1, x (all original columns), sin(x[:, 1]), x[:, 0] * x[:, 1]].
# The leading column of ones plays the role of the intercept.
ones = np.ones(shape=(x.shape[0], 1))
sin_x2 = np.sin(x[:, 1]).reshape(-1, 1)
x1_times_x2 = np.multiply(x[:, 0], x[:, 1]).reshape(-1, 1)
X = np.hstack((ones, x, sin_x2, x1_times_x2))

# T1
print('-------- T1 ---------')

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Hold out 30% of the samples for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=0)

# X already contains a column of ones, so the intercept is learned as the first
# coefficient and the estimator must not add a second one.
linear_regression = LinearRegression(fit_intercept=False)

# Fit data in the model
linear_regression.fit(X_train, y_train)

# Best computed parameter configuration
theta_hat = linear_regression.coef_

# Save the model in a pickle file
with open('linear_regression.pickle', 'wb') as f:
  pickle.dump(linear_regression, f, pickle.HIGHEST_PROTOCOL)

# Evaluate with MSE on both splits. scikit-learn metrics take (y_true, y_pred)
# in that order; MSE itself is symmetric, so the reported values are unchanged.
train_prediction = linear_regression.predict(X_train)
train_performance = mean_squared_error(y_train, train_prediction)

test_prediction = linear_regression.predict(X_test)
test_performance = mean_squared_error(y_test, test_prediction)

print('theta_hat = {}'.format(theta_hat))
print('MSE on training set: {}'.format(train_performance))
print('MSE on test set: {}'.format(test_performance))

# T2
print('-------- T2 ---------')

from sklearn.preprocessing import PolynomialFeatures

# Degree of Polynomial
degree = 11

# Init polynomial feature extraction
pol_feat = PolynomialFeatures(degree=degree, include_bias=False)

# Split the original inputs together with their targets, so inputs and targets
# are guaranteed to be partitioned consistently. train_size and random_state
# are the same values used for the T1 split.
x_train, x_test, ypol_train, ypol_test = train_test_split(
    x, y, train_size=0.7, shuffle=True, random_state=0
)

# Fit the feature transformer on the training inputs only, then apply that
# already-fitted transformer to the test inputs.
Xpol_train = pol_feat.fit_transform(x_train)
Xpol_test = pol_feat.transform(x_test)

# Use a dedicated estimator so the fitted T1 model in `linear_regression` is
# not overwritten by this task.
# NOTE(review): fit_intercept=False combined with include_bias=False means this
# model has no constant term. Kept as in the original so the reported numbers
# do not change - confirm this is intended.
polynomial_regression = LinearRegression(fit_intercept=False)
polynomial_regression.fit(Xpol_train, ypol_train)

# Evaluate with MSE on both splits (scikit-learn order is y_true, y_pred)
train_prediction = polynomial_regression.predict(Xpol_train)
train_performance = mean_squared_error(ypol_train, train_prediction)

test_prediction = polynomial_regression.predict(Xpol_test)
test_performance = mean_squared_error(ypol_test, test_prediction)

print('MSE on training set: {}'.format(train_performance))
print('MSE on test set: {}'.format(test_performance))

# T3 (Bonus)

print('-------- T3 ---------')

from keras.models import Sequential
from keras.layers import Dense
from keras import losses
from tensorflow import keras

def nonlinear_model(train, targets, epochs):
  """
  Build, compile and fit the neural network used for this task.

  The network stacks Dense(20, relu) -> Dense(13, tanh) -> Dense(1) and is
  optimised with SGD on the mean squared error. Training output is silenced.
  :param train: the training inputs
  :param targets: the training targets corresponding to `train`
  :param epochs: the number of epochs used to train the network
  :return: the fitted network
  """
  stack = [
    Dense(20, activation="relu"),
    Dense(13, activation="tanh"),
    Dense(1),
  ]
  net = Sequential(stack)
  net.compile(loss="mean_squared_error", optimizer="sgd")
  net.fit(train, targets, verbose=0, epochs=epochs)
  return net

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable between runs.
keras.utils.set_random_seed(0)

# Build and train the model
network = nonlinear_model(X_train, y_train, 1200)

# Save the model in a pickle file
# NOTE(review): whether a Keras model can be pickled depends on the installed
# Keras version. The format is kept because the evaluation cell loads this
# exact file.
with open('nonlinear_regression.pickle', 'wb') as f:
  pickle.dump(network, f, pickle.HIGHEST_PROTOCOL)

# Generate prediction on test set and evaluate performance with MSE.
# The network returns one column per sample; flatten it so the prediction has
# the same 1-D shape as y_test, and pass the metric arguments as (y_true, y_pred).
test_prediction = network.predict(X_test).flatten()
test_performance = mean_squared_error(y_test, test_prediction)

print('MSE on test set: {}'.format(test_performance))


-------- T1 ---------
theta_hat = [ 1.31635295 -0.05128958 -0.57659976  0.42026517  0.03686637]
MSE on training set: 0.696251517689963
MSE on test set: 0.7516362518990539
-------- T2 ---------
MSE on training set: 0.019384844163405014
MSE on test set: 0.07082283622259962
-------- T3 ---------
Epoch 1/1200
44/44 - 1s - loss: 0.8532 - 624ms/epoch - 14ms/step
Epoch 2/1200
44/44 - 0s - loss: 0.6204 - 96ms/epoch - 2ms/step
Epoch 3/1200
44/44 - 0s - loss: 0.5909 - 97ms/epoch - 2ms/step
Epoch 4/1200
44/44 - 0s - loss: 0.5423 - 100ms/epoch - 2ms/step
Epoch 5/1200
44/44 - 0s - loss: 0.5018 - 88ms/epoch - 2ms/step
Epoch 6/1200
44/44 - 0s - loss: 0.4723 - 98ms/epoch - 2ms/step
Epoch 7/1200
44/44 - 0s - loss: 0.4418 - 85ms/epoch - 2ms/step
Epoch 8/1200
44/44 - 0s - loss: 0.4316 - 102ms/epoch - 2ms/step
Epoch 9/1200
44/44 - 0s - loss: 0.4033 - 99ms/epoch - 2ms/step
Epoch 10/1200
44/44 - 0s - loss: 0.3926 - 88ms/epoch - 2ms/step
Epoch 11/1200
44/44 - 0s - loss: 0.3690 - 87ms/epoch - 2ms/step
Epoch 1

# Example of how to load and evaluate the saved model:

In [17]:
# Import libraries
import joblib
import io
import requests
import numpy as np

def evaluate_predictions(y_true, y_pred):
    """
    Evaluates the mean squared error between the values in y_true and the values
    in y_pred.
    ### YOU CAN NOT EDIT THIS FUNCTION ###
    :param y_true: Numpy array, the true target values from the test set;
    :param y_pred: Numpy array, the values predicted by your model.
    :return: float, the mean squared error between the two arrays.
    """
    # Require identical shapes: otherwise e.g. (n,) against (n, 1) would
    # broadcast to an (n, n) matrix and the mean would be silently wrong.
    assert y_true.shape == y_pred.shape
    return ((y_true - y_pred) ** 2).mean()


def load_model(filename):
    """
    Deserialize a previously saved model from disk using joblib.load.

    This is just an example, you can write your own function to load the model.
    Some examples can be found in src/utils.py.
    Only load files you trust: deserializing a pickle-based file can execute
    arbitrary code.
    :param filename: string, path to the file storing the model.
    :return: the model.
    """
    return joblib.load(filename)

# Load the data
# This will be replaced with our private test data when grading the assignment

# Load data from url
url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'
# Fail fast with a clear error on a stalled connection or an HTTP error,
# instead of letting np.load choke on an error page.
response = requests.get(url, timeout=60)
response.raise_for_status()
data = np.load(io.BytesIO(response.content))

# Alternatively you can load the data from file
# data_path = '../data/data.npz'
# data = np.load(data_path)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x
# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# Load the trained model (the neural network saved by the training cell)
model_path = './nonlinear_regression.pickle'
model = load_model(model_path)

# Rebuild the same input features the model was trained on:
# [1, x (all original columns), sin(x[:, 1]), x[:, 0] * x[:, 1]].
# The result goes into a new name so the raw inputs in `x` stay available.
ones = np.ones(shape=(x.shape[0], 1))
sin_x2 = np.sin(x[:, 1]).reshape(-1, 1)
x1_times_x2 = np.multiply(x[:, 0], x[:, 1]).reshape(-1, 1)
X = np.hstack((ones, x, sin_x2, x1_times_x2))

# Predict on the given samples; flatten so the prediction has the same 1-D
# shape as y, which evaluate_predictions asserts.
y_pred = model.predict(X).flatten()

############################################################################
# STOP EDITABLE SECTION: do not modify anything below this point.
############################################################################

# Evaluate the prediction using MSE
# (the arguments are passed as (y_pred, y); the squared error is symmetric, so
# the order does not change the result)
mse = evaluate_predictions(y_pred, y)
print(f'MSE on whole dataset: {mse}')

# NOTE: NOW THIS CELL IS NOT WORKING SINCE YOU NEED TO CHANGE THE INPUT.
# DO IT AND EVERYTHING RUNS SMOOTH


MSE on whole dataset: 0.016603451696046982
