<a href="https://colab.research.google.com/github/GiorgiaAuroraAdorni/ML-bachelor-course-assignments-sp23/blob/main/assignment%201/deliverable/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 1
Student: Guillaume Baranzini

--- 
# IMPORTANT: all the submitted code should be in 2 cells
1) How you trained, evaluated and saved your model
2) How to load your model from a file, load the data and evaluate the model. Cell 2) should be running independently (even if cell 1 is not run)

In [34]:
import pickle
import io
import requests
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression


# Load data from URL
url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'
response = requests.get(url)
data = np.load(io.BytesIO(response.content))

# Separate features (x) and target (y)
x = data.f.x
y = data.f.y

# Split data into train and test sets for x and y
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.90, shuffle=True, random_state=2)

# Print shapes of the train and test sets
print('train_data shape:', x_train.shape)
print('test_data shape:', x_test.shape)

# Prepare the input features
x_1 = x_train[:,0].reshape(-1,1)  # First feature
x_2 = x_train[:,1].reshape(-1,1)  # Second feature
x_3 = np.sin(x_2)                 # Third feature
x_4 = x_1 * x_2    # Fourth feature

# Add a column of ones to the input matrix
ones_vector = np.ones(shape=(len(x_train),1))
X = np.hstack((ones_vector, x_1, x_2, x_3, x_4))

# Prepare the input features for test set
x_1_test = x_test[:, 0].reshape(-1, 1)
x_2_test = x_test[:, 1].reshape(-1, 1)
x_3_test = np.sin(x_2_test)
x_4_test = x_1_test * x_2_test
ones_vector_test = np.ones(shape=(len(x_test), 1))
X_test = np.hstack((ones_vector_test, x_1_test, x_2_test, x_3_test, x_4_test))



# Print the shape of the input matrix
print('Input (X) shape:\t', X.shape)

# Train the model using linear regression
model = LinearRegression(fit_intercept=False)
model.fit(X, y_train)

# Get the coefficients (theta)
theta_hat = model.coef_

# Print the optimal parameters and the corresponding function
print('Optimal parameters:\t', theta_hat)
print("f(x, θ):", theta_hat[0], "+", theta_hat[1], "*x1 +", theta_hat[2], "*x2 +", theta_hat[3], "*sin(x2) +", theta_hat[4], "*x1*x2")

# Evaluate the model using mean squared error
train_predictions = model.predict(X)
train_performance = mean_squared_error(train_predictions, y_train)
test_predictions = model.predict(X_test)
test_mse = mean_squared_error(test_predictions, y_test)

# Print the train and test performance
print("Train performance: \t", train_performance)
print('Test MSE:', test_mse)

# Save the trained model to a pickle file
with open('linear_regression.pickle', 'wb') as file:
    pickle.dump(model, file)

    
##################################################################################
    
# T2 
from sklearn.linear_model import SGDRegressor

# Train the model using SGDRegressor
model1 = SGDRegressor(loss='squared_error', penalty=None , max_iter=10000, learning_rate='adaptive')
model1.fit(X, y_train)

# Get the coefficients (theta)
theta = model1.coef_
print('Optimal parameters:\t', theta)

# Evaluate the model
train_predictions = model1.predict(X)
train_performance = mean_squared_error(y_train, train_predictions)

test_predictions = model1.predict(X_test)
test_mse = mean_squared_error(y_test, test_predictions)

print("Train performance: \t", train_performance)
print('Test MSE:', test_mse)

# Save the trained model to a pickle file
with open('nonlinear_model.pickle', 'wb') as file:
    pickle.dump(model1, file)

    
###########################################################

# T3
from keras.models import Sequential
from keras.layers import Dense

# Define the model
def create_model():
    model = Sequential()
    model.add(Dense(16, input_dim=2, activation='relu')) # input 
    model.add(Dense(16, activation='relu'))              # add a layer with 16 neurons
    model.add(Dense(16, activation='relu'))              # add a layer with 16 neurons
    model.add(Dense(16, activation='relu'))              # add a layer with 16 neurons
    model.add(Dense(1, activation='linear'))             # output
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])
    return model

# Create the model
model2 = create_model()

# Train the model
history = model2.fit(x_train, y_train, epochs=126, batch_size=8, verbose=0, validation_split=0.1)

# Evaluate the model on test data
_, mse = model2.evaluate(x_test, y_test)
print('MSE: %.4f' % mse)

# Saving the model as a pickle file
with open('model_task_3.pickle', 'wb') as f:
    pickle.dump(model2, f)

train_data shape: (1800, 2)
test_data shape: (200, 2)
Input (X) shape:	 (1800, 5)
Optimal parameters:	 [ 1.24205736 -0.04503563 -0.56462773  0.47663575  0.04029328]
f(x, θ): 1.24205735591937 + -0.04503563207910319 *x1 + -0.5646277318068914 *x2 + 0.4766357540857329 *sin(x2) + 0.040293276438477466 *x1*x2
Train performance: 	 0.7103436190376121
Test MSE: 0.7296013997475128
Optimal parameters:	 [ 0.61499839 -0.04200242 -0.56047268  0.47982239  0.0390856 ]
Train performance: 	 0.7103496414012812
Test MSE: 0.7297227972134515
MSE: 0.0178


# Example on how to use baseline model:

In [38]:
# Import librairies 
import pickle
import joblib
import io
import requests
import numpy as np


def evaluate_predictions(y_true, y_pred):
    """
    Evaluates the mean squared error between the values in y_true and the values
    in y_pred.
    ### YOU CAN NOT EDIT THIS FUNCTION ###
    :param y_true: Numpy array, the true target values from the test set;
    :param y_pred: Numpy array, the values predicted by your model.
    :return: float, the mean squared error between the two arrays.
    """
    assert y_true.shape == y_pred.shape
    return ((y_true - y_pred) ** 2).mean()


def load_model(filename):
    """
    Loads a Scikit-learn model saved with joblib.dump.
    This is just an example, you can write your own function to load the model.
    Some examples can be found in src/utils.py.
    :param filename: string, path to the file storing the model.
    :return: the model.
    """
    model = joblib.load(filename)

    return model

# Load the data
# This will be replaced with our private test data when grading the assignment

# Load data from url
url = 'https://drive.switch.ch/index.php/s/TeDwnbYsBKRuJjv/download'
response = requests.get(url)
data = np.load(io.BytesIO(response.content))

# Alternatively yo can load the data from file
# data_path = '../data/data.npz'
# data = np.load(data_path)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x
# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# For Task 1 and 2 input
x_data_1 = x[:,0].reshape(-1,1)
x_data_2 = x[:,1].reshape(-1,1)
x_data_3 = np.sin(x_data_2)
x_data_4 = x_data_1*x_data_2
ones_vector_data = np.ones(shape=(len(x[:,0]), 1))

X_data = np.hstack((ones_vector_data, x_data_1, x_data_2, x_data_3, x_data_4)) # matrix 2000*5 


# Load the trained model

# Evaluate Task 1
#baseline_model_path = './linear_regression.pickle' 

# Evaluate Task 2
#baseline_model_path = './nonlinear_model.pickle'

# Evaluate Task 3
baseline_model_path = './model_task_3.pickle' 

baseline_model = load_model(baseline_model_path)

# Change input

# for Task 1 and 2 5 inputs

#x = X_data

# for Task 3 2 inputs
x = x 

# Predict on the given samples
 
y_pred = baseline_model.predict(x).flatten()


############################################################################
# STOP EDITABLE SECTION: do not modify anything below this point.
############################################################################

# Evaluate the prediction using MSE
mse = evaluate_predictions(y_pred, y)
print(f'MSE on whole dataset: {mse}')

# NOTE: NOW THIS CELL IS NOT WORKING SINCE YOU NEED TO CHANGE THE INPUT.
# DO IT AND EVERYTHING RUNS SMOOTH 

MSE on whole dataset: 0.016810102806928323
