In [30]:
import io
import os

import joblib
import numpy as np
import requests
import torch
from scipy.stats import ttest_rel
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Alternative: read the dataset from a local copy instead of downloading it.
# data_path = 'data/data.npz' # path to the .npz file storing the data
# data = np.load(data_path)
# x = data.f.x
# y = data.f.y

# Download the .npz archive and parse it directly from memory.
url = 'https://drive.switch.ch/index.php/s/37RuoA3Mgt9Rqah/download'
# A timeout keeps a stalled connection from hanging the notebook forever.
response = requests.get(url, timeout=60)
# Fail right here on an HTTP error instead of handing an error page to
# np.load, where it would only surface as an opaque parsing exception.
response.raise_for_status()
data = np.load(io.BytesIO(response.content))
x = data.f.x  # inputs, stored in the archive under the key 'x'
y = data.f.y  # regression targets, stored under the key 'y'


# T1 --------------------------------------------------------------
# using my GPU NVIDIA RTX 3070 with CUDA integration
# is_cuda_available = torch.cuda.is_available()
# device_id = torch.cuda.current_device()
# num_gpu = torch.cuda.device_count()
# device_name = torch.cuda.get_device_name(device_id)
# print(is_cuda_available, device_id, num_gpu, device_name)
# print(torch.device('cuda:0' if is_cuda_available else 'cpu'))
def perf_assessment(model, x, y):
    """Return the mean squared error of ``model`` on the samples ``(x, y)``.

    :param model: fitted estimator exposing a ``predict(x)`` method.
    :param x: inputs, passed unchanged to ``model.predict``.
    :param y: true target values the predictions are compared against.
    :return: the value of ``mean_squared_error(y, prediction)``.
    """
    prediction = model.predict(x)
    # scikit-learn metrics take (y_true, y_pred) in that order. MSE itself is
    # symmetric, so the value is unchanged, but keeping the documented order
    # means swapping in an asymmetric metric later cannot silently go wrong.
    return mean_squared_error(y, prediction)


LINE = '-' * 100
RANDOM_STATE = 20020309  # one seed for the data split and the random forest
MODEL_DIR = 'model'      # directory that receives the serialized models

# joblib.dump does not create missing directories, so make sure the target
# exists; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs(MODEL_DIR, exist_ok=True)

print(f'{LINE}\n T1 \n{LINE}')

# Design matrix with columns [1, x1, x2, cos(x1), x2^2, tanh(x1)].
# NOTE: LinearRegression and Lasso fit their own intercept by default, so the
# learned bias is stored in `intercept_`, not in the coefficient of `ones`.
x1 = x[:,0]
x2 = x[:,1]
ones = np.ones(shape=x1.shape)
cos_x1 = np.cos(x1)
tanh_x1 = np.tanh(x1)
x2_squared = np.square(x2)
X = np.vstack((ones, x1, x2, cos_x1, x2_squared, tanh_x1)).transpose()
Y = y

# 70% train / 30% test. No separate validation set is carved out here; the
# 70/15/15 scheme from the lectures would need a second split of the test part.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, train_size=0.7, shuffle=True, random_state=RANDOM_STATE)

# ordinary least squares (fit_intercept=True is the default)
model_linear = LinearRegression()
model_linear.fit(X_train, Y_train)
theta_hat_linear = model_linear.coef_

# MSE on training and test set, then persist the fitted model
train_performance_linear = perf_assessment(model_linear, X_train, Y_train)
test_performance_linear = perf_assessment(model_linear, X_test, Y_test)
joblib.dump(model_linear, f'{MODEL_DIR}/model_linear.pickle')

print(f'Coefficients {theta_hat_linear = }',
      f'Intercept {model_linear.intercept_ = }',
      f'MSE on {train_performance_linear = }',
      f'MSE on {test_performance_linear = }',
      sep='\n')

print(f'{LINE}\n Lasso \n{LINE}')
# lasso regression (fit_intercept=True is the default)
model_lasso = Lasso(alpha=0.001) # tweak alpha to promote/demote coefficient params
model_lasso.fit(X_train, Y_train)
theta_hat_lasso = model_lasso.coef_

# MSE on training and test set, then persist the fitted model
train_performance_lasso = perf_assessment(model_lasso, X_train, Y_train)
test_performance_lasso = perf_assessment(model_lasso, X_test, Y_test)
joblib.dump(model_lasso, f'{MODEL_DIR}/model_lasso.pickle')

print(f'Coefficients {theta_hat_lasso = }',
      f'Intercept {model_lasso.intercept_ = }',
      f'MSE on {train_performance_lasso = }',
      f'MSE on {test_performance_lasso = }',
      sep='\n')

# T2 --------------------------------------------------------------
print(f'{LINE}\n T2 \n{LINE}')

model_forest = RandomForestRegressor(random_state=RANDOM_STATE)
model_forest.fit(X_train, Y_train)

# MSE on training and test set, then persist the fitted model
train_performance_forest = perf_assessment(model_forest, X_train, Y_train)
test_performance_forest = perf_assessment(model_forest, X_test, Y_test)
joblib.dump(model_forest, f'{MODEL_DIR}/model_forest.pickle')

print(f'MSE on {train_performance_forest = }',
      f'MSE on {test_performance_forest = }',
      sep='\n')

print(f'{LINE}\n Statistical Comparison \n{LINE}')

# Paired t-test on the per-sample squared errors of both models: the pairing
# is valid because both are evaluated on exactly the same test samples.
se_linear = (model_linear.predict(X_test) - Y_test) ** 2
se_forest = (model_forest.predict(X_test) - Y_test) ** 2
t_statistic, p_value = ttest_rel(se_linear, se_forest)

print(f'{t_statistic = }',
      f'{p_value = }',
      sep='\n')

# T3 (Bonus) --------------------------------------------------------------
print(f'{LINE}\n T3 \n{LINE}')

class MyNN(torch.nn.Module):
    """Small fully connected regressor: 5 inputs -> 30 -> 13 -> 1 output."""

    def __init__(self):
        super().__init__()
        # The attribute names become the module's parameter names
        # ('layer1_in.weight', ...), so they are kept exactly as they were.
        self.layer1_in = torch.nn.Linear(in_features=5, out_features=30)
        self.layer2_in = torch.nn.Linear(in_features=30, out_features=13)
        self.layer2_out = torch.nn.Linear(in_features=13, out_features=1)

    def forward(self, x):
        """Apply sigmoid and tanh hidden layers, then a linear output layer."""
        hidden_first = self.layer1_in(x).sigmoid()
        hidden_second = self.layer2_in(hidden_first).tanh()
        return self.layer2_out(hidden_second)
    
# load bonus dataset
data_path_bonus = 'data/data_bonus.npz'
data_bonus = np.load(data_path_bonus)
x_bonus = data_bonus.f.x
y_bonus = data_bonus.f.y

bonus_seed = 20020309  # same seed value as the sklearn split of the other tasks
X_train_bonus, X_test_bonus, Y_train_bonus, Y_test_bonus = \
    train_test_split(x_bonus, y_bonus, train_size=0.7, shuffle=True, random_state=bonus_seed)

# Targets become column vectors so they line up with the network's
# one-value-per-row output inside MSELoss.
x_train_tensor = torch.tensor(X_train_bonus, dtype=torch.float32)
y_train_tensor = torch.tensor(Y_train_bonus.reshape(-1, 1), dtype=torch.float32)
x_test_tensor = torch.tensor(X_test_bonus, dtype=torch.float32)
y_test_tensor = torch.tensor(Y_test_bonus.reshape(-1, 1), dtype=torch.float32)

# Seed torch before the model is built: the weight initialisation is random,
# and without a seed the reported MSE changes from run to run.
torch.manual_seed(bonus_seed)
model_nn = MyNN()
stop = torch.nn.MSELoss()  # training criterion (name kept for other cells)
optimizer = torch.optim.Adam(model_nn.parameters(), lr=0.05) # tweaked learning rate

MAX_EPOCHS = 10000         # upper bound on full-batch optimisation steps
TARGET_TRAIN_LOSS = 0.010  # stop as soon as the training MSE drops below this

# Full-batch training loop; stops early once the training loss is low enough.
for epoch in range(MAX_EPOCHS):
    optimizer.zero_grad()
    outputs = model_nn(x_train_tensor)
    loss = stop(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    if loss.item() < TARGET_TRAIN_LOSS:
        break

# performance evaluation: inference mode, no gradient tracking
model_nn.eval()
with torch.no_grad():
    outs = model_nn(x_test_tensor)
# (y_true, y_pred) order as documented by scikit-learn; both are column vectors.
mse_nn = mean_squared_error(Y_test_bonus.reshape(-1, 1), outs.numpy())

# torch.save does not create missing directories.
os.makedirs('model', exist_ok=True)
# NOTE(review): this pickles the whole module object, so loading the file
# later requires the MyNN class to be importable/defined in that session.
torch.save(model_nn, 'model/bonus.pt')

print('MSE on NN = ', mse_nn)

In [29]:
# Import libraries
import joblib
import io
import requests
import torch
import numpy as np

def evaluate_predictions(y_true, y_pred):
    """
    Evaluates the mean squared error between the values in y_true and the values
    in y_pred.
    ### YOU CAN NOT EDIT THIS FUNCTION ###
    :param y_true: Numpy array, the true target values from the test set;
    :param y_pred: Numpy array, the values predicted by your model.
    :return: float, the mean squared error between the two arrays.
    :raises AssertionError: if the two inputs do not have identical shapes.
    """
    # NOTE(review): this notebook calls the function with torch tensors rather
    # than Numpy arrays; the operations used below are the same for both.
    # Identical shapes are required so the subtraction is element-wise;
    # e.g. (n,) against (n, 1) would otherwise broadcast to an (n, n) matrix.
    assert y_true.shape == y_pred.shape
    return ((y_true - y_pred) ** 2).mean()


def load_model(filename):
    """
    Read a model from disk with ``torch.jit.load``.

    Example loader only: write your own if the model is stored differently
    (more examples can be found in src/utils.py).
    :param filename: string, path to the file storing the model.
    :return: the model.
    """
    return torch.jit.load(filename)

# Load the data
# This will be replaced with our private test data when grading the assignment

# Load data from url
# NOTE(review): the request has no timeout and its HTTP status is not checked,
# so a failed download only shows up as an error inside np.load below.
url = 'https://drive.switch.ch/index.php/s/Wp0I2gb33mhERFN/download'
response = requests.get(url)
data = np.load(io.BytesIO(response.content))

# Alternatively you can load the data from file
#data_path = 'data_bonus_test.npz'
#data = np.load(data_path)

# x is a float32 torch tensor with the inputs, built from the Numpy array
# data.f.x (presumably of shape (n_samples, n_features) - confirm)
x = torch.tensor(data.f.x, dtype=torch.float32)
# y is a float32 torch tensor with the targets, reshaped from data.f.y into a
# single column, i.e. shape (n_samples, 1)
y =  torch.tensor(data.f.y,dtype=torch.float32).reshape(-1, 1)

# Load the trained model
baseline_model_path = 'baseline.pt'
baseline_model = load_model(baseline_model_path)

# Predict on the given samples
y_pred_ours = baseline_model(x)

############################################################################
# STOP EDITABLE SECTION: DO NOT modify anything above this point.
############################################################################

############################################################################
# ADD HERE YOUR CODE TO READ MODEL OF TASK 3
############################################################################

# Load the trained model
# NOTE(review): 'model/bonus.pt' is written with torch.save(model_nn, ...),
# i.e. a pickle of the whole module. Unpickling it needs the MyNN class to be
# defined in this kernel, and torch.load runs pickle code, so only load files
# you created yourself. Recent PyTorch releases default to weights_only=True,
# which rejects such files - TODO confirm against the installed version.
baseline_model_path = './model/bonus.pt'
baseline_model = torch.load(baseline_model_path)
# Inference only: switch to eval mode and disable autograd so the predictions
# (and the MSE computed from them) are plain tensors without a grad_fn.
baseline_model.eval()
with torch.no_grad():
    # Predict on the given samples FROM YOUR MODEL
    y_pred_yours = baseline_model(x)


############################################################################
# STOP EDITABLE SECTION: do not modify anything below this point.
############################################################################

# Evaluate the prediction using MSE
# NOTE(review): the arguments arrive as (y_true=y_pred_yours, y_pred=y), i.e.
# swapped relative to the function signature; the squared error is symmetric,
# so the result is the same either way.
mse = evaluate_predictions(y_pred_yours, y)
print(f'MSE on whole dataset: {mse}')

# NOTE: NOW THIS CELL IS NOT WORKING SINCE YOU NEED TO CHANGE THE INPUT.
# DO IT AND EVERYTHING RUNS SMOOTH