In [99]:
# Import libraries
import io
import requests
from sklearn.model_selection import KFold
import numpy as np
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from scipy import stats
import os
import warnings
warnings.filterwarnings("ignore")

linearpkl = "linear_regression.pkl"
lassopkl = "lasso_regression.pkl"
polypkl = "nonlinear_model.pkl"

# Load data 
url = 'https://drive.switch.ch/index.php/s/37RuoA3Mgt9Rqah/download'
response = requests.get(url)
data = np.load(io.BytesIO(response.content))

# Alternatively you can load the data from file
data_path = '../Data/data.npz'
data = np.load(data_path)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
X = data["x"]
# y is a Numpy array of shape (n_samples, ) with the targets
y = data["y"]

# T1

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)


def equation(x):
    design_matrix = np.column_stack(
        (x[:, 0], x[:, 1], np.cos(x[:, 0]), x[:, 1] ** 2, np.tanh(x[:, 0]))
    )
    return design_matrix


print("Task 1.a")
if os.path.exists(linearpkl):
    with open(linearpkl, 'rb') as f:
        model = pickle.load(f)
    f.close()
else:
    model = LinearRegression()
    model.fit(equation(X_train), y=y_train)
    with open(linearpkl, 'wb') as f:
        pickle.dump(model, f)
    f.close()

theta_z = np.array([model.intercept_])
theta = np.concatenate((theta_z, model.coef_), axis=0)

print("Linear Regression parameters: ",theta)


print("Task 1.b")
y_pred_train = model.predict(X=equation(X_train))
print("Linear Regression MSE Training data:", mean_squared_error(y_true=y_train, y_pred=y_pred_train))
y_pred = model.predict(X=equation(X_test))
print("Linear Regression MSE: ", mean_squared_error(y_true=y_test, y_pred=y_pred))

print("Task 1.c")
if os.path.exists(lassopkl):
    with open(lassopkl, 'rb') as f:
        model = pickle.load(f)
    f.close()
else:
    model = Lasso(alpha=0.001)
    model.fit(X=equation(X_train), y=y_train)
    with open(lassopkl, 'wb') as f:
        pickle.dump(model, f)
    f.close()

theta_z = np.array([model.intercept_])
theta = np.concatenate((theta_z, model.coef_), axis=0)
print("Lasso Regression parameters: ", theta)
y_pred_train = model.predict(X=equation(X_train))
print("Linear Regression MSE Training data:", mean_squared_error(y_true=y_train, y_pred=y_pred_train))
y_pred = model.predict(equation(X_test))
print("Lasso Regression MSE: ", mean_squared_error(y_true=y_test, y_pred=y_pred))


# T2
print("Task 2.a")
if os.path.exists(polypkl):
    with open(polypkl, "rb") as f:
        model = pickle.load(f)
    f.close()
    pol_feat_train = PolynomialFeatures(degree=7, include_bias=False)
    X_pol_train = pol_feat_train.fit_transform(X_train)
else:
    pol_feat_train = PolynomialFeatures(degree=7, include_bias=False) # degree has lowest MSE according to grid_search
    X_pol_train = pol_feat_train.fit_transform(X_train)

    model = LinearRegression(fit_intercept=True)
    model.fit(X_pol_train, y_train)
    with open(polypkl, 'wb') as f:
        pickle.dump(model, f)
    f.close()

X_pol_test = pol_feat_train.transform(X_test)
y_pred_train = model.predict(X=X_pol_train)
print("Poly Regression MSE Training data:", mean_squared_error(y_true=y_train, y_pred=y_pred_train))
y_pred = model.predict(X_pol_test)
print("Poly Regression MSE: ", mean_squared_error(y_true=y_test, y_pred=y_pred))

print("Task 2.b")

kfcv = KFold(n_splits=10, random_state=0, shuffle=True)
fold_iterator = kfcv.split(X_train, y_train)
mse_linear = []
mse_poly = []

for idx_train, idx_val in fold_iterator:
    X_train, y_train = X[idx_train], y[idx_train]
    X_val, y_val = X[idx_val], y[idx_val]
    
    pol_feat_train = PolynomialFeatures(degree=7, include_bias=False)
    X_pol_train = pol_feat_train.fit_transform(X_train)

    linear = LinearRegression(fit_intercept=True)
    linear.fit(X_pol_train, y_train)
    mse_poly.append(linear.score(pol_feat_train.transform(X_val), y_val))
    linear.fit(equation(X_train), y_train)
    mse_linear.append(linear.score(equation(X_val), y_val))

T, p_val = stats.ttest_rel(mse_linear, mse_poly)
print('t-test: T={:.2f}, p-value={:.4f}'.format(T, p_val))
print("is T={:.2f} in 95\% confidence interval (-1.96, 1.96) ?".format(T))
    
# T3 (Bonus)

print("Everything useful")

Task 1.a
Linear Regression parameters:  [ 0.96551392  5.05153814 -4.00489622  7.02109756  1.99768997 -0.10373533]
Task 1.b
Linear Regression MSE Training data: 1.4136786666328538
Linear Regression MSE:  1.4921846102530014
Task 1.c
Lasso Regression parameters:  [ 0.96831877  5.0460651  -4.00466747  7.01669269  1.99764278 -0.09065975]
Linear Regression MSE Training data: 1.4137032958414137
Lasso Regression MSE:  1.491732212071426
Task 2.a
Poly Regression MSE Training data: 1.3989151732725733
Poly Regression MSE:  1.5039563843971995
Task 2.b
t-test: T=4.27, p-value=0.0021
is T=4.27 in 95\% confidence interval (-1.96, 1.96) ?
Everything useful


In [None]:
# Import libraries
import joblib
import io
import requests
import torch
import numpy as np

def evaluate_predictions(y_true, y_pred):
    """
    Evaluates the mean squared error between the values in y_true and the values
    in y_pred.
    ### YOU CAN NOT EDIT THIS FUNCTION ###
    :param y_true: Numpy array, the true target values from the test set;
    :param y_pred: Numpy array, the values predicted by your model.
    :return: float, the mean squared error between the two arrays.
    """
    assert y_true.shape == y_pred.shape
    return ((y_true - y_pred) ** 2).mean()


def load_model(filename):
    """
    Loads a torch model saved.
    This is just an example, you can write your own function to load the model.
    Some examples can be found in src/utils.py.
    :param filename: string, path to the file storing the model.
    :return: the model.
    """
    model = torch.jit.load(filename)

    return model

# Load the data
# This will be replaced with our private test data when grading the assignment

# Load data from url
url = 'https://drive.switch.ch/index.php/s/Wp0I2gb33mhERFN/download'
response = requests.get(url)
data = np.load(io.BytesIO(response.content))

# Alternatively yo can load the data from file
#data_path = 'data_bonus_test.npz'
#data = np.load(data_path)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = torch.tensor(data.f.x, dtype=torch.float32)
# y is a Numpy array of shape (n_samples, ) with the targets
y =  torch.tensor(data.f.y,dtype=torch.float32).reshape(-1, 1)

# Load the trained model
baseline_model_path = 'baseline.pt'
baseline_model = load_model(baseline_model_path)

# Predict on the given samples
y_pred_ours = baseline_model(x)

############################################################################
# STOP EDITABLE SECTION: DO NOT modify anything above this point.
############################################################################

############################################################################
# ADD HERE YOUR CODE TO READ MODEL OF TASK 3
############################################################################

# Load the trained model
baseline_model_path = 'YOUR_MODEL_PATH'
baseline_model =  ...   # LOAD YOU MODEL and predict x
# Predict on the given samples FROM YOUR MODEL
y_pred_yours = ...


############################################################################
# STOP EDITABLE SECTION: do not modify anything below this point.
############################################################################

# Evaluate the prediction using MSE
mse = evaluate_predictions(y_pred_yours, y)
print(f'MSE on whole dataset: {mse}')

# NOTE: NOW THIS CELL IS NOT WORKING SINCE YOU NEED TO CHANGE THE INPUT.
# DO IT AND EVERYTHING RUNS SMOOTH