In [4]:
# Import libraries
import io
import requests
import numpy as np

# Load data 
url = 'https://drive.switch.ch/index.php/s/37RuoA3Mgt9Rqah/download'
response = requests.get(url)
data = np.load(io.BytesIO(response.content))

# Alternatively you can load the data from file
#data_path = 'data.npz' # path to the .npz file storing the data
#data = np.load(data_path)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = data.f.x
# y is a Numpy array of shape (n_samples, ) with the targets
y = data.f.y

# T1
LINE = '-'* 100
print('T1' + LINE)

x1 = x[:,0]
x2 = x[:,1]
ones = np.ones(shape=x1.shape)
cos_x1 = np.cos(x1)
tanh_x1 = np.tanh(x1)
x2_squared = np.square(x2)
X = np.vstack((ones, x1, x2, cos_x1, x2_squared, tanh_x1)).transpose()
Y = y

from sklearn.model_selection import train_test_split
# generate train set following ratio from lectures (70 train, 15 validation, 15 test)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.7, shuffle=True, random_state=20020309)

from sklearn.linear_model import LinearRegression
lin_regression = LinearRegression(fit_intercept=True)
lin_regression.fit(X_train, Y_train)
theta_hat = lin_regression.coef_

from sklearn.metrics import mean_squared_error
# prediction on training set
train_prediction = lin_regression.predict(X_train)
train_performance = mean_squared_error(Y_train, train_prediction)

# prediction on test set for performance evaluation
test_prediction = lin_regression.predict(X_test)
test_performance = mean_squared_error(Y_test, test_prediction)

print(f"theta_hat: {theta_hat}")
print(f"Mean squared error on training for learning procedure: {train_performance}")
print(f"Mean squared error for performance evaluation: {train_performance}")

# visualize T1
import matplotlib.pyplot as plt

# T2

print("Everything useful")

# T3 (Bonus)

print("Everything useful")

T1----------------------------------------------------------------------------------------------------
theta_hat: [ 0.          5.01958025 -4.00014878  6.98350772  1.9973534  -0.08801428]
Mean squared error on training for learning procedure: 1.4258837224839525
Mean squared error for performance evaluation: 1.4258837224839525
Everything useful
Everything useful


In [None]:
# Import libraries
import joblib
import io
import requests
import torch
import numpy as np

def evaluate_predictions(y_true, y_pred):
    """
    Evaluates the mean squared error between the values in y_true and the values
    in y_pred.
    ### YOU CAN NOT EDIT THIS FUNCTION ###
    :param y_true: Numpy array, the true target values from the test set;
    :param y_pred: Numpy array, the values predicted by your model.
    :return: float, the mean squared error between the two arrays.
    """
    assert y_true.shape == y_pred.shape
    return ((y_true - y_pred) ** 2).mean()


def load_model(filename):
    """
    Loads a torch model saved.
    This is just an example, you can write your own function to load the model.
    Some examples can be found in src/utils.py.
    :param filename: string, path to the file storing the model.
    :return: the model.
    """
    model = torch.jit.load(filename)

    return model

# Load the data
# This will be replaced with our private test data when grading the assignment

# Load data from url
url = 'https://drive.switch.ch/index.php/s/Wp0I2gb33mhERFN/download'
response = requests.get(url)
data = np.load(io.BytesIO(response.content))

# Alternatively yo can load the data from file
#data_path = 'data_bonus_test.npz'
#data = np.load(data_path)

# x is a Numpy array of shape (n_samples, n_features) with the inputs
x = torch.tensor(data.f.x, dtype=torch.float32)
# y is a Numpy array of shape (n_samples, ) with the targets
y =  torch.tensor(data.f.y,dtype=torch.float32).reshape(-1, 1)

# Load the trained model
baseline_model_path = 'baseline.pt'
baseline_model = load_model(baseline_model_path)

# Predict on the given samples
y_pred_ours = baseline_model(x)

############################################################################
# STOP EDITABLE SECTION: DO NOT modify anything above this point.
############################################################################

############################################################################
# ADD HERE YOUR CODE TO READ MODEL OF TASK 3
############################################################################

# Load the trained model
baseline_model_path = 'YOUR_MODEL_PATH'
baseline_model =  ...   # LOAD YOU MODEL and predict x
# Predict on the given samples FROM YOUR MODEL
y_pred_yours = ...


############################################################################
# STOP EDITABLE SECTION: do not modify anything below this point.
############################################################################

# Evaluate the prediction using MSE
mse = evaluate_predictions(y_pred_yours, y)
print(f'MSE on whole dataset: {mse}')

# NOTE: NOW THIS CELL IS NOT WORKING SINCE YOU NEED TO CHANGE THE INPUT.
# DO IT AND EVERYTHING RUNS SMOOTH

In [None]:
import torch

In [None]:
# using my GPU NVIDIA RTX 3070 with CUDA integration
is_cuda_available = torch.cuda.is_available()
device_id = torch.cuda.current_device()
num_gpu = torch.cuda.device_count()
device_name = torch.cuda.get_device_name(device_id)
print(is_cuda_available, device_id, num_gpu, device_name)
print(torch.device('cuda:0' if is_cuda_available else 'cpu'))

In [1]:
import numpy as np
import requests
import io
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler


def load_data(tmpurl):
    tmpresponse = requests.get(tmpurl)
    tmpdata = np.load(io.BytesIO(tmpresponse.content))
    return tmpdata['x'], tmpdata['y']

def center_and_scale_inputs(X):
    tmpX_centered = X - np.mean(X, axis=0)
    scaler = StandardScaler()
    tmpX_scaled = scaler.fit_transform(tmpX_centered)
    return tmpX_scaled

def feature_engineering(tmpx):
    tmpx1 = tmpx[:, 0]
    tmpx2 = tmpx[:, 1]
    tmpones = np.ones(shape=tmpx1.shape)
    tmpcos_x1 = np.cos(tmpx1)
    tmptanh_x1 = np.tanh(tmpx1)
    tmpx2_squared = np.square(tmpx2)
    tmpX = np.vstack((tmpones, tmpx1, tmpx2, tmpcos_x1, tmpx2_squared, tmptanh_x1)).T
    return tmpX

def evaluate_model(model, X_train, Y_train, X_test, Y_test):
    # Fit the model
    model.fit(X_train, Y_train)

    # Predictions
    train_prediction = model.predict(X_train)
    test_prediction = model.predict(X_test)

    # Mean Squared Error
    train_mse = mean_squared_error(Y_train, train_prediction)
    test_mse = mean_squared_error(Y_test, test_prediction)
    return train_mse, test_mse

def main():
    # Data Loading
    url = 'https://drive.switch.ch/index.php/s/37RuoA3Mgt9Rqah/download'
    x, y = load_data(url)

    # Feature Engineering
    X = feature_engineering(x)
    Y = y

    # Centering and Scaling Inputs
    X = center_and_scale_inputs(X)

    # Train-test split
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.7, random_state=20020309)

    # Model Initialization - Ridge Regression with alpha=0.1 (you can tune this parameter)
    ridge_regression = Ridge(alpha=0.3, fit_intercept=True)

    # Model Evaluation
    train_mse, test_mse = evaluate_model(ridge_regression, X_train, Y_train, X_test, Y_test)

    print(f"Mean Squared Error on Training Set: {train_mse}")
    print(f"Mean Squared Error on Test Set: {test_mse}")

if __name__ == "__main__":
    main()


Mean Squared Error on Training Set: 1.4259236286772943
Mean Squared Error on Test Set: 1.4668845960300088
