# 1. random dataset

In [1]:
import pickle


def load_dataset(filename):
    """Unpickle and return the object stored in ``filename``.

    Parameters
    ----------
    filename
        Path of the pickle file; it is opened in binary read mode.

    Returns
    -------
    object
        Whatever ``pickle.load`` produces for the file, returned unchanged.

    Notes
    -----
    ``pickle.load`` can run arbitrary code while deserialising, so this
    should only be pointed at files from a trusted source.
    """
    stream = open(filename, 'rb')
    try:
        return pickle.load(stream)
    finally:
        # Release the file handle whether or not unpickling succeeded.
        stream.close()

In [8]:
from linear_regression import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# Fit and score the regressor on each of the three pickled random datasets.
for i in range(3):
    # Every pickle holds three splits, and every split is an (X, y) pair.
    train_set, dev_set, test_set = load_dataset(f'myrandomdataset{i}.pkl')
    (X_train, y_train), (X_dev, y_dev), (X_test, y_test) = train_set, dev_set, test_set

    # Report the array shapes of each split.
    print(f"Training set: X_train shape = {X_train.shape}, y_train shape = {y_train.shape}")
    print(f"Development set: X_dev shape = {X_dev.shape}, y_dev shape = {y_dev.shape}")
    print(f"Test set: X_test shape = {X_test.shape}, y_test shape = {y_test.shape}")

    # NOTE(review): the dev split is only used for the shape report above and is
    # never handed to the model -- confirm that fit() is not meant to receive it.
    regr = LinearRegression(
        alpha=0.001,
        max_iter=100,
        early_stopping=True,
        n_iter_no_change=10,
        batch_size=100,
    )
    regr.fit(X_train, y_train)

    # Score the fitted model on the test split.
    y_pred = regr.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %.2f" % mse)

    r2 = r2_score(y_test, y_pred)
    print("Coefficient of determination: %.2f" % r2)



Training set: X_train shape = (10, 850), y_train shape = (850,)
Development set: X_dev shape = (10, 50), y_dev shape = (50,)
Test set: X_test shape = (10, 100), y_test shape = (100,)
Iteration 1/100: Training Loss = 2245.9618535604964
Validation Loss = 2245.9618535604964
Iteration 2/100: Training Loss = 776.1036501627084
Validation Loss = 776.1036501627084
Iteration 3/100: Training Loss = 274.9157442532406
Validation Loss = 274.9157442532406
Iteration 4/100: Training Loss = 98.88831208337116
Validation Loss = 98.88831208337116
Iteration 5/100: Training Loss = 35.520467441660685
Validation Loss = 35.520467441660685
Iteration 6/100: Training Loss = 13.376467409950367
Validation Loss = 13.376467409950367
Iteration 7/100: Training Loss = 5.4083714169607955
Validation Loss = 5.4083714169607955
Iteration 8/100: Training Loss = 2.599498869654456
Validation Loss = 2.599498869654456
Iteration 9/100: Training Loss = 1.539702587584912
Validation Loss = 1.539702587584912
Iteration 10/100: Training

# 2. diabetes

In [3]:
import numpy as np
from sklearn import datasets
from linear_regression import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fetch the diabetes data as an (X, y) pair, then transpose X so that the
# sample axis becomes the second axis (the axis sliced for the split below).
diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
diabetes_X = diabetes_X.T

# Hold out the final 20 samples for testing; everything before them is training data.
n_test = 20
diabetes_X_train = diabetes_X[:, :-n_test]
diabetes_X_test = diabetes_X[:, -n_test:]
diabetes_y_train = diabetes_y[:-n_test]
diabetes_y_test = diabetes_y[-n_test:]

# Build the project's LinearRegression with this cell's hyper-parameters and fit it.
regr = LinearRegression(
    alpha=0.01,
    max_iter=10000,
    early_stopping=True,
    n_iter_no_change=10,
    batch_size=100,
)
regr.fit(diabetes_X_train, diabetes_y_train)

# Score the fitted model on the held-out samples.
y_pred = regr.predict(diabetes_X_test)

mse = mean_squared_error(diabetes_y_test, y_pred)
print("Mean squared error: %.2f" % mse)

r2 = r2_score(diabetes_y_test, y_pred)
print("Coefficient of determination: %.2f" % r2)

Iteration 1/100: Training Loss = 25449.92370342507
Validation Loss = 25449.92370342507
Iteration 2/100: Training Loss = 22539.462108174353
Validation Loss = 22539.462108174353
Iteration 3/100: Training Loss = 20068.95459294532
Validation Loss = 20068.95459294532
Iteration 4/100: Training Loss = 17961.459117692462
Validation Loss = 17961.459117692462
Iteration 5/100: Training Loss = 16175.05104493762
Validation Loss = 16175.05104493762
Iteration 6/100: Training Loss = 14632.618595276545
Validation Loss = 14632.618595276545
Iteration 7/100: Training Loss = 13325.821871215505
Validation Loss = 13325.821871215505
Iteration 8/100: Training Loss = 12215.548674823147
Validation Loss = 12215.548674823147
Iteration 9/100: Training Loss = 11264.403630068813
Validation Loss = 11264.403630068813
Iteration 10/100: Training Loss = 10467.87834457023
Validation Loss = 10467.87834457023
Iteration 11/100: Training Loss = 9790.559854590147
Validation Loss = 9790.559854590147
Iteration 12/100: Training Lo