# This notebook is used to evaluate the performance of standard linear regression. Modifying the "project_path" and "base_data_path" fields to the path to the project on your system and the relative path from there to where the data is stored, respectively, will reproduce the results.

In [1]:
import os
import pickle

import numpy as np
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [2]:
project_path = 'C:\\Users\\danny\\Documents_Local\\Caltech_2023-2024\\CS101'

def join_path(relative_path: str) -> str:
    return os.path.join(project_path, relative_path)

def pickle_load(relative_path: str):  # -> pickled_file_contents
    return pickle.load(open(join_path(relative_path), 'rb'))

def pickle_save(obj: object, relative_path: str) -> None:
    pickle.dump(obj, open(join_path(relative_path), 'wb'))

In [9]:
base_data_path = 'Data\\Dataset\\Splits'

# load data

emit_train = pickle_load(
    os.path.join(base_data_path, 'reflectance_train_pca244.pkl')
)
emit_val = pickle_load(
    os.path.join(base_data_path, 'reflectance_val_pca244.pkl')
)

elev_train = pickle_load(
    os.path.join(base_data_path, 'elevation_train.pkl')
)
elev_val = pickle_load(os.path.join(base_data_path, 'elevation_val.pkl'))

elev_train = (
    (
        elev_train - np.mean(np.concatenate([elev_train, elev_val], axis=1))
    ) / 
    np.std(np.concatenate([elev_train, elev_val], axis=1))
)
elev_val = (
    (
        elev_val - np.mean(np.concatenate([elev_train, elev_val], axis=1))
    ) / 
    np.std(np.concatenate([elev_train, elev_val], axis=1))
)
eco_train = pickle_load(os.path.join(base_data_path, 'temp_train.pkl'))
eco_val = pickle_load(os.path.join(base_data_path, 'temp_val.pkl'))

In [10]:
emit_train = emit_train.reshape(
    (emit_train.shape[0] * emit_train.shape[1], emit_train.shape[2])
)
emit_val = emit_val.reshape(
    (emit_val.shape[0] * emit_val.shape[1], emit_val.shape[2])
)

elev_train = (
    (
        elev_train.reshape((elev_train.shape[0] * elev_train.shape[1])) -
        elev_train.mean()
    ) / elev_train.std()
)
elev_val = (
    (
        elev_val.reshape((elev_val.shape[0] * elev_val.shape[1])) -
        elev_val.mean()
    ) / elev_val.std()
)

eco_train = (eco_train - eco_train.mean()) / eco_train.std()
eco_val = (eco_val - eco_val.mean()) / eco_train.std()

x_train = np.concatenate([emit_train, elev_train[:, np.newaxis]], axis=1)
x_val = np.concatenate([emit_val, elev_val[:, np.newaxis]], axis=1)

y_train = eco_train.flatten()
y_val = eco_val.flatten()

In [11]:
lr = LinearRegression()
lr.fit(x_train[:, :], y_train)

In [12]:
lr.score(x_val[:, :], y_val)

0.2744109418002446

In [14]:
p = lr.predict(x_val[:, :])
(np.mean((p-y_val)**2)**0.5) / y_val.std()

0.8518151549483933