# import libraries

In [264]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, RegressorMixin
import matplotlib.pyplot as plt
import pandas as pd

pd.set_option("display.float_format", "{:.2e}".format)

# create dataset

In [265]:
X = np.linspace(0, 2 * np.pi, 1000)
print(f"X.shape: {X.shape}")

X1 = np.pow(X, 3)
print(f"X1.shape: {X1.shape}")

X2 = np.sin(X)
print(f"X1.shape: {X1.shape}")

def create_target(X1, X2, loc, scale):
    return 2 - X1 + (3 * X2) + np.random.normal(loc, scale)

y = create_target(X1, X2, 1, 0.2)
print(f"y.shape: {y.shape}")

X.shape: (1000,)
X1.shape: (1000,)
X1.shape: (1000,)
y.shape: (1000,)


# Error functions

In [266]:
def mse(y, y_hat):
    return np.mean((y - y_hat) ** 2)

def rmse(y, y_hat):
    return np.sqrt(mse(y, y_hat))

def ndei(y, y_hat):
    return rmse(y, y_hat) / np.std(y, ddof=0)

# LS Model

In [267]:
def LS(X, y):
    return np.linalg.pinv(X.T @ X) @ X.T @ y

# Wrap LS in a scikit-learn compatible estimator
class LSRegressor(BaseEstimator, RegressorMixin):
    def fit(self, X, y):
        self.theta_ = LS(X, y)  # compute theta
        return self
    
    def predict(self, X):
        return X @ self.theta_  # predict y_hat

In [278]:
columns = ["mse_train", "rmse_train", "ndei_train", "mse_test", "rmse_test", "ndei_test"]
rows = ["case1_not_scaled", "case1_scaled", "case1_not_scaled_LS", "case1_scaled_LS" ,
        "case2_not_scaled", "case2_scaled", "case2_not_scaled_LS", "case2_scaled_LS", 
        "case3_not_scaled", "case3_scaled", "case3_not_scaled_LS", "case3_scaled_LS" ]

df = pd.DataFrame(columns=columns, index=rows)
df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,,,,,,
case1_scaled,,,,,,
case1_not_scaled_LS,,,,,,
case1_scaled_LS,,,,,,
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


# Scenario A

In [269]:
X_dummy = X ** 2 + 2
dataset = np.c_[X_dummy, y]

X_train, X_test, y_train, y_test = train_test_split(X_dummy, y, test_size=0.2, random_state=42)
print(f"X_train.shape: {X_train.shape}")
print(f"y_train.shape: {y_train.shape}")
print(f"X_test.shape: {X_test.shape}")
print(f"y_test.shape: {y_test.shape}")

X_train.shape: (800,)
y_train.shape: (800,)
X_test.shape: (200,)
y_test.shape: (200,)


In [273]:
dummy_model = LinearRegression()
dummy_model_scaled = Pipeline([
    ("scaler", StandardScaler()),
    ("regressor", LinearRegression())
])
dummy_theta = LS(X_train.reshape(-1, 1), y_train)
dummy_theta_scaled = Pipeline([
    ('scaler', StandardScaler()),
    ('ls_regressor', LSRegressor())
])

In [274]:
dummy_model.fit(X_train.reshape(-1, 1), y_train)

In [275]:
dummy_model_scaled.fit(X_train.reshape(-1, 1), y_train)

In [276]:
dummy_theta_scaled.fit(X_train.reshape(-1, 1), y_train)

In [279]:
def report_errors(y, y_pred):
    mse_value = mse(y, y_pred)
    rmse_value = rmse(y, y_pred)
    ndei_value = ndei(y, y_pred)

    print(f"mse_value: {mse_value}")
    print(f"rmse_value: {rmse_value}")
    print(f"ndei_value: {ndei_value}")

    return mse_value, rmse_value, ndei_value

In [280]:
print("Train report for not scaled data")
dummy_y_pred_not_scaled_train = dummy_model.predict(X_train.reshape(-1, 1))

dummy_not_scaled_mse_train, dummy_not_scaled_rmse_train, \
    dummy_not_scaled_ndei_train = report_errors(y_train, dummy_y_pred_not_scaled_train)

Train report for not scaled data
mse_value: 117.98348567937468
rmse_value: 10.862020331382864
ndei_value: 0.15186691358845067


In [281]:
df.loc['case1_not_scaled', 'mse_train'] = dummy_not_scaled_mse_train
df.loc['case1_not_scaled', 'rmse_train'] = dummy_not_scaled_rmse_train
df.loc['case1_not_scaled', 'ndei_train'] = dummy_not_scaled_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,,,
case1_scaled,,,,,,
case1_not_scaled_LS,,,,,,
case1_scaled_LS,,,,,,
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [282]:
print("Test report for not scaled data")
dummy_y_pred_not_scaled_test = dummy_model.predict(X_test.reshape(-1, 1))

dummy_not_scaled_mse_test, dummy_not_scaled_rmse_test, \
    dummy_not_scaled_ndei_test = report_errors(y_test, dummy_y_pred_not_scaled_test)

Test report for not scaled data
mse_value: 120.6702057318134
rmse_value: 10.984999122977362
ndei_value: 0.15013452645469486


In [283]:
df.loc['case1_not_scaled', 'mse_test'] = dummy_not_scaled_mse_test
df.loc['case1_not_scaled', 'rmse_test'] = dummy_not_scaled_rmse_test
df.loc['case1_not_scaled', 'ndei_test'] = dummy_not_scaled_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,,,,,,
case1_not_scaled_LS,,,,,,
case1_scaled_LS,,,,,,
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [284]:
print("Train report for scaled data")
dummy_y_pred_scaled_train = dummy_model_scaled.predict(X_train.reshape(-1, 1))

dummy_scaled_mse_train, dummy_scaled_rmse_train, \
    dummy_scaled_ndei_train = report_errors(y_train, dummy_y_pred_scaled_train)

Train report for scaled data
mse_value: 117.98348567937462
rmse_value: 10.86202033138286
ndei_value: 0.1518669135884506


In [285]:
df.loc['case1_scaled', 'mse_train'] = dummy_scaled_mse_train
df.loc['case1_scaled', 'rmse_train'] = dummy_scaled_rmse_train
df.loc['case1_scaled', 'ndei_train'] = dummy_scaled_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,,,
case1_not_scaled_LS,,,,,,
case1_scaled_LS,,,,,,
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [286]:
print("Test report for scaled data")
dummy_y_pred_scaled_test = dummy_model_scaled.predict(X_test.reshape(-1, 1))

dummy_scaled_mse_test, dummy_scaled_rmse_test, \
    dummy_scaled_ndei_test = report_errors(y_test, dummy_y_pred_scaled_test)

Test report for scaled data
mse_value: 120.67020573181331
rmse_value: 10.984999122977358
ndei_value: 0.1501345264546948


In [287]:
df.loc['case1_scaled', 'mse_test'] = dummy_scaled_mse_test
df.loc['case1_scaled', 'rmse_test'] = dummy_scaled_rmse_test
df.loc['case1_scaled', 'ndei_test'] = dummy_scaled_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,,,,,,
case1_scaled_LS,,,,,,
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [288]:
dummy_LS_pred_train = dummy_theta * X_train
dummy_LS_mse_train, dummy_LS_rmse_train, \
    dummy_LS_ndei_train = report_errors(y_train, dummy_LS_pred_train)

mse_value: 503.39277175776346
rmse_value: 22.436416196838643
ndei_value: 0.3136938779202242


In [289]:
df.loc['case1_not_scaled_LS', 'mse_train'] = dummy_LS_mse_train
df.loc['case1_not_scaled_LS', 'rmse_train'] = dummy_LS_rmse_train
df.loc['case1_not_scaled_LS', 'ndei_train'] = dummy_LS_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,,,
case1_scaled_LS,,,,,,
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [290]:
dummy_LS_pred_test = dummy_theta * X_test
dummy_LS_mse_test, dummy_LS_rmse_test, \
    dummy_LS_ndei_test = report_errors(y_test, dummy_LS_pred_test)

mse_value: 530.8594724545209
rmse_value: 23.040387853821404
ndei_value: 0.31489831551560593


In [291]:
df.loc['case1_not_scaled_LS', 'mse_test'] = dummy_LS_mse_test
df.loc['case1_not_scaled_LS', 'rmse_test'] = dummy_LS_rmse_test
df.loc['case1_not_scaled_LS', 'ndei_test'] = dummy_LS_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,,,,,,
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [293]:
dummy_LS_pred_scaled_train = dummy_theta_scaled.predict(X_train.reshape(-1, 1))
dummy_LS_mse_scaled_train, dummy_LS_rmse_scaled_train, \
    dummy_LS_ndei_scaled_train = report_errors(y_train, dummy_LS_pred_scaled_train)

mse_value: 3579.949889966671
rmse_value: 59.832682456719844
ndei_value: 0.8365483159856181


In [295]:
df.loc['case1_scaled_LS', 'mse_train'] = dummy_LS_mse_scaled_train
df.loc['case1_scaled_LS', 'rmse_train'] = dummy_LS_rmse_scaled_train
df.loc['case1_scaled_LS', 'ndei_train'] = dummy_LS_ndei_scaled_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3580.0,59.8,0.837
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [296]:
dummy_LS_pred_scaled_test = dummy_theta_scaled.predict(X_test.reshape(-1, 1))
dummy_LS_mse_scaled_test, dummy_LS_rmse_scaled_test, \
    dummy_LS_ndei_scaled_test = report_errors(y_test, dummy_LS_pred_scaled_test)

mse_value: 3570.7936861489125
rmse_value: 59.75611839928119
ndei_value: 0.8167007059546443


In [297]:
df.loc['case1_scaled_LS', 'mse_test'] = dummy_LS_mse_scaled_test
df.loc['case1_scaled_LS', 'rmse_test'] = dummy_LS_rmse_scaled_test
df.loc['case1_scaled_LS', 'ndei_test'] = dummy_LS_ndei_scaled_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,,,,,,
case2_scaled,,,,,,
case2_not_scaled_LS,,,,,,
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


# Scenario 2

In [323]:
X_2D = np.c_[X1, X2]

X_train, X_test, y_train, y_test = train_test_split(X_2D, y, test_size=0.2, random_state=42)

In [324]:
case2_model = LinearRegression()
case2_model_scaled = Pipeline([
    ("scaler", StandardScaler()),
    ("regressor", LinearRegression())
])
case2_theta = LS(X_train, y_train)
case2_theta_scaled = Pipeline([
    ('scaler', StandardScaler()),
    ('ls_regressor', LSRegressor())
])

In [325]:
case2_model.fit(X_train, y_train)

In [326]:
case2_model_scaled.fit(X_train, y_train)

In [327]:
case2_theta_scaled.fit(X_train, y_train)

In [328]:
print("Train report for not scaled data")
case2_model_y_pred_not_scaled_train = case2_model.predict(X_train)

case2_model_not_scaled_mse_train, case2_model_not_scaled_rmse_train, \
    case2_model_not_scaled_ndei_train = report_errors(y_train, case2_model_y_pred_not_scaled_train)

Train report for not scaled data
mse_value: 3.6408981448981602e-28
rmse_value: 1.908113766235693e-14
ndei_value: 2.66782181963525e-16


In [329]:
df.loc['case2_not_scaled', 'mse_train'] = case2_model_not_scaled_mse_train
df.loc['case2_not_scaled', 'rmse_train'] = case2_model_not_scaled_rmse_train
df.loc['case2_not_scaled', 'ndei_train'] = case2_model_not_scaled_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [330]:
print("Test report for not scaled data")
case2_model_y_pred_not_scaled_test = case2_model.predict(X_test)

case2_model_not_scaled_mse_test, case2_model_not_scaled_rmse_test, \
    case2_model_not_scaled_ndei_test = report_errors(y_test, case2_model_y_pred_not_scaled_test)

Test report for not scaled data
mse_value: 3.2498111066711106e-28
rmse_value: 1.802723247387438e-14
ndei_value: 2.463823602036172e-16


In [331]:
df.loc['case2_not_scaled', 'mse_test'] = case2_model_not_scaled_mse_test
df.loc['case2_not_scaled', 'rmse_test'] = case2_model_not_scaled_rmse_test
df.loc['case2_not_scaled', 'ndei_test'] = case2_model_not_scaled_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [332]:
print("Train report for scaled data")
case2_model_y_pred_scaled_train = case2_model_scaled.predict(X_train)

case2_model_scaled_mse_train, case2_model_scaled_rmse_train, \
    case2_model_scaled_ndei_train = report_errors(y_train, case2_model_y_pred_scaled_train)

Train report for scaled data
mse_value: 2.822348952547222e-28
rmse_value: 1.6799848072370242e-14
ndei_value: 2.3488642054317857e-16


In [333]:
df.loc['case2_scaled', 'mse_train'] = case2_model_scaled_mse_train
df.loc['case2_scaled', 'rmse_train'] = case2_model_scaled_rmse_train
df.loc['case2_scaled', 'ndei_train'] = case2_model_scaled_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [334]:
print("Test report for scaled data")
case2_model_y_pred_scaled_test = case2_model_scaled.predict(X_test)

case2_model_scaled_mse_test, case2_model_scaled_rmse_test, \
    case2_model_scaled_ndei_test = report_errors(y_test, case2_model_y_pred_scaled_test)

Test report for scaled data
mse_value: 3.358920430624492e-28
rmse_value: 1.8327357776353066e-14
ndei_value: 2.5048424220290286e-16


In [335]:
df.loc['case2_scaled', 'mse_test'] = case2_model_scaled_mse_test
df.loc['case2_scaled', 'rmse_test'] = case2_model_scaled_rmse_test
df.loc['case2_scaled', 'ndei_test'] = case2_model_scaled_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [336]:
case2_LS_pred_train = X_train @ case2_theta
print("Train report for not scaled data")

case2_LS_mse_train, case2_LS_rmse_train, \
    case2_LS_ndei_train = report_errors(y_train, case2_LS_pred_train)

Train report for not scaled data
mse_value: 3.34835203448587
rmse_value: 1.8298502765215163
ndei_value: 0.02558398026759323


In [337]:
df.loc['case2_not_scaled_LS', 'mse_train'] = case2_LS_mse_train
df.loc['case2_not_scaled_LS', 'rmse_train'] = case2_LS_rmse_train
df.loc['case2_not_scaled_LS', 'ndei_train'] = case2_LS_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [338]:
case2_LS_pred_test = X_test @ case2_theta
print("Test report for not scaled data")

case2_LS_mse_test, case2_LS_rmse_test, \
    case2_LS_ndei_test = report_errors(y_test, case2_LS_pred_test)

Test report for not scaled data
mse_value: 3.4466925082781232
rmse_value: 1.8565270017638105
ndei_value: 0.02537358438901885


In [339]:
df.loc['case2_not_scaled_LS', 'mse_test'] = case2_LS_mse_test
df.loc['case2_not_scaled_LS', 'rmse_test'] = case2_LS_rmse_test
df.loc['case2_not_scaled_LS', 'ndei_test'] = case2_LS_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,,,,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [341]:
case2_LS_pred_scaled_train = case2_theta_scaled.predict(X_train)
print("Train report for scaled data")

case2_LS_mse_scaled_train, case2_LS_rmse_scaled_train, \
    case2_LS_ndei_scaled_train = report_errors(y_train, case2_LS_pred_scaled_train)

Train report for scaled data
mse_value: 3461.966404287296
rmse_value: 58.83847724310424
ndei_value: 0.8226478745705793


In [342]:
df.loc['case2_scaled_LS', 'mse_train'] = case2_LS_mse_scaled_train
df.loc['case2_scaled_LS', 'rmse_train'] = case2_LS_rmse_scaled_train
df.loc['case2_scaled_LS', 'ndei_train'] = case2_LS_ndei_scaled_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,,,
case3_not_scaled,,,,,,
case3_scaled,,,,,,


In [343]:
case2_LS_pred_scaled_test = case2_theta_scaled.predict(X_test)
print("Test report for scaled data")

case2_LS_mse_scaled_test, case2_LS_rmse_scaled_test, \
    case2_LS_ndei_scaled_test = report_errors(y_test, case2_LS_pred_scaled_test)

Test report for scaled data
mse_value: 3461.9664042872955
rmse_value: 58.838477243104236
ndei_value: 0.8041590918046903


In [344]:
df.loc['case2_scaled_LS', 'mse_test'] = case2_LS_mse_scaled_test
df.loc['case2_scaled_LS', 'rmse_test'] = case2_LS_rmse_scaled_test
df.loc['case2_scaled_LS', 'ndei_test'] = case2_LS_ndei_scaled_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,,,,,,
case3_scaled,,,,,,


# Scenario 3

In [366]:
bias = np.ones(X.shape) + 1
X_2D_with_bias = np.c_[X1, X2, bias]

X_train, X_test, y_train, y_test = train_test_split(X_2D_with_bias, y, test_size=0.2, random_state=42)

In [367]:
case3_model = LinearRegression()
case3_model_scaled = Pipeline([
    ("scaler", StandardScaler()),
    ("regressor", LinearRegression())
])
case3_theta = LS(X_train, y_train)
case3_theta_scaled = Pipeline([
    ('scaler', StandardScaler()),
    ('ls_regressor', LSRegressor())
])

In [368]:
case3_model.fit(X_train, y_train)

In [369]:
case3_model_scaled.fit(X_train, y_train)

In [370]:
print("Train report for not scaled data")
case3_model_y_pred_not_scaled_train = case3_model.predict(X_train)

case3_model_not_scaled_mse_train, case3_model_not_scaled_rmse_train, \
    case3_model_not_scaled_ndei_train = report_errors(y_train, case3_model_y_pred_not_scaled_train)

Train report for not scaled data
mse_value: 3.8757272452404825e-28
rmse_value: 1.96868668031266e-14
ndei_value: 2.7525116031863756e-16


In [371]:
df.loc['case3_not_scaled', 'mse_train'] = case3_model_not_scaled_mse_train
df.loc['case3_not_scaled', 'rmse_train'] = case3_model_not_scaled_rmse_train
df.loc['case3_not_scaled', 'ndei_train'] = case3_model_not_scaled_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [372]:
print("Test report for not scaled data")
case3_model_y_pred_not_scaled_test = case3_model.predict(X_test)

case3_model_not_scaled_mse_test, case3_model_not_scaled_rmse_test, \
    case3_model_not_scaled_ndei_test = report_errors(y_test, case3_model_y_pred_not_scaled_test)

Test report for not scaled data
mse_value: 3.4099991742375526e-28
rmse_value: 1.8466183076742072e-14
ndei_value: 2.5238159972660426e-16


In [373]:
df.loc['case3_not_scaled', 'mse_test'] = case3_model_not_scaled_mse_test
df.loc['case3_not_scaled', 'rmse_test'] = case3_model_not_scaled_rmse_test
df.loc['case3_not_scaled', 'ndei_test'] = case3_model_not_scaled_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [374]:
print("Train report for scaled data")
case3_model_y_pred_scaled_train = case3_model_scaled.predict(X_train)

case3_model_scaled_mse_train, case3_model_scaled_rmse_train, \
    case3_model_scaled_ndei_train = report_errors(y_train, case3_model_y_pred_scaled_train)

Train report for scaled data
mse_value: 3.6057667175222086e-28
rmse_value: 1.898885651513068e-14
ndei_value: 2.6549195670302156e-16


In [375]:
df.loc['case3_scaled', 'mse_train'] = case3_model_scaled_mse_train
df.loc['case3_scaled', 'rmse_train'] = case3_model_scaled_rmse_train
df.loc['case3_scaled', 'ndei_train'] = case3_model_scaled_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [376]:
print("Test report for scaled data")
case3_model_y_pred_scaled_test = case3_model_scaled.predict(X_test)

case3_model_scaled_mse_test, case3_model_scaled_rmse_test, \
    case3_model_scaled_ndei_test = report_errors(y_test, case3_model_y_pred_scaled_test)

Test report for scaled data
mse_value: 4.266899332533876e-28
rmse_value: 2.0656474366488284e-14
ndei_value: 2.823168179184799e-16


In [377]:
df.loc['case3_scaled', 'mse_test'] = case3_model_scaled_mse_test
df.loc['case3_scaled', 'rmse_test'] = case3_model_scaled_rmse_test
df.loc['case3_scaled', 'ndei_test'] = case3_model_scaled_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [378]:
case3_LS_pred_train = X_train @ case3_theta
print("Train report for not scaled data")

case3_LS_mse_train, case3_LS_rmse_train, \
    case3_LS_ndei_train = report_errors(y_train, case3_LS_pred_train)

Train report for not scaled data
mse_value: 2.2347598059472872e-27
rmse_value: 4.727324619641946e-14
ndei_value: 6.609490477949774e-16


In [379]:
df.loc['case3_not_scaled_LS', 'mse_train'] = case3_LS_mse_train
df.loc['case3_not_scaled_LS', 'rmse_train'] = case3_LS_rmse_train
df.loc['case3_not_scaled_LS', 'ndei_train'] = case3_LS_ndei_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [380]:
case3_LS_pred_test = X_test @ case3_theta
print("Test report for not scaled data")

case3_LS_mse_test, case3_LS_rmse_test, \
    case3_LS_ndei_test = report_errors(y_test, case3_LS_pred_test)

Test report for not scaled data
mse_value: 2.4245846997955417e-27
rmse_value: 4.924007209372811e-14
ndei_value: 6.729754662359238e-16


In [381]:
df.loc['case3_not_scaled_LS', 'mse_test'] = case3_LS_mse_test
df.loc['case3_not_scaled_LS', 'rmse_test'] = case3_LS_rmse_test
df.loc['case3_not_scaled_LS', 'ndei_test'] = case3_LS_ndei_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [383]:
case3_theta_scaled.fit(X_train, y_train)
case3_LS_pred_scaled_train = case3_theta_scaled.predict(X_train)
print("Train report for scaled data")

case3_LS_mse_scaled_train, case3_LS_rmse_scaled_train, \
    case3_LS_ndei_scaled_train = report_errors(y_train, case3_LS_pred_scaled_train)

Train report for scaled data
mse_value: 3461.966404287296
rmse_value: 58.83847724310424
ndei_value: 0.8226478745705793


In [385]:
df.loc['case3_scaled_LS', 'mse_train'] = case3_LS_mse_scaled_train
df.loc['case3_scaled_LS', 'rmse_train'] = case3_LS_rmse_scaled_train
df.loc['case3_scaled_LS', 'ndei_train'] = case3_LS_ndei_scaled_train

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [386]:
case3_LS_pred_scaled_test = case3_theta_scaled.predict(X_test)
print("Test report for scaled data")

case3_LS_mse_scaled_test, case3_LS_rmse_scaled_test, \
    case3_LS_ndei_scaled_test = report_errors(y_test, case3_LS_pred_scaled_test)

Test report for scaled data
mse_value: 3461.9664042872955
rmse_value: 58.838477243104236
ndei_value: 0.8041590918046903


In [387]:
df.loc['case3_scaled_LS', 'mse_test'] = case3_LS_mse_scaled_test
df.loc['case3_scaled_LS', 'rmse_test'] = case3_LS_rmse_scaled_test
df.loc['case3_scaled_LS', 'ndei_test'] = case3_LS_ndei_scaled_test

df

Unnamed: 0,mse_train,rmse_train,ndei_train,mse_test,rmse_test,ndei_test
case1_not_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_scaled,118.0,10.9,0.152,121.0,11.0,0.15
case1_not_scaled_LS,503.0,22.4,0.314,531.0,23.0,0.315
case1_scaled_LS,3580.0,59.8,0.837,3570.0,59.8,0.817
case2_not_scaled,3.64e-28,1.91e-14,2.67e-16,3.2499999999999998e-28,1.8e-14,2.46e-16
case2_scaled,2.82e-28,1.68e-14,2.35e-16,3.36e-28,1.83e-14,2.5e-16
case2_not_scaled_LS,3.35,1.83,0.0256,3.45,1.86,0.0254
case2_scaled_LS,3460.0,58.8,0.823,3460.0,58.8,0.804
case3_not_scaled,3.88e-28,1.97e-14,2.75e-16,3.41e-28,1.85e-14,2.52e-16
case3_scaled,3.61e-28,1.9e-14,2.65e-16,4.27e-28,2.07e-14,2.82e-16


In [187]:
def generate_dataset(loc, scale):
    X = np.linspace(0, 2 * np.pi, 1000)
    X1 = np.pow(X, 3)
    X2 = np.sin(X)
    y = create_target(X1, X2, loc, scale)
    bias = np.ones(X.shape) + 1
    X_2D_with_bias = np.c_[X1, X2, bias]
    return train_test_split(X_2D_with_bias, y, test_size=0.2, random_state=42)

In [188]:
new_df = pd.DataFrame(index=["mse", "rmse", "ndei"])

for loc in [10, 111, 211]:
    for scale in [0.2, 0.6, 1]:
        key = f"{loc}-{scale}"

        regr = Pipeline([
            ("scaler", StandardScaler()),
            ("regressor", LinearRegression())
        ])

        X_train, X_test, y_train, y_test = generate_dataset(loc, scale)

        regr.fit(X_train, y_train)
        pred = regr.predict(X_test)

        mse_val, rmse_val, ndei_val = report_errors(y_test, pred)

        new_df.loc["mse", key] = mse_val
        new_df.loc["rmse", key] = rmse_val
        new_df.loc["ndei", key] = ndei_val
        del regr

new_df

mse_value: 3.9943577505413324e-28
rmse_value: 1.9985889398626552e-14
ndei_value: 2.731517778970411e-16
mse_value: 5.139704898831719e-28
rmse_value: 2.2670917270440822e-14
ndei_value: 3.0984867550619014e-16
mse_value: 4.195398952236907e-28
rmse_value: 2.0482673048791526e-14
ndei_value: 2.7994143506796836e-16
mse_value: 3.327730842584316e-28
rmse_value: 1.8242069078326384e-14
ndei_value: 2.4931858181942685e-16
mse_value: 4.5495186125906195e-28
rmse_value: 2.132960058836222e-14
ndei_value: 2.9151658984689953e-16
mse_value: 3.327730842584316e-28
rmse_value: 1.8242069078326384e-14
ndei_value: 2.4931858181942685e-16
mse_value: 4.291403324402304e-28
rmse_value: 2.07157025572446e-14
ndei_value: 2.83126303315103e-16
mse_value: 4.291403324402304e-28
rmse_value: 2.07157025572446e-14
ndei_value: 2.83126303315103e-16
mse_value: 4.614520751180831e-28
rmse_value: 2.1481435592578145e-14
ndei_value: 2.9359175400503975e-16


Unnamed: 0,10-0.2,10-0.6,10-1,111-0.2,111-0.6,111-1,211-0.2,211-0.6,211-1
mse,3.9899999999999997e-28,5.14e-28,4.2e-28,3.3299999999999998e-28,4.55e-28,3.3299999999999998e-28,4.29e-28,4.29e-28,4.61e-28
rmse,2e-14,2.27e-14,2.05e-14,1.82e-14,2.13e-14,1.82e-14,2.07e-14,2.07e-14,2.15e-14
ndei,2.73e-16,3.1e-16,2.8e-16,2.49e-16,2.92e-16,2.49e-16,2.83e-16,2.83e-16,2.94e-16


In [191]:
new_df = pd.DataFrame(index=["mse", "rmse", "ndei"])

for loc in [0, 10, 20]:
    for scale in [0.2, 6, 100]:
        key = f"{loc}-{scale}"

        regr = Pipeline([
            ("scaler", StandardScaler()),
            ("regressor", LinearRegression())
        ])

        X_train, X_test, y_train, y_test = generate_dataset(loc, scale)

        regr.fit(X_train, y_train)
        pred = regr.predict(X_test)

        mse_val, rmse_val, ndei_val = report_errors(y_test, pred)

        new_df.loc["mse", key] = mse_val
        new_df.loc["rmse", key] = rmse_val
        new_df.loc["ndei", key] = ndei_val
        del regr

new_df

mse_value: 3.786676558695092e-28
rmse_value: 1.945938477623353e-14
ndei_value: 2.659559173171488e-16
mse_value: 3.7223190673758664e-28
rmse_value: 1.9293312487429075e-14
ndei_value: 2.636861688940696e-16
mse_value: 5.470198175074062e-28
rmse_value: 2.3388454790930636e-14
ndei_value: 3.1965542693568104e-16
mse_value: 1.2323190630910036e-28
rmse_value: 1.1100986726822997e-14
ndei_value: 1.5171975589195124e-16
mse_value: 4.618149511344847e-28
rmse_value: 2.1489880202888166e-14
ndei_value: 2.937071684493921e-16
mse_value: 2.3160167316383684e-28
rmse_value: 1.521846487540175e-14
ndei_value: 2.079942830998219e-16
mse_value: 2.1264534561137594e-28
rmse_value: 1.4582364198283347e-14
ndei_value: 1.9930054786438403e-16
mse_value: 5.290732319136282e-28
rmse_value: 2.300159194302925e-14
ndei_value: 3.1436808282009527e-16
mse_value: 7.6033569488822e-28
rmse_value: 2.757418529872134e-14
ndei_value: 3.7686277493989276e-16


Unnamed: 0,0-0.2,0-6,0-100,10-0.2,10-6,10-100,20-0.2,20-6,20-100
mse,3.79e-28,3.72e-28,5.47e-28,1.23e-28,4.62e-28,2.3199999999999997e-28,2.13e-28,5.29e-28,7.6e-28
rmse,1.95e-14,1.93e-14,2.34e-14,1.11e-14,2.15e-14,1.52e-14,1.46e-14,2.3e-14,2.76e-14
ndei,2.66e-16,2.64e-16,3.2e-16,1.52e-16,2.94e-16,2.08e-16,1.99e-16,3.14e-16,3.77e-16
