- Ridge, Lasso Regression (with GridSearch for hyperparameter)
- R-squared, MAPE and RMSE

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
df = pd.read_csv("../data/tesla/raw_data.csv").drop("Date", axis=1)

In [3]:
from pandas import DataFrame

def add_lag_features(df: DataFrame, cols: list[str], lag: int) -> list[str]:
  features: list[str] = []
  for i in range(1, lag + 1):
    for col in cols:
      s = f"{col}_lag_{i}"
      features.append(s)
      df[s] = df[col].shift(i)
  df.dropna(inplace=True)
  return features

In [4]:
targets = df.columns
features = add_lag_features(df, df.columns, 3)
df

Unnamed: 0,Open,High,Low,Close,Volume,Adj Close,Open_lag_1,High_lag_1,Low_lag_1,Close_lag_1,...,Low_lag_2,Close_lag_2,Volume_lag_2,Adj Close_lag_2,Open_lag_3,High_lag_3,Low_lag_3,Close_lag_3,Volume_lag_3,Adj Close_lag_3
3,23.000000,23.100000,18.709999,19.200001,5139800,19.200001,25.000000,25.920000,20.270000,21.959999,...,23.299999,23.830000,17187100.0,23.830000,19.000000,25.000000,17.540001,23.889999,18766300.0,23.889999
4,20.000000,20.000000,15.830000,16.110001,6866900,16.110001,23.000000,23.100000,18.709999,19.200001,...,20.270000,21.959999,8218800.0,21.959999,25.790001,30.420000,23.299999,23.830000,17187100.0,23.830000
5,16.400000,16.629999,14.980000,15.800000,6921700,15.800000,20.000000,20.000000,15.830000,16.110001,...,18.709999,19.200001,5139800.0,19.200001,25.000000,25.920000,20.270000,21.959999,8218800.0,21.959999
6,16.139999,17.520000,15.570000,17.459999,7711400,17.459999,16.400000,16.629999,14.980000,15.800000,...,15.830000,16.110001,6866900.0,16.110001,23.000000,23.100000,18.709999,19.200001,5139800.0,19.200001
7,17.580000,17.900000,16.549999,17.400000,4050600,17.400000,16.139999,17.520000,15.570000,17.459999,...,14.980000,15.800000,6921700.0,15.800000,20.000000,20.000000,15.830000,16.110001,6866900.0,16.110001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1687,244.820007,246.850006,242.779999,246.169998,3010700,246.169998,246.210007,246.500000,243.000000,243.690002,...,243.000000,244.899994,3861500.0,244.899994,247.000000,250.070007,245.320007,246.869995,3725200.0,246.869995
1688,246.110001,258.119995,246.020004,258.000000,7575500,258.000000,244.820007,246.850006,242.779999,246.169998,...,243.000000,243.690002,3057000.0,243.690002,247.630005,248.660004,243.000000,244.899994,3861500.0,244.899994
1689,257.000000,261.000000,254.270004,255.729996,4816600,255.729996,246.110001,258.119995,246.020004,258.000000,...,242.779999,246.169998,3010700.0,246.169998,246.210007,246.500000,243.000000,243.690002,3057000.0,243.690002
1690,262.399994,265.750000,259.059998,262.049988,7100400,262.049988,257.000000,261.000000,254.270004,255.729996,...,246.020004,258.000000,7575500.0,258.000000,244.820007,246.850006,242.779999,246.169998,3010700.0,246.169998


In [5]:
X = df[features]
y = df[targets]

In [6]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [7]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [8]:
from sklearn.linear_model import RidgeCV

reg = RidgeCV(alphas=np.logspace(-6, 6, 50))
reg.fit(X_train, y_train)
reg.alpha_

np.float64(0.1389495494373136)

In [9]:
y_train_pred = reg.predict(X_train)
y_test_pred = reg.predict(X_test)

In [10]:
from sklearn.metrics import mean_absolute_percentage_error, root_mean_squared_error

print("Train R2-squared:", reg.score(X_train, y_train))
print("Test R2-square:", reg.score(X_test, y_test))
print("Train MAPE (%):", mean_absolute_percentage_error(y_train, y_train_pred))
print("Test MAPE (%):", mean_absolute_percentage_error(y_test, y_test_pred))
print("Train RMSE:", root_mean_squared_error(y_train, y_train_pred))
print("Test RMSE:", root_mean_squared_error(y_test, y_test_pred))

Train R2-squared: 0.9368095287040408
Test R2-square: 0.9458761974770362
Train MAPE (%): 0.0868019777742081
Test MAPE (%): 0.0907759073474051
Train RMSE: 433129.08888108685
Test RMSE: 401683.6644487756
