In [36]:
import pandas as pd

In [37]:
df = pd.read_csv("../data/tesla/raw_data.csv").drop("Date", axis=1)

In [38]:
from pandas import DataFrame

def add_lag_features(df: DataFrame, cols: list[str], lag: int) -> list[str]:
  features: list[str] = []
  for i in range(1, lag + 1):
    for col in cols:
      s = f"{col}_lag_{i}"
      features.append(s)
      df[s] = df[col].shift(i)
  df.dropna(inplace=True)
  return features

In [39]:
targets = df.columns
features = add_lag_features(df, df.columns, 3)
df

Unnamed: 0,Open,High,Low,Close,Volume,Adj Close,Open_lag_1,High_lag_1,Low_lag_1,Close_lag_1,...,Low_lag_2,Close_lag_2,Volume_lag_2,Adj Close_lag_2,Open_lag_3,High_lag_3,Low_lag_3,Close_lag_3,Volume_lag_3,Adj Close_lag_3
3,23.000000,23.100000,18.709999,19.200001,5139800,19.200001,25.000000,25.920000,20.270000,21.959999,...,23.299999,23.830000,17187100.0,23.830000,19.000000,25.000000,17.540001,23.889999,18766300.0,23.889999
4,20.000000,20.000000,15.830000,16.110001,6866900,16.110001,23.000000,23.100000,18.709999,19.200001,...,20.270000,21.959999,8218800.0,21.959999,25.790001,30.420000,23.299999,23.830000,17187100.0,23.830000
5,16.400000,16.629999,14.980000,15.800000,6921700,15.800000,20.000000,20.000000,15.830000,16.110001,...,18.709999,19.200001,5139800.0,19.200001,25.000000,25.920000,20.270000,21.959999,8218800.0,21.959999
6,16.139999,17.520000,15.570000,17.459999,7711400,17.459999,16.400000,16.629999,14.980000,15.800000,...,15.830000,16.110001,6866900.0,16.110001,23.000000,23.100000,18.709999,19.200001,5139800.0,19.200001
7,17.580000,17.900000,16.549999,17.400000,4050600,17.400000,16.139999,17.520000,15.570000,17.459999,...,14.980000,15.800000,6921700.0,15.800000,20.000000,20.000000,15.830000,16.110001,6866900.0,16.110001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1687,244.820007,246.850006,242.779999,246.169998,3010700,246.169998,246.210007,246.500000,243.000000,243.690002,...,243.000000,244.899994,3861500.0,244.899994,247.000000,250.070007,245.320007,246.869995,3725200.0,246.869995
1688,246.110001,258.119995,246.020004,258.000000,7575500,258.000000,244.820007,246.850006,242.779999,246.169998,...,243.000000,243.690002,3057000.0,243.690002,247.630005,248.660004,243.000000,244.899994,3861500.0,244.899994
1689,257.000000,261.000000,254.270004,255.729996,4816600,255.729996,246.110001,258.119995,246.020004,258.000000,...,242.779999,246.169998,3010700.0,246.169998,246.210007,246.500000,243.000000,243.690002,3057000.0,243.690002
1690,262.399994,265.750000,259.059998,262.049988,7100400,262.049988,257.000000,261.000000,254.270004,255.729996,...,246.020004,258.000000,7575500.0,258.000000,244.820007,246.850006,242.779999,246.169998,3010700.0,246.169998


In [40]:
X = df[features]
y = df[targets]

In [41]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)

In [42]:
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()

X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [43]:
X_train

array([[ 0.58119296,  0.58214682,  0.47385434, ...,  0.6359548 ,
         1.06373671,  0.6359548 ],
       [-0.92241243, -0.90836476, -0.91454266, ..., -0.94006457,
        -0.51758299, -0.94006457],
       [ 0.86469711,  0.83593802,  0.86333688, ...,  0.7832006 ,
        -0.2128057 ,  0.7832006 ],
       ...,
       [-0.0454779 , -0.06088902, -0.04999958, ..., -0.14044142,
         2.21630507, -0.14044142],
       [ 1.26869849,  1.23883286,  1.18628289, ...,  1.28737332,
        -0.13797054,  1.28737332],
       [ 0.91786085,  0.8821391 ,  0.89687572, ...,  0.76860394,
         0.70201407,  0.76860394]], shape=(1351, 18))

In [44]:
from sklearn.linear_model import Lasso

reg = Lasso(alpha=.1, max_iter=150000)
reg.fit(X_train, y_train)

In [45]:
y_train_pred = reg.predict(X_train)
y_test_pred = reg.predict(X_test)

In [46]:
from sklearn.metrics import mean_absolute_percentage_error, root_mean_squared_error

print("Train R2-squared:", reg.score(X_train, y_train))
print("Test R2-square:", reg.score(X_test, y_test))
print("Train MAPE (%):", mean_absolute_percentage_error(y_train, y_train_pred))
print("Test MAPE (%):", mean_absolute_percentage_error(y_test, y_test_pred))
print("Train RMSE:", root_mean_squared_error(y_train, y_train_pred))
print("Test RMSE:", root_mean_squared_error(y_test, y_test_pred))

Train R2-squared: 0.9369817061528982
Test R2-square: 0.9443473961511587
Train MAPE (%): 0.08731755895178052
Test MAPE (%): 0.09159500629128087
Train RMSE: 432395.93529371865
Test RMSE: 407412.5122126751
