# 3. Model Development

This notebook implements various regression models for Tesla stock price prediction.

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

%matplotlib inline

## 3.1 Data Preparation

In [2]:
# Load processed data
df = pd.read_csv('../data/TSLA_processed.csv')

# Define features and target
X = df.drop('Close', axis=1)
y = df['Close']

# Split data into train, validation, and test sets (60-20-20)
X_temp, X_test, y_temp, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_temp, y_temp, test_size=0.25, random_state=42)

print("Training set shape:", X_train.shape)
print("Validation set shape:", X_val.shape)
print("Test set shape:", X_test.shape)

Training set shape: (1437, 11)
Validation set shape: (479, 11)
Test set shape: (480, 11)


## 3.2 Linear Regression Model

In [3]:
# Train Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X_train, y_train)

# Make predictions
lr_pred_val = lr_model.predict(X_val)

print("Linear Regression Performance on Validation Set:")
print(f"R² Score: {r2_score(y_val, lr_pred_val):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_val, lr_pred_val)):.4f}")

Linear Regression Performance on Validation Set:
R² Score: 0.9998
RMSE: 1.7720


## 3.3 Ridge Regression Model

In [4]:
# Train Ridge Regression model
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)

# Make predictions
ridge_pred_val = ridge_model.predict(X_val)

print("Ridge Regression Performance on Validation Set:")
print(f"R² Score: {r2_score(y_val, ridge_pred_val):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_val, ridge_pred_val)):.4f}")

Ridge Regression Performance on Validation Set:
R² Score: 0.9996
RMSE: 2.1835


## 3.4 Decision Tree Model

In [5]:
# Train Decision Tree model
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)

# Make predictions
dt_pred_val = dt_model.predict(X_val)

print("Decision Tree Performance on Validation Set:")
print(f"R² Score: {r2_score(y_val, dt_pred_val):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_val, dt_pred_val)):.4f}")

Decision Tree Performance on Validation Set:
R² Score: 0.9992
RMSE: 3.2388


## 3.5 Random Forest Model

In [6]:
# Train Random Forest model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
rf_pred_val = rf_model.predict(X_val)

print("Random Forest Performance on Validation Set:")
print(f"R² Score: {r2_score(y_val, rf_pred_val):.4f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_val, rf_pred_val)):.4f}")

Random Forest Performance on Validation Set:
R² Score: 0.9996
RMSE: 2.3473


## 3.6 Save Models

In [9]:
import joblib

# Save all models
models = {
    'linear': lr_model,
    'ridge': ridge_model,
    'decision_tree': dt_model,
    'random_forest': rf_model
}

for name, model in models.items():
    joblib.dump(model, f'./models/{name}_model.joblib')

print("All models saved successfully!")

All models saved successfully!


In [10]:
# Re-load the model if it's already saved but causing issues
decision_tree_model = joblib.load('./models/decision_tree_model.joblib')

# Re-save the model correctly
joblib.dump(decision_tree_model, './models/decision_tree_model.joblib')

['./models/decision_tree_model.joblib']