In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import joblib
import matplotlib.pyplot as plt

# Load cleaned data
df = pd.read_csv("stock_cleaned.csv")

# Put the rows in chronological order, then drop Date (it is not a feature).
# read_csv leaves Date as plain strings, so sorting the raw column would be
# lexicographic and only chronological for ISO-8601 dates; parse it first.
# NOTE(review): to_datetime infers the format and raises on unparseable
# values -- confirm the Date format used in stock_cleaned.csv.
if "Date" in df.columns:
    df["Date"] = pd.to_datetime(df["Date"])
    # mergesort is stable: rows sharing a timestamp keep their file order.
    df = (
        df.sort_values("Date", kind="mergesort")
        .drop(columns=["Date"])
        .reset_index(drop=True)
    )

# -------------------- LINEAR REGRESSION MODEL --------------------

# Define features and target
# NOTE(review): Target_Close is presumably a future closing price produced by
# the cleaning step -- verify against the code that writes stock_cleaned.csv.
features = ["Open", "High", "Low", "Volume", "Volatility", "DayOfWeek", "Day_Return"]
target = "Target_Close"

X = df[features]
y = df[target]

# Train/test split (80/20)
# shuffle=False keeps row order, so the test set is the final 20% of rows
# (the most recent ones once the frame is sorted by date above).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Train model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = model.predict(X_test)

# Evaluation: RMSE is derived from MSE so both stay available to later cells
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5
mae = mean_absolute_error(y_test, y_pred)

print("📊 Linear Regression Evaluation:")
print(f"• RMSE: {rmse:.2f}")
print(f"• MAE: {mae:.2f}")

# Save model (written to the current working directory)
joblib.dump(model, "stock_lr_model.pkl")
# Save metrics as a plain dict with keys "rmse" and "mae"
joblib.dump({"rmse": rmse, "mae": mae}, "metrics.pkl")



  super().__init__(**kwargs)


[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step
Linear Regression RMSE: 190.75
LSTM RMSE: 230.77
✅ Best Model: Linear Regression (RMSE: 190.75)


['stock_lr_model.pkl']