# In [None]:  (notebook cell prompt left over from export; commented out so the file parses as Python)
"""
stock_price_lr.py
=================
Forecast short-term stock prices with a baseline Linear Regression model.

Usage examples
--------------
# Train on the last 3 years of AAPL data and plot results
python stock_price_lr.py --ticker AAPL --period 3y --test_size 0.2

# Predict tomorrow's close after training
python stock_price_lr.py --ticker AAPL --predict --model_path models/aapl_lr.pkl
"""
from __future__ import annotations

import argparse
from pathlib import Path
import joblib
import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt

# Presumably a random-state seed. Nothing visible in this file reads it
# (train_model splits with shuffle=False) — TODO confirm before removing.
RSTATE = 42


# ---------------------------------------------------------------------
# Data utilities
# ---------------------------------------------------------------------
def download_data(ticker: str, period: str = "5y") -> pd.DataFrame:
    """Fetch historical daily OHLCV data with yfinance.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``yf.download`` (e.g. ``"AAPL"``).
    period : str, default "5y"
        Look-back window handed to ``yf.download`` (e.g. ``"1y"``, ``"60d"``).

    Returns
    -------
    pd.DataFrame
        The downloaded frame with single-level column names, the former
        index moved into a regular column, and every row containing a
        missing value removed.

    Raises
    ------
    ValueError
        If the download yields no usable rows.
    """
    df = yf.download(ticker, period=period, interval="1d", auto_adjust=True, progress=False)
    if df is None or df.empty:
        raise ValueError(f"No price data returned for ticker {ticker!r} (period={period!r})")

    # Newer yfinance releases return (field, ticker) MultiIndex columns even
    # for a single symbol; keep only the field level so df["Close"] is a Series.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    df = df.reset_index()  # bring Date into a column
    df = df.dropna()
    if df.empty:
        raise ValueError(f"No complete price rows for ticker {ticker!r} (period={period!r})")
    return df


def make_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Derive the model inputs from the closing-price history.

    Adds a ``Day`` column (0-based row counter) and ``Close_lag1`` ..
    ``Close_lag3`` (the ``Close`` column shifted down by 1-3 rows), then
    drops every row that contains a missing value. The input frame is
    left unmodified; a new frame is returned.
    """
    # Previous closes, keyed by the column name each one will receive.
    lagged = {f"Close_lag{k}": df["Close"].shift(k) for k in (1, 2, 3)}

    # assign() works on a copy, so the caller's frame is not touched.
    enriched = df.assign(Day=np.arange(len(df)), **lagged)

    # The shifted columns are NaN for the earliest rows; discard those.
    return enriched.dropna()


# ---------------------------------------------------------------------
# Train / predict helpers
# ---------------------------------------------------------------------
def train_model(df: pd.DataFrame, test_size: float = 0.2):
    """
    Fit a scaled linear regression on the lag features and score it.

    The rows of *df* are split without shuffling, so the trailing
    ``test_size`` fraction is held out for evaluation. MAE and R² on
    that hold-out are printed.

    Returns a 6-tuple: the fitted pipeline, the index of the training
    rows, the index of the test rows, the training target, the test
    target, and the predictions for the test rows.
    """
    feature_cols = ["Day", "Close_lag1", "Close_lag2", "Close_lag3"]
    features, target = df[feature_cols], df["Close"]

    # shuffle=False preserves chronological order between the two parts.
    X_fit, X_eval, y_fit, y_eval = train_test_split(
        features, target, test_size=test_size, shuffle=False
    )

    steps = [("scaler", StandardScaler()), ("lr", LinearRegression())]
    model = Pipeline(steps=steps)
    model.fit(X_fit, y_fit)

    forecast = model.predict(X_eval)
    mae = mean_absolute_error(y_eval, forecast)
    r2 = r2_score(y_eval, forecast)
    print(f"MAE: {mae:.3f}\nR² : {r2:.3f}")

    return model, X_fit.index, X_eval.index, y_fit, y_eval, forecast


def plot_results(df: pd.DataFrame, train_idx, test_idx, y_train, y_test, pred,
                 ticker: str, out_dir: Path):
    """
    Plot train / test / predicted closes against date and save as PNG.

    Parameters
    ----------
    df : pd.DataFrame
        Frame holding a ``Date`` column; *train_idx* and *test_idx* are
        row labels into it.
    train_idx, test_idx
        Row labels of the training and test rows.
    y_train, y_test
        Actual target values for the training and test rows.
    pred
        Predicted values for the test rows.
    ticker : str
        Used in the plot title and (lower-cased) in the file name.
    out_dir : Path
        Directory for the image; created, with parents, if missing.
    """
    # Use an explicit figure instead of pyplot's implicit "current figure"
    # so it can be closed reliably once it has been written to disk.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        train_dates = df.loc[train_idx, "Date"]
        test_dates = df.loc[test_idx, "Date"]

        ax.plot(train_dates, y_train, label="Train")
        ax.plot(test_dates, y_test, label="Test – Actual")
        ax.plot(test_dates, pred, label="Test – Predicted", linestyle="--")
        ax.set_title(f"{ticker} – Linear Regression forecast")
        ax.set_xlabel("Date")
        ax.set_ylabel("Close Price")
        ax.legend()
        fig.tight_layout()

        out_dir.mkdir(exist_ok=True, parents=True)
        img_path = out_dir / f"{ticker.lower()}_lr_plot.png"
        fig.savefig(img_path, dpi=300)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    print(f"Plot saved ➜ {img_path.resolve()}")


# ---------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------
def _next_day_features(raw: pd.DataFrame) -> pd.DataFrame:
    """Build the one-row feature frame for the day after the last row of *raw*."""
    # ravel() also copes with a one-column DataFrame under "Close".
    closes = np.asarray(raw["Close"], dtype=float).ravel()
    if closes.size < 3:
        raise ValueError("need at least 3 closing prices to build the lag features")
    return pd.DataFrame(
        {
            # make_features numbers the downloaded rows 0..len-1, so the
            # next (not yet observed) day gets index len(raw).
            "Day": [len(raw)],
            "Close_lag1": [closes[-1]],
            "Close_lag2": [closes[-2]],
            "Close_lag3": [closes[-3]],
        }
    )


def main() -> None:
    """
    Command-line entry point.

    Without ``--predict``: download ``--period`` of history for
    ``--ticker``, build features, train and score the model, save it to
    ``models/<ticker>_lr.pkl`` and save a plot under ``reports/``.

    With ``--predict``: load the pipeline from ``--model_path`` and print
    a forecast for the close following the most recent downloaded row.
    The ``Day`` feature is a row counter over the downloaded window, so
    ``--period`` should be the same value that was used for training.
    """
    ap = argparse.ArgumentParser(description="Stock Price Prediction – Linear Regression")
    ap.add_argument("--ticker", type=str, required=True, help="e.g. AAPL, MSFT, TSLA")
    ap.add_argument("--period", type=str, default="5y", help="Data period (e.g. 1y, 3y, 60d)")
    ap.add_argument("--test_size", type=float, default=0.2, help="Fraction for test split")
    ap.add_argument("--model_path", type=str, help="Path to saved model for --predict")
    ap.add_argument("--predict", action="store_true", help="Predict next-day close with saved model")
    args = ap.parse_args()

    if args.predict:
        if not args.model_path:
            ap.error("--model_path is required with --predict")
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only point --model_path at files you created yourself.
        pipe = joblib.load(args.model_path)

        # Download the same kind of window as training so the Day counter
        # is on the scale the model was fitted on, then shift the lags
        # forward by one day: the latest close becomes lag-1 of "tomorrow".
        raw = download_data(args.ticker, args.period)
        try:
            X_next = _next_day_features(raw)
        except ValueError as exc:
            ap.error(str(exc))
        pred = float(np.ravel(pipe.predict(X_next))[0])
        print(f"Predicted next close for {args.ticker}: {pred:.2f}")
        return

    # -----------------------------------------------------------------
    # Training workflow
    # -----------------------------------------------------------------
    raw = download_data(args.ticker, args.period)
    df = make_features(raw)

    pipe, tr_idx, te_idx, y_tr, y_te, y_hat = train_model(df, args.test_size)

    # Save model + plot
    models_dir = Path("models")
    models_dir.mkdir(exist_ok=True)
    model_fp = models_dir / f"{args.ticker.lower()}_lr.pkl"
    joblib.dump(pipe, model_fp)
    print(f"Model saved ➜ {model_fp.resolve()}")

    plot_results(df, tr_idx, te_idx, y_tr, y_te, y_hat, args.ticker, Path("reports"))


# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
