In [1]:
import pandas as pd
from river import linear_model, optim
from river import metrics
import plotly.express as px
import pickle
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("../data/input/AAPL.csv")
df["Date"] = pd.to_datetime(df["Date"])

# Extract date-based features
df["Year"] = df["Date"].dt.year
df["Month"] = df["Date"].dt.month
df["DayOfWeek"] = df["Date"].dt.dayofweek
df["MA_5"] = df["Adj Close"].rolling(window=5).mean().shift(1)  # 5-day moving average
df["MA_10"] = df["Adj Close"].rolling(window=10).mean().shift(1)  # 10-day moving average
df["Volatility"] = df["Adj Close"].rolling(window=5).std().shift(1)
df["Momentum"] = df["Adj Close"].diff(5).shift(1)

# Create a lag feature (previous day's adjusted close)
df["Prev_Close"] = df["Adj Close"].shift(1)
df.dropna(inplace=True)  # Remove first row with NaN

In [3]:
df.head()

Unnamed: 0,Date,Adj Close,Close,Dividends,High,Low,Open,Stock Splits,Volume,Ticker,Year,Month,DayOfWeek,MA_5,MA_10,Volatility,Momentum,Prev_Close
10,2023-04-11,159.194595,160.800003,0.0,162.360001,160.509995,162.350006,0.0,47644200,AAPL,2023,4,1,162.808124,160.997385,1.621282,-2.841339,160.412292
11,2023-04-12,158.501572,160.100006,0.0,162.059998,159.779999,161.220001,0.0,50133100,AAPL,2023,4,2,161.744852,161.24687,1.937746,-5.31636,159.194595
12,2023-04-13,163.907043,165.559998,0.0,165.800003,161.419998,161.630005,0.0,68445600,AAPL,2023,4,3,160.649899,161.489427,1.908152,-5.474762,158.501572
13,2023-04-14,163.560547,165.210007,0.0,166.320007,163.820007,164.589996,0.0,49386500,AAPL,2023,4,4,161.006305,161.963644,2.364428,1.782028,163.907043
14,2023-04-17,163.580338,165.229996,0.0,165.389999,164.029999,165.089996,0.0,41516200,AAPL,2023,4,0,161.11521,162.245801,2.489376,0.544525,163.560547


In [4]:
# Convert to river format
dataset = [
    (
        {
            "Year": row["Year"],
            "Month": row["Month"],
            "DayOfWeek": row["DayOfWeek"],
            "Prev_Close": row["Prev_Close"],
            "MA_5": row["MA_5"],
            "MA_10": row["MA_10"],
            "Volatility": row["Volatility"],
            "Momentum": row["Momentum"],
        },
        row["Adj Close"],
        row["Date"]  # Store date separately
    )
    for _, row in df.iterrows()
]

In [5]:
dataset[0]

({'Year': 2023,
  'Month': 4,
  'DayOfWeek': 1,
  'Prev_Close': 160.41229248046875,
  'MA_5': 162.80812377929686,
  'MA_10': 160.99738464355468,
  'Volatility': 1.6212817132442896,
  'Momentum': -2.841339111328125},
 159.19459533691406,
 Timestamp('2023-04-11 00:00:00'))

In [6]:
model = linear_model.LinearRegression(optimizer=optim.Adam(lr=0.009))
model

In [7]:
# Train incrementally
for x, y, _ in dataset:
    model.learn_one(x, y)
    print("Prediction:", model.predict_one(x))

Prediction: 25.83401570221781
Prediction: 50.967214488370594
Prediction: 75.39239682734843
Prediction: 98.80394511608392
Prediction: 120.78196093864632
Prediction: 141.2805228883952
Prediction: 159.99636116701305
Prediction: 176.37660082129804
Prediction: 190.28779409323903
Prediction: 200.62982793214326
Prediction: 208.30640461757272
Prediction: 213.03308381131777
Prediction: 215.2621589018997
Prediction: 215.44015000553784
Prediction: 213.30478989062536
Prediction: 209.7977256236362
Prediction: 204.69687187780212
Prediction: 198.49636321704565
Prediction: 192.41834682976923
Prediction: 185.89594203445404
Prediction: 179.52202475419418
Prediction: 173.4318155990754
Prediction: 167.93392185384795
Prediction: 162.94456913113882
Prediction: 159.2908945559691
Prediction: 156.26154772615658
Prediction: 154.11942599141557
Prediction: 153.38033088725612
Prediction: 153.58526559462604
Prediction: 154.2863858625066
Prediction: 155.8802162565094
Prediction: 158.06161243795216
Prediction: 160.63

In [8]:
# Evaluate model
def evaluate_model(model, dataset): 
    metric = metrics.MAE()  # Use MAE directly

    results = []

    for x, y, date in dataset:  # Unpack date
        y_pred = model.predict_one(x)  # Predict
        model.learn_one(x, y)  # Train

        metric.update(y, y_pred)  # Update metric

        # Store data for visualization
        results.append((date, y, y_pred))

    df_results = pd.DataFrame(results, columns=["Date", "Actual", "Prediction"])

    # Create interactive plot with Plotly Express
    fig = px.line(
        df_results,
        x="Date",
        y=["Actual", "Prediction"],
        title=f"AAPL Stock Price Prediction - MAE: {metric.get():.2f}",
        labels={"value": "Price", "variable": "Type"},
        template="plotly_white"
    )

    return fig

In [9]:
# Run model evaluation
evaluate_model(model, dataset)

In [11]:
# Save model
with open('../models/model_nb.pkl', 'wb') as f:
    pickle.dump(model, f)