#In [None]:
#---Machine Learning Stock Predictor---
#Author: Jack McSharry | Royal Holloway
#Description: Predicts short-term stock returns using ML and technical indicators.

import pandas as pd
import numpy as np
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
import lightgbm as lgb
import matplotlib.pyplot as plt
import seaborn as sns

#1. Download stock data
ticker = "AAPL"
data = yf.download(ticker, start="2020-01-01", end="2024-12-31")
#A failed download (bad symbol, no network) normally comes back as an empty
#frame instead of an exception, so stop here with a clear message rather than
#failing later inside feature engineering or model fitting.
if data.empty:
    raise RuntimeError(f"No price data was downloaded for {ticker}")
data.head()

#2. Feature engineering
def addTechnicalIndicators(df):
    """Return a copy of ``df`` with indicator features and the prediction target.

    Columns added (all derived from ``Close``):
        Return      day-over-day percentage change
        MA5/10/20   simple moving averages over 5, 10 and 20 rows
        RSI         14-row Relative Strength Index in [0, 100], computed from
                    simple rolling averages of gains and losses
        Volatility  10-row rolling standard deviation of ``Return``
        Target      the next row's ``Return`` (the value to predict)

    Args:
        df: Price frame with a ``Close`` column. Single-ticker MultiIndex
            columns (e.g. ``("Close", "AAPL")``) are flattened first.

    Returns:
        A new DataFrame without the rows that contain NaN (indicator warm-up
        rows and the final row, which has no next-day target). ``df`` itself
        is left unmodified.
    """
    frame = df.copy()

    #Collapse (field, ticker) style column pairs to plain field names so that
    #frame["Close"] is a Series; left untouched if that would be ambiguous.
    if isinstance(frame.columns, pd.MultiIndex):
        for level in range(frame.columns.nlevels):
            labels = frame.columns.get_level_values(level)
            if "Close" in labels and labels.is_unique:
                frame.columns = labels
                break

    close = frame["Close"]
    frame["Return"] = close.pct_change()
    frame["MA5"] = close.rolling(window=5).mean()
    frame["MA10"] = close.rolling(window=10).mean()
    frame["MA20"] = close.rolling(window=20).mean()

    #RSI = 100 * avgGain / (avgGain + avgLoss), which is algebraically the
    #same as 100 - 100 / (1 + avgGain / avgLoss) but never divides by a zero
    #average loss. The previous expression reduced to the sign of the mean
    #return and produced only 50, -inf or NaN.
    delta = close.diff()
    avgGain = delta.clip(lower=0).rolling(window=14).mean()
    avgLoss = (-delta.clip(upper=0)).rolling(window=14).mean()
    totalMove = avgGain + avgLoss
    #A completely flat window has no gains and no losses: report neutral 50.
    #Warm-up rows keep their NaN because NaN != 0 evaluates to True.
    frame["RSI"] = (100 * avgGain / totalMove).where(totalMove != 0, 50.0)

    frame["Volatility"] = frame["Return"].rolling(window=10).std()
    frame["Target"] = frame["Return"].shift(-1)  #predict next day's return
    return frame.dropna()

#Attach the indicator columns and the next-day target to the price data
data = addTechnicalIndicators(data)

#3. Train-test split
features = ["MA5", "MA10", "MA20", "RSI", "Volatility"]
X, y = data[features], data["Target"]
#shuffle=False keeps the rows in their original order, so the model is tested
#on the last 20% of rows rather than on a random sample
XTrain, XTest, yTrain, yTest = train_test_split(
    X,
    y,
    shuffle=False,
    test_size=0.2,
)

#4. Train models
#Both regressors get the same fixed seed so repeated runs are comparable
models = {
    "RandomForest": RandomForestRegressor(random_state=42, n_estimators=200),
    "LightGBM": lgb.LGBMRegressor(
        random_state=42,
        n_estimators=300,
        learning_rate=0.05,
    ),
}

#Fit each model on the training rows and score it on the held-out rows
results = {}
for name, model in models.items():
    preds = model.fit(XTrain, yTrain).predict(XTest)
    results[name] = {
        "MSE": mean_squared_error(yTest, preds),
        "R2": r2_score(yTest, preds),
    }

#One row per model, one column per metric
resultsDf = pd.DataFrame.from_dict(results, orient="index")
print(resultsDf)

#5. Visualize predictions
#Plot whichever model scored the lowest test MSE instead of assuming that
#LightGBM is the better one, and name that model in the title.
bestName = resultsDf["MSE"].idxmin()
bestModel = models[bestName]
preds = bestModel.predict(XTest)
plt.figure(figsize=(12,5))
plt.plot(yTest.index, yTest, label="Actual", color="blue")
plt.plot(yTest.index, preds, label="Predicted", color="orange")
plt.title(f"{ticker} - Actual vs Predicted Returns ({bestName})")
plt.legend()
plt.show()
