In [None]:
# Price Movement Prediction (Logistic Regression)
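Predict whether SPY's next-day return is positive (up) or not (down), using trailing 1-, 5-, and 10-day returns and a 14-day RSI as features. The model is a logistic regression evaluated with walk-forward (time-series) cross-validation.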


In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.model_selection import TimeSeriesSplit
import yfinance as yf


def rsi(series: pd.Series, window: int = 14) -> pd.Series:
    """Relative Strength Index based on simple rolling means of gains and losses.

    Parameters
    ----------
    series : pd.Series
        Price (or any level) series; one-step differences are taken internally.
    window : int, default 14
        Number of consecutive differences averaged for the gain and loss legs.

    Returns
    -------
    pd.Series
        RSI on the same index as ``series``. The first ``window`` entries are
        NaN (not enough differences yet). A window with gains and no losses
        yields 100, a window with losses and no gains yields 0, and a window
        with no movement at all yields NaN because the ratio is undefined.
    """
    delta = series.diff()
    avg_gain = delta.clip(lower=0).rolling(window).mean()
    avg_loss = -delta.clip(upper=0).rolling(window).mean()
    # 100 * g / (g + l) is algebraically the same as 100 - 100 / (1 + g / l),
    # but it stays well-defined when avg_loss is 0: an all-gain window maps to
    # 100 instead of NaN, so those rows are not lost to a later dropna().
    return 100 * avg_gain / (avg_gain + avg_loss)


# Download daily SPY bars. auto_adjust=False is requested explicitly because
# recent yfinance releases default to auto-adjusted prices, in which case the
# 'Adj Close' column read below is not present.
data = yf.download('SPY', start='2014-01-01', end=None, progress=False, auto_adjust=False)
close = data['Adj Close']
if isinstance(close, pd.DataFrame):
    # Some yfinance versions return (field, ticker) MultiIndex columns even for
    # a single ticker; collapse the one-column frame to a Series.
    close = close.iloc[:, 0]
close = close.dropna()
if close.empty:
    raise ValueError('No SPY price data returned by yfinance')

ret1 = close.pct_change()
# Trailing returns over 1, 5 and 10 rows plus a 14-row RSI; warm-up rows that
# contain any NaN are dropped.
feature_frame = pd.DataFrame({
    'ret1': ret1,
    'ret5': close.pct_change(5),
    'ret10': close.pct_change(10),
    'rsi14': rsi(close, 14),
}).dropna()

# Target: 1 when the NEXT row's return is positive, else 0. The final row has
# no next-row return yet; comparing NaN > 0 would silently label it 0, so rows
# without a known outcome are removed from both features and labels.
next_ret = ret1.shift(-1).reindex(feature_frame.index)
has_label = next_ret.notna()
features = feature_frame.loc[has_label]
y = (next_ret.loc[has_label] > 0).astype(int)

# Walk-forward evaluation: every fold fits on an initial stretch of the sample
# and predicts the stretch that immediately follows it.
X = features.to_numpy()
y = np.asarray(y)
tscv = TimeSeriesSplit(n_splits=5)
# Initialise with NaN rather than 0: TimeSeriesSplit never puts the earliest
# samples into a test fold, and a 0.0 placeholder would pass the
# `~np.isnan(preds)` filter applied when scoring and be evaluated as if it were
# a genuine prediction.
preds = np.full(len(y), np.nan)
coefs = []
for train, test in tscv.split(X):
    model = LogisticRegression(max_iter=1000)
    model.fit(X[train], y[train])
    # Second predict_proba column is the probability of the second entry in
    # model.classes_ (label 1 when both labels occur in the training fold).
    preds[test] = model.predict_proba(X[test])[:, 1]
    coefs.append(model.coef_.ravel())

# Evaluate only the positions of `preds` that hold a number (non-NaN entries).
scored = ~np.isnan(preds)
pred_labels = (preds > 0.5).astype(int)  # hard labels at a 0.5 probability cut-off
auc = roc_auc_score(y[scored], preds[scored])
print('ROC AUC:', round(auc, 3))
print(classification_report(y[scored], pred_labels[scored]))

# Stack the per-fold weight vectors (one row per fold) and average column-wise
# to get one mean weight per feature.
coefs = np.asarray(coefs)
coef_mean = np.mean(coefs, axis=0)
# NOTE(review): no feature scaling is visible before fitting, so these weights
# are presumably on each feature's raw scale — confirm before comparing
# magnitudes across features.
for pos, name in enumerate(features.columns):
    print(name, round(coef_mean[pos], 4))
