## Predictive Modelling in Finance: ML for Stock Prices & Risk.

Core idea: use an LSTM (a recurrent-neural-network variant) to forecast prices as a time series, and a Random Forest to model risk factors (e.g., volatility predictors). Typical features are lagged returns, volume, and moving averages. Evaluate the forecasts with RMSE or MAE.

In [None]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# Data prep
ticker = 'AAPL'
# Request unadjusted columns explicitly: newer yfinance releases default to
# auto_adjust=True, which drops the 'Adj Close' column selected below.
data = yf.download(ticker, period='3y', auto_adjust=False)['Adj Close']
# Newer yfinance releases return MultiIndex columns even for a single ticker,
# so the selection above can be a one-column DataFrame; keep `data` a Series.
if isinstance(data, pd.DataFrame):
    data = data.iloc[:, 0]
# Drop missing quotes so NaNs do not propagate into the scaled series.
data = data.dropna()
if data.empty:
    raise ValueError(f"No 'Adj Close' prices downloaded for {ticker}")
# Scale prices with MinMaxScaler's default range; it expects a 2-D column.
# NOTE(review): the scaler is fit on the full history, including rows that
# later land in the test split -- consider fitting on the training rows only.
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data.values.reshape(-1, 1))

def create_sequences(data, seq_length=60):
    """Build sliding-window samples for the LSTM.

    Each sample holds the ``seq_length`` consecutive column-0 values that
    precede row ``i``; its target is the column-0 value at row ``i``.

    Args:
        data: 2-D array indexed as ``data[row, 0]``.
        seq_length: Number of preceding rows in each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays: the windows and their next-step
        targets.
    """
    target_rows = range(seq_length, len(data))
    windows = [data[end - seq_length:end, 0] for end in target_rows]
    targets = [data[end, 0] for end in target_rows]
    return np.array(windows), np.array(targets)

# Window the scaled series, then split chronologically: the first 80% of the
# windows are used for training and the remainder for testing (no shuffling).
seq_length = 60
X, y = create_sequences(scaled_data, seq_length=seq_length)
split = int(len(X) * 0.8)
train_rows, test_rows = slice(None, split), slice(split, None)
X_train, y_train = X[train_rows], y[train_rows]
X_test, y_test = X[test_rows], y[test_rows]

# LSTM Model
# Two stacked 50-unit LSTM layers feeding a single-unit Dense output.
model = Sequential([
    # return_sequences=True so the second LSTM receives the full sequence.
    LSTM(50, return_sequences=True, input_shape=(seq_length, 1)),
    LSTM(50),
    Dense(1),
])
model.compile(loss='mse', optimizer='adam')
# Add the trailing feature axis: (samples, seq_length) -> (samples, seq_length, 1).
train_inputs = X_train.reshape((X_train.shape[0], seq_length, 1))
model.fit(train_inputs, y_train, batch_size=32, epochs=10, verbose=0)

# Predict
# Add the trailing feature axis the LSTM expects: (samples, seq_length, 1).
predictions = model.predict(X_test.reshape((X_test.shape[0], seq_length, 1)))
# Undo the min-max scaling so errors are expressed in price units.
predictions = scaler.inverse_transform(predictions)
# Invert the targets as a single-feature column (the layout the scaler was
# fit on), then transpose so y_test_inv keeps its (1, n) layout.
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1)).T

# Compare matching 1-D vectors: passing the (1, n) targets against the (n, 1)
# predictions makes mean_squared_error reject them as inconsistent sample counts.
rmse = np.sqrt(mean_squared_error(y_test_inv.ravel(), predictions.ravel()))
# rmse is a scalar, so it is formatted directly rather than indexed.
print(f"LSTM RMSE: ${rmse:.2f}")

# Risk: RF for beta (using market proxy SPY)
# Request unadjusted columns explicitly: newer yfinance releases default to
# auto_adjust=True, which drops the 'Adj Close' column selected here.
spy = yf.download('SPY', period='3y', auto_adjust=False)['Adj Close']
# Newer yfinance releases can return a one-column DataFrame per field; the
# returns frame below needs plain Series, so normalise both price inputs.
if isinstance(spy, pd.DataFrame):
    spy = spy.iloc[:, 0]
stock_prices = data.iloc[:, 0] if isinstance(data, pd.DataFrame) else data
# Period-over-period returns aligned on the index; rows missing either side are dropped.
df = pd.DataFrame({'Stock': stock_prices.pct_change(), 'Market': spy.pct_change()}).dropna()
df['Lag1'] = df['Stock'].shift(1)  # previous row's stock return
df['Vol_MA'] = df['Stock'].rolling(20).std()  # 20-row rolling std of stock returns
df = df.dropna()
# One row is held out for prediction, so at least one more is needed to fit.
if len(df) < 2:
    raise ValueError("Not enough overlapping return history to fit the risk model")

X_rf = df[['Market', 'Lag1', 'Vol_MA']]
y_rf = df['Stock'] * df['Market']  # Simple beta proxy: covariance-like
# Fixed seed so the printed estimate is reproducible between runs.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
# Explicit positional slicing: train on all but the last row, predict the last.
rf.fit(X_rf.iloc[:-1], y_rf.iloc[:-1])
risk_pred = rf.predict(X_rf.iloc[-1:])
print(f"Predicted Risk Factor (Beta Proxy): {risk_pred[0]:.3f}")

# Plot predictions
# Overlay the de-scaled test targets and the model output on a single axes.
fig, ax = plt.subplots()
ax.plot(y_test_inv[0], label='Actual')  # first row of the inverted targets
ax.plot(predictions, label='Predicted')
ax.set_title('Stock Price Forecast')
ax.legend()
plt.show()