# Imports & basic setup

In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
import pmdarima as pm
import datetime as dt

from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping

import warnings
# Silence all library warnings so the notebook output stays readable.
warnings.filterwarnings("ignore")

# --- Tunable settings (edit these to re-run with a different setup) ---
seed = 42                      # value handed to NumPy's global RNG below
n_input = 30                   # LSTM lookback window (days)
n_bootstrap_sims = 500         # simulations for LSTM intervals
business_days_per_month = 21   # approximate trading days per month
forecast_months = 6            # set 6 or 12
# Forecast horizon expressed in business days, e.g. 21*6=126.
horizon_days = business_days_per_month * forecast_months

np.random.seed(seed)


# Load TSLA series from your Task 1 assets dict


In [None]:
# Take a copy so later edits do not touch the entry held in `assets`.
tsla_df = assets['TSLA'].copy()
# Reindex to business-day frequency; business days absent from the data become NaN.
tsla = tsla_df['Adj Close'].asfreq('B')
# Carry the last known price forward over those gaps so the series keeps a
# complete business-day index. `Series.ffill()` is used instead of
# `fillna(method='ffill')` because the `method` argument is deprecated in
# recent pandas releases.
tsla = tsla.ffill()

# Quick sanity check: date coverage and a plot of the series being modelled.
print("Data range:", tsla.index.min().date(), "to", tsla.index.max().date())
tsla.plot(title='TSLA Adj Close (used for forecasting)', figsize=(12,4))
plt.show()


# Prepare index for forecasts (business days)


In [None]:
# Forecast dates: `horizon_days` business days starting after the last observation.
last_date = tsla.index.max()
forecast_start = last_date + pd.Timedelta(days=1)
future_index = pd.bdate_range(start=forecast_start, periods=horizon_days)
# Final expression of the cell: show the first five dates and the last one.
future_index[:5], future_index[-1]


# Auto ARIMA on full series (retrain on full history)

In [None]:
# The price level is passed in directly; keep in mind that ARIMA commonly
# requires differencing.
print("Running auto_arima on TSLA (this may take a bit)...")
auto = pm.auto_arima(
    tsla,
    seasonal=False,
    stepwise=True,
    trace=False,
    error_action='ignore',
    suppress_warnings=True,
)
arima_order = auto.order
print("auto_arima order:", auto.order)
# Refit the final model with statsmodels using the selected order.
arima_model = ARIMA(tsla, order=arima_order).fit()


# ARIMA forecast with 95% conf intervals

In [None]:
# Forecast `horizon_days` steps ahead; alpha=0.05 requests the 95% interval.
arima_fc = arima_model.get_forecast(steps=horizon_days)
arima_ci = arima_fc.conf_int(alpha=0.05)
arima_mean = arima_fc.predicted_mean

# Collect mean and bounds on the business-day forecast index. The interval
# columns are taken by position: first column -> lower, second -> upper.
arima_forecast_df = pd.DataFrame(
    {
        'mean': arima_mean.values,
        'lower_95': arima_ci.values[:, 0],
        'upper_95': arima_ci.values[:, 1],
    },
    index=future_index,
)

# Draw the last 500 observations followed by the ARIMA mean and its 95% band.
recent_history = tsla.iloc[-500:]
plt.figure(figsize=(12,5))
plt.plot(recent_history.index, recent_history.values, label='Historical (last 500 days)')
plt.plot(arima_forecast_df.index, arima_forecast_df['mean'], label='ARIMA mean forecast')
plt.fill_between(
    arima_forecast_df.index,
    arima_forecast_df['lower_95'],
    arima_forecast_df['upper_95'],
    color='lightgreen',
    alpha=0.4,
    label='ARIMA 95% CI',
)
plt.title(f'ARIMA Forecast — {forecast_months} months ahead')
plt.legend()
plt.show()


# Prepare scaled series for LSTM (train on full tsla)

In [None]:
# Rescale the full price history with a default MinMaxScaler before it is fed
# to the LSTM. The prices are reshaped into a single column first.
ts_values = tsla.values.reshape(-1, 1)
scaler = MinMaxScaler()
ts_scaled = scaler.fit(ts_values).transform(ts_values)

# Create training sequences for full history
def create_sequences(series, n_input):
    """Build supervised (window, next value) pairs from a single-column series.

    Each sample is the ``n_input`` consecutive values that precede position
    ``i``; its target is the value at position ``i``.

    Args:
        series: Array-like of shape ``(n_samples, 1)``. A 1-D sequence is also
            accepted and treated as one column. Only column 0 is used.
        n_input: Number of past observations in each window.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_windows, n_input, 1)`` and
        ``y`` has shape ``(n_windows,)``. ``n_windows`` is
        ``len(series) - n_input``, or 0 when the series is too short to form
        a single window.
    """
    values = np.asarray(series)
    if values.ndim == 1:
        values = values.reshape(-1, 1)
    column = values[:, 0]

    # Window start offsets; the range is empty when len(series) <= n_input.
    starts = range(len(column) - n_input)
    X = np.array([column[s:s + n_input] for s in starts])
    y = np.array([column[s + n_input] for s in starts])
    # Reshape with the explicit window count so an empty result still gets the
    # (0, n_input, 1) shape rather than failing on a missing second axis.
    return X.reshape((len(y), n_input, 1)), y

# Window the entire scaled history into LSTM inputs and their next-step targets.
X_all, y_all = create_sequences(ts_scaled, n_input=n_input)
print("LSTM input shape:", X_all.shape)


# Define & train LSTM on full historical series

In [None]:
# `tf` is needed for seeding but is not among the notebook's top-level imports
# (only tensorflow.keras submodules are), so bring it into scope here;
# otherwise this cell stops with a NameError.
import tensorflow as tf

# Seed TensorFlow's global RNG so runs are repeatable.
tf.random.set_seed(seed)

# One LSTM layer over (n_input, 1) windows followed by a single-unit output.
model = Sequential()
model.add(LSTM(64, activation='tanh', input_shape=(n_input,1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# No validation data is passed to fit(), so early stopping watches the
# training loss: stop after 5 epochs without improvement and restore the best
# weights seen.
es = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)
model.fit(X_all, y_all, epochs=50, batch_size=32, verbose=1, callbacks=[es])


# Compute in-sample fitted values & residuals

In [None]:
# One-step-ahead fitted values: predict every training window, then map the
# scaled outputs back to price units.
in_sample_pred_scaled = model.predict(X_all, verbose=0).flatten()
in_sample_pred = np.ravel(
    scaler.inverse_transform(in_sample_pred_scaled.reshape(-1, 1))
)

# The first target produced by create_sequences sits at position n_input, so
# the fitted values line up with the observations from n_input onwards.
pred_index = tsla.index[n_input:]
in_sample_actual = np.ravel(tsla.loc[pred_index].values)
residuals = in_sample_actual - in_sample_pred
residuals_std = np.std(residuals, ddof=1)
print("In-sample residuals std:", residuals_std)


# Point forecast with LSTM

In [None]:

# Recursive multi-step forecast: start from the last n_input scaled values and
# feed every prediction back in as the newest observation.
last_window = ts_scaled[-n_input:].reshape((1, n_input, 1))
current_input = last_window.copy()
lstm_point_forecasts = []

for _ in range(horizon_days):
    pred_scaled = model.predict(current_input, verbose=0)[0, 0]
    lstm_point_forecasts.append(pred_scaled)
    # Drop the oldest step and append the new prediction, keeping the window
    # length fixed at n_input.
    newest_step = np.array([[[pred_scaled]]])
    current_input = np.concatenate((current_input[:, 1:, :], newest_step), axis=1)

# Convert the scaled predictions back to price units and index them by the
# forecast dates.
lstm_fc_scaled = np.array(lstm_point_forecasts).reshape(-1, 1)
lstm_fc_values = scaler.inverse_transform(lstm_fc_scaled).flatten()
lstm_forecast_df = pd.DataFrame({'mean': lstm_fc_values}, index=future_index)

# Draw the last 500 observations followed by the LSTM point forecast.
recent_history = tsla.iloc[-500:]
plt.figure(figsize=(12,5))
plt.plot(recent_history.index, recent_history.values, label='Historical (last 500 days)')
plt.plot(
    lstm_forecast_df.index,
    lstm_forecast_df['mean'],
    label='LSTM point forecast',
    color='orange',
)
plt.title(f'LSTM point forecast — {forecast_months} months ahead')
plt.legend()
plt.show()


# Bootstrap residual simulation for LSTM intervals

In [None]:

# Simple additive residual bootstrap: each simulated path is the point forecast
# plus in-sample residuals drawn with replacement, one draw per future step.
sim_matrix = np.empty((n_bootstrap_sims, horizon_days))
for sim_row in sim_matrix:
    # Residuals are in price space, so they are added directly to the forecast.
    # Each row of sim_matrix is a view, so the assignment fills it in place.
    sim_row[:] = lstm_fc_values + np.random.choice(
        residuals, size=horizon_days, replace=True
    )

# Empirical 2.5th and 97.5th percentiles across simulations at every step.
lower_95, upper_95 = np.percentile(sim_matrix, [2.5, 97.5], axis=0)

lstm_forecast_df['lower_95'] = lower_95
lstm_forecast_df['upper_95'] = upper_95

# Draw the last 500 observations, the LSTM forecast and its bootstrap 95% band.
recent_history = tsla.iloc[-500:]
plt.figure(figsize=(12,5))
plt.plot(recent_history.index, recent_history.values, label='Historical (last 500 days)')
plt.plot(
    lstm_forecast_df.index,
    lstm_forecast_df['mean'],
    label='LSTM mean forecast',
    color='orange',
)
plt.fill_between(
    lstm_forecast_df.index,
    lstm_forecast_df['lower_95'],
    lstm_forecast_df['upper_95'],
    color='peachpuff',
    alpha=0.5,
    label='LSTM empirical 95% interval',
)
plt.title(f'LSTM Forecast w/ Bootstrap Intervals — {forecast_months} months')
plt.legend()
plt.show()


# Combine into one DataFrame for comparison, and save

In [None]:
# Side-by-side table of both models' forecasts and 95% bounds on the shared
# forecast index.
combined = pd.DataFrame(index=future_index)
combined['ARIMA_mean'] = arima_forecast_df['mean']
combined['ARIMA_lower95'] = arima_forecast_df['lower_95']
combined['ARIMA_upper95'] = arima_forecast_df['upper_95']

combined['LSTM_mean'] = lstm_forecast_df['mean']
combined['LSTM_lower95'] = lstm_forecast_df['lower_95']
combined['LSTM_upper95'] = lstm_forecast_df['upper_95']

# Preview the first rows. A bare `combined.head()` in the middle of a cell is
# evaluated and thrown away (only a cell's final expression is rendered), so
# print it explicitly.
print(combined.head())

# Persist the comparison table, keeping the date index as the first column.
combined.to_csv("tsla_forecasts_task3.csv", index=True)
print("Saved combined forecasts to tsla_forecasts_task3.csv")
