## Tuning

In [4]:
import pandas as pd
import numpy as np
import warnings
import itertools
import os
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from prophet import Prophet
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Make sure the output folder for the forecast CSVs exists, then silence
# every warning category for the rest of the run.
os.makedirs("Models", exist_ok=True)
warnings.simplefilter("ignore")

# ===================== DATA CLEANING =====================
# Load the price history, keep only the date and closing price, force the
# close to numeric (unparseable entries become NaN and are dropped), and
# index the frame by date.
df = (
    pd.read_csv("AAPL_clean.csv", parse_dates=['Date'])[['Date', 'Close']]
    .assign(Close=lambda frame: pd.to_numeric(frame['Close'], errors='coerce'))
    .dropna(subset=['Close'])
    .set_index('Date')
)

print("Data cleaned. Shape:", df.shape)
print(df.head())

# ===================== ARIMA TUNING =====================
# Exhaustive grid search over every (p, d, q) order with each term in
# {0, 1, 2}; the fit with the lowest AIC is kept and used to forecast.
p = d = q = range(0, 3)
pdq = list(itertools.product(p, d, q))

best_aic, best_params, best_model = np.inf, None, None
for param in pdq:
    try:
        model = ARIMA(df['Close'], order=param)
        results = model.fit()
    except Exception as exc:
        # Best-effort search: a single order that cannot be fitted must not
        # abort the grid. Catch `Exception` rather than using a bare
        # `except` so Ctrl-C / SystemExit still stop the run, and report the
        # reason instead of discarding it silently.
        print(f"Skipping ARIMA order {param}: {exc}")
        continue
    # A NaN AIC compares False here, so such fits are never selected.
    if results.aic < best_aic:
        best_aic, best_params, best_model = results.aic, param, results

# Explicit None check: do not rely on the truthiness of a results object.
if best_model is not None:
    print("Best ARIMA params:", best_params, "| AIC:", best_aic)
    forecast = best_model.forecast(steps=30)
    # Label the 30 forecast steps with the 30 business days that follow the
    # last observed date.
    arima_df = pd.DataFrame({
        'Date': pd.date_range(start=df.index[-1] + pd.offsets.BDay(1), periods=30, freq='B'),
        'Forecast': forecast
    })
    arima_df.to_csv("Models/future_arima.csv", index=False)
else:
    print("ARIMA failed.")

# ===================== SARIMA TUNING =====================
# Grid search over non-seasonal (p, d, q) and seasonal (P, D, Q, 12) orders
# with every term in {0, 1}; the fit with the lowest AIC is kept.
# NOTE(review): the seasonal period is fixed at 12 while the data appears to
# be daily closes -- confirm that 12 is the intended period.
pdq = list(itertools.product(range(0, 2), repeat=3))
seasonal_pdq = [(x[0], x[1], x[2], 12) for x in pdq]

best_aic, best_params, best_model = np.inf, None, None
for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            model = SARIMAX(df['Close'], order=param, seasonal_order=param_seasonal)
            results = model.fit(disp=False)
        except Exception as exc:
            # Best-effort search: skip combinations that cannot be fitted,
            # but catch `Exception` (not a bare `except`) so Ctrl-C /
            # SystemExit still stop the run, and report why it was skipped.
            print(f"Skipping SARIMA order {param} x {param_seasonal}: {exc}")
            continue
        # A NaN AIC compares False here, so such fits are never selected.
        if results.aic < best_aic:
            best_aic, best_params, best_model = results.aic, (param, param_seasonal), results

# Explicit None check: do not rely on the truthiness of a results object.
if best_model is not None:
    print("Best SARIMA params:", best_params, "| AIC:", best_aic)
    forecast = best_model.forecast(steps=30)
    # Label the 30 forecast steps with the 30 business days that follow the
    # last observed date.
    sarima_df = pd.DataFrame({
        'Date': pd.date_range(start=df.index[-1] + pd.offsets.BDay(1), periods=30, freq='B'),
        'Forecast': forecast
    })
    sarima_df.to_csv("Models/future_sarima.csv", index=False)
else:
    print("SARIMA failed.")

# ===================== PROPHET =====================
# Prophet requires the input columns to be named `ds` (timestamp) and `y`
# (value to forecast).
prophet_df = df.reset_index().rename(columns={"Date": "ds", "Close": "y"})
# NOTE(review): daily_seasonality fits a within-day cycle; with one
# observation per day it may add nothing -- confirm it is intended.
prophet_model = Prophet(daily_seasonality=True)
prophet_model.fit(prophet_df)

# Extend by 30 *business* days. The default frequency is calendar days, which
# would put weekends in the forecast and leave this CSV covering different
# dates than the ARIMA/SARIMA/LSTM outputs, which all use freq='B'.
future = prophet_model.make_future_dataframe(periods=30, freq='B')
forecast = prophet_model.predict(future)

# `future` holds the history followed by the new dates, so the last 30 rows
# are the out-of-sample forecast.
prophet_df_out = forecast[['ds', 'yhat']].tail(30).rename(columns={'ds': 'Date', 'yhat': 'Forecast'})
prophet_df_out.to_csv("Models/future_prophet.csv", index=False)
print("Prophet model trained.")

# ===================== LSTM =====================
# Train a two-layer LSTM to predict the next scaled close from the previous
# LOOKBACK closes, then roll it forward HORIZON steps by feeding each
# prediction back in as the newest input.
LOOKBACK = 60   # number of past observations in each input window
HORIZON = 30    # number of future business days to forecast

scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df[['Close']])

# With LOOKBACK rows or fewer there is not a single (window, target) pair;
# fail with a clear message instead of an opaque IndexError further down.
if len(scaled_data) <= LOOKBACK:
    raise ValueError(
        f"LSTM needs more than {LOOKBACK} observations, got {len(scaled_data)}."
    )

# X[k] is the window scaled_data[k : k + LOOKBACK]; y[k] is the value that
# immediately follows that window.
X = np.array([
    scaled_data[i - LOOKBACK:i, 0] for i in range(LOOKBACK, len(scaled_data))
])
y = scaled_data[LOOKBACK:, 0]
# Keras LSTM layers take (samples, timesteps, features); there is one feature.
X = X[..., np.newaxis]

# NOTE(review): no random seed is set here, so forecasts may differ from run
# to run.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(LOOKBACK, 1)))
model.add(LSTM(50))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X, y, epochs=5, batch_size=32, verbose=0)

# Seed the rolling forecast with the most recent LOOKBACK observations.
curr_batch = scaled_data[-LOOKBACK:].reshape((1, LOOKBACK, 1))
future_preds = []

for _ in range(HORIZON):
    pred = model.predict(curr_batch, verbose=0)[0][0]
    future_preds.append(pred)

    # Slide the window: drop the oldest step and append the new prediction.
    pred_reshaped = np.array(pred).reshape(1, 1, 1)
    curr_batch = np.concatenate([curr_batch[:, 1:, :], pred_reshaped], axis=1)

# Map the scaled predictions back to price units.
future_preds = scaler.inverse_transform(np.array(future_preds).reshape(-1, 1))

lstm_df = pd.DataFrame({
    'Date': pd.date_range(start=df.index[-1] + pd.offsets.BDay(1), periods=HORIZON, freq='B'),
    'Forecast': future_preds.flatten()
})
lstm_df.to_csv("Models/future_lstm.csv", index=False)
print("LSTM model trained.")

# Final status line. It is printed unconditionally, including when the ARIMA
# or SARIMA section reported a failure above.
print("\n All models completed! Forecasts saved in 'Models/' folder.")


Data cleaned. Shape: (2515, 1)
                Close
Date                 
2015-01-02  24.288584
2015-01-05  23.604326
2015-01-06  23.606552
2015-01-07  23.937569
2015-01-08  24.857311
Best ARIMA params: (0, 1, 0) | AIC: 10434.071740645499
Best SARIMA params: ((0, 1, 0), (0, 0, 0, 12)) | AIC: 10434.071740645499


21:44:49 - cmdstanpy - INFO - Chain [1] start processing
21:44:50 - cmdstanpy - INFO - Chain [1] done processing


Prophet model trained.
LSTM model trained.

 All models completed! Forecasts saved in 'Models/' folder.
