In [3]:
# Mount Google Drive into the Colab filesystem at /content/drive so files
# stored in Drive can be opened by path from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import itertools
from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error
import numpy as np
import matplotlib.pyplot as plt

# 1. Load the price history and reshape it into Prophet's input layout.
# Row 0 of the file supplies the column names; file rows 1 and 2 are skipped.
# NOTE(review): presumably those two rows are extra header lines left by the
# data export — confirm against the CSV.
# The column labelled 'Price' holds the dates, so it is renamed to 'Date'.
df = pd.read_csv(
    "/content/drive/MyDrive/AAPL_all_data2.csv",
    header=0,
    skiprows=[1, 2],
).rename(columns={'Price': 'Date'})

# Prophet expects a two-column frame: 'ds' (timestamp) and 'y' (value to forecast).
df_prophet = (
    df[['Date', 'Close']]
    .rename(columns={'Date': 'ds', 'Close': 'y'})
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

# 2. Hold out the most recent 365 days as the final, untouched test set.
# Rows dated on or before the cutoff are used for training and tuning;
# rows strictly after it are reserved for the final evaluation.
latest_date = df_prophet['ds'].max()
split_date = latest_date - pd.DateOffset(days=365)

on_or_before_split = df_prophet['ds'] <= split_date
after_split = df_prophet['ds'] > split_date
train_df = df_prophet.loc[on_or_before_split]
test_df = df_prophet.loc[after_split]

train_end = train_df['ds'].max()
test_start = test_df['ds'].min()
print(f"Training data goes up to {train_end}")
print(f"Testing data starts from {test_start}\n")


# --- Hyperparameter tuning via time-series cross-validation ---
# Candidate values for each Prophet constructor argument; every combination
# (the full Cartesian product) is evaluated below.
grid = {
    'changepoint_prior_scale': [0.001, 0.05, 0.1, 0.5],
    'seasonality_prior_scale': [1.0, 5.0, 10.0],
    'seasonality_mode': ['additive', 'multiplicative'],
}

# Expand the grid into one keyword-argument dict per combination.
all_params = []
for combo in itertools.product(*grid.values()):
    all_params.append(dict(zip(grid, combo)))

rmses = []  # one cross-validated RMSE per entry of all_params, in the same order

print(f"Starting hyperparameter tuning with {len(all_params)} combinations...")

# Tuning only ever sees the training split, so the holdout year stays unseen.
for params in all_params:
    m = Prophet(**params)
    m.fit(train_df)
    # Daily data with a one-year forecast horizon: start from 730 days of
    # history and place a new cutoff every 180 days.
    df_cv = cross_validation(
        m,
        initial='730 days',
        period='180 days',
        horizon='365 days',
        parallel="processes",
    )
    # rolling_window=1 aggregates the metrics over the whole horizon into a single row.
    df_p = performance_metrics(df_cv, rolling_window=1)
    rmses.append(df_p['rmse'].iloc[0])

# Rank every parameter combination by its cross-validated RMSE (lowest first).
tuning_results = pd.DataFrame(all_params)
tuning_results['rmse'] = rmses
tuning_results = tuning_results.sort_values('rmse')

# sort_values keeps the original RangeIndex labels, so the label of the top
# row is the position of the winning combination inside all_params.
# Taking the dict from all_params (instead of converting the DataFrame row)
# keeps the values as the plain Python objects defined in the grid and never
# mixes the 'rmse' score into the keyword arguments later passed to Prophet,
# so no clean-up step is needed before building the final model.
best_index = tuning_results.index[0]
best_params = dict(all_params[best_index])
best_rmse = tuning_results['rmse'].iloc[0]

print("\n--- Best Hyperparameters Found ---")
print(best_params)
print(f"Cross-validated RMSE: {best_rmse:.4f}")


# 3. Refit on the whole training split using the selected hyperparameters.
print("\nFitting final model with best parameters...")
final_model = Prophet(**best_params).fit(train_df)

# Forecast exactly the dates that are present in the holdout set.
future_test = test_df.loc[:, ['ds']]
forecast = final_model.predict(future_test)

# 4. Score the forecast against the held-out actuals.
# Joining on 'ds' lines each actual value up with its prediction and the
# lower/upper bounds of the uncertainty interval.
forecast_columns = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
results_df = test_df.merge(forecast_columns, on='ds')

actual_values = results_df['y']
predicted_values = results_df['yhat']

mae = mean_absolute_error(actual_values, predicted_values)
# RMSE is taken as the square root of the mean squared error.
rmse = np.sqrt(mean_squared_error(actual_values, predicted_values))
# The ':.2%' format below renders this value as a percentage.
mape = mean_absolute_percentage_error(actual_values, predicted_values)

print("\n--- Final Accuracy Metrics on Test Set (with Tuned Model) ---")
print(f'Mean Absolute Error (MAE): ${mae:.2f}')
print(f'Root Mean Squared Error (RMSE): ${rmse:.2f}')
print(f'Mean Absolute Percentage Error (MAPE): {mape:.2%}')


# 5. Plot actual prices against the tuned forecast over the test period,
# with the forecast's uncertainty interval shaded around the prediction.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(results_df['ds'], results_df['y'], label='Actual Price', color='blue')
ax.plot(
    results_df['ds'],
    results_df['yhat'],
    label='Predicted Price (Tuned)',
    color='red',
    linestyle='--',
)
ax.fill_between(
    results_df['ds'],
    results_df['yhat_lower'],
    results_df['yhat_upper'],
    color='red',
    alpha=0.2,
    label='Uncertainty Interval',
)
ax.set_title('Tuned Stock Price Forecast vs. Actuals (Test Set)')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.legend()
plt.show()



: 