# Requirement Setup

In [None]:
!pip install prophet
!pip install scikit-learn
!pip install tensorflow
!pip install setuptools

In [None]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
import keras_tuner as kt
import numpy as np
import time
import matplotlib.pyplot as plt
from datetime import datetime
from prophet.serialize import model_to_json, model_from_json

Enable / Disable Hyperparameter Tuning

In [None]:
# Switch for the LSTM stage: True runs the KerasTuner search and uses its best
# model; False builds the fixed default LSTM instead.
HYPERPARAMETER_TUNING = False

In [None]:
# Seed NumPy's and TensorFlow's global random generators with the same fixed
# value so repeated runs are more comparable.
np.random.seed(42)
tf.random.set_seed(42)

# Data Loading

In [None]:
# Load the raw series and normalise it to the column names used below:
# 'ds' (timestamp) and 'y_us' (unscaled value).
# NOTE(review): the rename assumes the CSV has exactly two columns in
# (timestamp, value) order — confirm against the data file.
df_fifa = pd.read_csv('datasets/fifa.csv')
df_fifa.columns = ['ds', 'y_us']
df_fifa['ds'] = pd.to_datetime(df_fifa['ds'])

# Quick sanity check: number of non-null timestamps and the sum of the raw values.
print(f'FIFA DATASET - Dates: {df_fifa["ds"].count()} | Total: {df_fifa["y_us"].sum()}')
df_fifa.head()

In [None]:
# Plot the raw (unscaled) series over time and save the figure.
# NOTE(review): savefig writes into images/ — presumably the directory must
# already exist; confirm.
plt.figure(figsize=(14, 5))
plt.plot(df_fifa['ds'], df_fifa['y_us'])
plt.title('World Cup 98 Original Dataset Distribution')
plt.xlabel('Time')
plt.ylabel('Requests per Min')
plt.grid(True)
plt.savefig("images/fifa-original.jpeg", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()

# Data Splitting & Preprocessing

1. Scaling to 0-1  
2. Split datasets as 70% training, 30% testing
3. Forward-fill missing values in the training set
4. Drop duplicate data

In [None]:
# Scale the raw values into [0, 1] and store them as 'y', the column the
# models are trained and evaluated on.
# NOTE(review): the scaler is fit on the full series before the train/test
# split, so the test range influences the scaling — confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
df_fifa['y'] = scaler.fit_transform(df_fifa[['y_us']])

In [None]:
# Chronological 70/30 split: shuffle=False keeps the row order, so the test
# set is the trailing 30% of the series.
train_df_fifa, test_df_fifa = train_test_split(df_fifa, test_size=0.3, random_state=42, shuffle=False)

print(f'FIFA DATASET: Train Set: {train_df_fifa.shape} | Test Set: {test_df_fifa.shape}')

In [None]:
# Clean the training set only: forward-fill gaps in 'y' and keep one row per
# timestamp.
# train_df_fifa is derived from df_fifa by the split, and in-place edits through
# a column selection (df['y'].fillna(..., inplace=True)) are not guaranteed to
# reach the frame. Work on an explicit copy and assign the results back instead.
train_df_fifa = train_df_fifa.copy()
train_df_fifa['y'] = train_df_fifa['y'].ffill()
train_df_fifa = train_df_fifa.drop_duplicates(subset='ds')

print(f'FIFA DATASET: Train Set: {train_df_fifa.shape} | Test Set: {test_df_fifa.shape}')

In [None]:
# Plot the scaled series, colouring the training span green and the test
# span red, and save the figure.
plt.figure(figsize=(14, 5))
plt.plot(train_df_fifa['ds'], train_df_fifa['y'], label='Train', color='green')
plt.plot(test_df_fifa['ds'], test_df_fifa['y'], label='Test', color='red')
plt.title('World Cup 98 Train and Test Datasets')
plt.xlabel('Time')
plt.ylabel('Workload')
plt.grid(True)
plt.legend()
plt.savefig("images/fifa-train-test.jpeg", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()

# Model Training

## Seasonality Capturing with Prophet

### Train Model

In [None]:
# First-stage model: Prophet with a linear trend, no yearly seasonality, and
# explicit weekly/daily seasonality settings.
# NOTE(review): the integer seasonality arguments are presumably Fourier
# orders per Prophet's API — confirm against the Prophet documentation.
model_one = Prophet(
    growth='linear',
    changepoint_prior_scale=5.1,
    yearly_seasonality=False,
    weekly_seasonality=20,
    daily_seasonality=50,
    seasonality_prior_scale=30
)
# Fit on the training frame, which carries 'ds', 'y_us' and the scaled 'y'.
model_one.fit(train_df_fifa)

### Predict Dataset

In [None]:
# In-sample forecast for the training timestamps.
forecast_train_df_fifa = model_one.predict(train_df_fifa[['ds']].copy())

# Time only the test-set forecast. perf_counter() is a monotonic,
# high-resolution clock meant for measuring elapsed intervals; time.time() is
# wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
forecast_test_df_fifa = model_one.predict(test_df_fifa[['ds']].copy())
end_time = time.perf_counter()

In [None]:
# Attach Prophet's test forecast ('yhat') to the actual test rows by timestamp.
forecast_test_df_fifa_analysis = test_df_fifa.copy()
forecast_test_df_fifa_analysis = forecast_test_df_fifa_analysis.merge(forecast_test_df_fifa[['ds', 'yhat']], on='ds', how='left')

# Error metrics on the scaled target 'y' against 'yhat'.
mse = mean_squared_error(forecast_test_df_fifa_analysis['y'], forecast_test_df_fifa_analysis['yhat'])
rmse = np.sqrt(mse)
mae = mean_absolute_error(forecast_test_df_fifa_analysis['y'], forecast_test_df_fifa_analysis['yhat'])
r2 = r2_score(forecast_test_df_fifa_analysis['y'], forecast_test_df_fifa_analysis['yhat'])
# Elapsed time of the test-set prediction, converted from seconds to milliseconds.
model_one_time = (end_time - start_time) * 1000

print('=== Facebook Prophet Model Metrics ===')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R²: {r2}')
print(f'Prediction Time: {model_one_time}ms')

In [None]:
# Overlay the actual test series and Prophet's forecast, with the x-axis
# limited to July 1998, and save the figure.
plt.figure(figsize=(14, 5))
# plt.plot(train_df_fifa['ds'], train_df_fifa['y'], label='Train', color='green')
plt.plot(test_df_fifa['ds'], test_df_fifa['y'], label='Actual', color='#1f77b4')
plt.plot(forecast_test_df_fifa['ds'], forecast_test_df_fifa['yhat'], label='Prediction', color='red')
plt.title('World Cup 98 Facebook Prophet Predictions')
plt.xlim(pd.to_datetime('1998-07-01'), pd.to_datetime('1998-07-31'))
plt.xlabel('Time')
plt.ylabel('Count')
plt.grid(True)
plt.legend()
plt.savefig("images/fifa-prophet-output.jpeg", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()

In [None]:
# Let Prophet draw its component figure, then re-plot two of its panels as
# standalone images by copying the first line's data from each axis.
# NOTE(review): assumes axes[1] is the weekly panel and axes[2] the daily
# panel — TODO confirm against Prophet's component ordering for this model.
components_fig = model_one.plot_components(forecast_test_df_fifa);
ax_weekly = components_fig.get_axes()[1]
ax_daily = components_fig.get_axes()[2]

# Weekly component: saved to disk and closed without being shown.
plt.figure(figsize=(14, 5))
plt.plot(ax_weekly.lines[0].get_xdata(), ax_weekly.lines[0].get_ydata())
plt.title('World Cup 98 Seasonality Weekly Component')
plt.xlabel(ax_weekly.get_xlabel())
plt.ylabel(ax_weekly.get_ylabel())
plt.grid(True)
plt.savefig("images/fifa-prophet-weekly.jpeg", dpi=300, bbox_inches='tight', pad_inches=0)
plt.close()

# Daily component: saved to disk and closed without being shown.
plt.figure(figsize=(14, 5))
plt.plot(ax_daily.lines[0].get_xdata(), ax_daily.lines[0].get_ydata())
plt.title('World Cup 98 Seasonality Daily Component')
plt.xlabel(ax_daily.get_xlabel())
plt.ylabel(ax_daily.get_ylabel())
plt.grid(True)
plt.savefig("images/fifa-prophet-daily.jpeg", dpi=300, bbox_inches='tight', pad_inches=0)
plt.close()

## Merge Forecasted Data

In [None]:
# Stack the train and test forecasts into one frame (train rows first).
forecast_df_fifa = pd.concat([forecast_train_df_fifa, forecast_test_df_fifa])

## Residual Analysis

### Calculate Residuals

In [None]:
# Join Prophet's 'yhat' onto the full dataset by timestamp and compute the
# residual (actual scaled value minus forecast) that the LSTM will model.
df_fifa_residual = df_fifa.merge(forecast_df_fifa[['ds', 'yhat']], on='ds', how='left')
df_fifa_residual['residual'] = df_fifa_residual['y'] - df_fifa_residual['yhat']
df_fifa_residual.head()

### Prepare Dataset for LSTM

In [None]:
def create_dataset(dataset, look_back):
    """Turn a series into supervised (window, next value) pairs.

    Only the first column of ``dataset`` is used. Sample ``i`` is the window
    ``series[i : i + look_back]`` and its target is ``series[i + look_back]``.

    Args:
        dataset: 2-D array-like (rows x columns, e.g. a one-column DataFrame)
            or a 1-D array-like of values.
        look_back: number of consecutive past values in each window; must be
            at least 1.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape ``(n_samples, look_back)`` and
        ``y`` has shape ``(n_samples,)``, with
        ``n_samples = max(len(dataset) - look_back, 0)``. When the input is
        too short, ``X`` keeps its second dimension (shape ``(0, look_back)``)
        so downstream reshapes still work.

    Raises:
        ValueError: if ``look_back`` is smaller than 1.
    """
    if look_back < 1:
        raise ValueError(f'look_back must be >= 1, got {look_back}')

    data = np.asarray(dataset)
    if data.ndim == 1:
        # Accept a plain 1-D series as a single-column dataset.
        data = data[:, None]
    series = data[:, 0]

    n_samples = max(len(series) - look_back, 0)
    if n_samples == 0:
        return (
            np.empty((0, look_back), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    # Row i of the index matrix is [i, i + 1, ..., i + look_back - 1].
    window_index = np.arange(n_samples)[:, None] + np.arange(look_back)
    # Fancy indexing and .copy() keep the outputs independent of the input.
    return series[window_index], series[look_back:].copy()

In [None]:
# Window length for the LSTM: each sample uses the previous `look_back`
# residual(s) to predict the next one.
look_back = 1
X_fifa, y_fifa = create_dataset(df_fifa_residual[['residual']], look_back)

In [None]:
# Chronological 70/30 split of the windowed residuals (shuffle=False keeps order).
X_train_fifa, X_test_fifa, y_train_fifa, y_test_fifa = train_test_split(X_fifa, y_fifa, test_size=0.3, random_state=42, shuffle=False)

# Add a trailing feature axis of size 1: (samples, look_back) -> (samples, look_back, 1),
# matching the input_shape=(look_back, 1) used by the LSTM models in this notebook.
X_train_fifa = np.reshape(X_train_fifa, (X_train_fifa.shape[0], X_train_fifa.shape[1], 1))
X_test_fifa = np.reshape(X_test_fifa, (X_test_fifa.shape[0], X_test_fifa.shape[1], 1))

print(f'FIFA DATASET: Train Set: {X_train_fifa.shape} | Test Set: {X_test_fifa.shape}')

### Model Definition

In [None]:
# Placeholder for the second-stage (residual) LSTM; assigned later by either
# the tuning branch or the default-model branch.
model_two = None

#### Early Stopping

In [None]:
# Early-stopping callback watching validation loss with a patience of 5 and
# restore_best_weights enabled; passed to both the tuner search and the final fit.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

#### HyperParameter Tuning (If Enabled)

In [None]:
def build_model(hp):
    """Build and compile the stacked-LSTM regressor for one tuner trial.

    Args:
        hp: hyperparameter container supplied by KerasTuner. Two values are
            requested from it: an integer ``units`` (32 to 512, step 32) and
            an ``optimizer`` choice between ``adam`` and ``rmsprop``.

    Returns:
        A compiled ``Sequential`` model (LSTM -> LSTM -> Dense(1)) using mean
        squared error as the loss. The input window length comes from the
        notebook-level ``look_back``.
    """
    # Both layers request the hyperparameter under the same name 'units'.
    first_layer_units = hp.Int('units', min_value=32, max_value=512, step=32)
    second_layer_units = hp.Int('units', min_value=32, max_value=512, step=32)

    network = Sequential()
    for layer in (
        LSTM(units=first_layer_units, return_sequences=True, input_shape=(look_back, 1)),
        LSTM(units=second_layer_units, return_sequences=False),
        Dense(1),
    ):
        network.add(layer)

    chosen_optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop'])
    network.compile(optimizer=chosen_optimizer, loss='mean_squared_error')
    return network

In [None]:
# Random-search tuner over build_model's search space, scored on validation
# loss (10 trials, 2 executions per trial). It is only instantiated when
# tuning is enabled, so a run with HYPERPARAMETER_TUNING = False does not set
# up the search (or whatever project-directory state it manages) for nothing.
# Every later use of `tuner` is guarded by the same flag.
tuner = None
if HYPERPARAMETER_TUNING:
    tuner = kt.RandomSearch(
        build_model,
        objective='val_loss',
        max_trials=10,
        executions_per_trial=2,
        directory='hyperparam_tuning',
        project_name='lstm_hyperparam_tuning'
    )

In [None]:
# Run the hyperparameter search on the training windows only when tuning is
# enabled; 20% of the training data is held out for validation.
if HYPERPARAMETER_TUNING:
    tuner.search(X_train_fifa, y_train_fifa, epochs=50, batch_size=16, validation_split=0.2, callbacks=[early_stopping], verbose=1)

In [None]:
# When tuning ran, take the tuner's top-ranked model as model_two and report
# the hyperparameters it selected.
if HYPERPARAMETER_TUNING:
    model_two = tuner.get_best_models(num_models=1)[0]
    model_two_hyperparameters = tuner.get_best_hyperparameters(num_trials=1)[0]
    print(f"Units: {model_two_hyperparameters.get('units')}")
    print(f"Optimizer: {model_two_hyperparameters.get('optimizer')}")

#### Default Model

In [None]:
# Without tuning, use a fixed architecture: two stacked 50-unit LSTM layers
# followed by a single-output Dense layer, compiled with Adam and MSE loss.
if not HYPERPARAMETER_TUNING:
    model_two = Sequential()
    model_two.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
    model_two.add(LSTM(50, return_sequences=False))
    model_two.add(Dense(1))
    model_two.compile(optimizer='adam', loss='mean_squared_error')

Print Model Summary

In [None]:
# Show the layer/parameter summary of whichever LSTM was selected.
model_two.summary()

### Train Model

In [None]:
# Train the residual LSTM on the training windows (up to 20 epochs, batch
# size 16), holding out 20% for validation and using the early-stopping callback.
model_two.fit(X_train_fifa, y_train_fifa, epochs=20, batch_size=16, validation_split=0.2, callbacks=[early_stopping], verbose=1)

### Predict Dataset

In [None]:
# Residual predictions for the training windows.
fifa_train_predict = model_two.predict(X_train_fifa)

# Time only the test-set prediction. perf_counter() is a monotonic,
# high-resolution clock meant for measuring elapsed intervals; time.time() is
# wall-clock time and can jump if the system clock is adjusted.
start_time = time.perf_counter()
fifa_test_predict = model_two.predict(X_test_fifa)
end_time = time.perf_counter()

print(f'FIFA DATASET - Prediction Size Train: {len(fifa_train_predict)} | Prediction Size Test: {len(fifa_test_predict)}')

In [None]:
# Compare actual and predicted residuals on the test windows and save the figure.
# Both series are plotted against their sample position, so the x-axis is the
# sample index and the y-axis is the residual value (the original labels had
# these swapped).
plt.figure(figsize=(10,6))
plt.plot(y_test_fifa, label='Actual', color='#1f77b4')
plt.plot(fifa_test_predict, label='Prediction', color='red')
plt.xlabel('Sample Index')
plt.ylabel('Residual Value')
plt.title('World Cup 98 Residual Prediction')
plt.legend()
plt.savefig("images/fifa-lstm-output.jpeg", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()

In [None]:
# Error metrics of the LSTM's residual predictions on the test windows.
mse = mean_squared_error(y_test_fifa, fifa_test_predict)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test_fifa, fifa_test_predict)
r2 = r2_score(y_test_fifa, fifa_test_predict)
# Elapsed time of the test-set prediction, converted from seconds to milliseconds.
model_two_time = (end_time - start_time) * 1000

print('=== LSTM Residual Analysis Model Metrics ===')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R2: {r2}')
print(f'Prediction Time: {model_two_time}ms')

## Combine Results from Both Models

In [None]:
# Residual predictions for every window, train windows first, then test windows.
fifa_combine_predict = np.concatenate((fifa_train_predict, fifa_test_predict))

final_df_fifa = df_fifa_residual.copy()

# Window i covers rows i .. i + look_back - 1 and its target is row
# i + look_back, so prediction i belongs to row i + look_back, not row i.
# Writing the predictions from row 0 would pair each one with the residual it
# was computed from rather than the one it forecasts.
# The column is built as a plain array and assigned once; this avoids chained
# `.iloc` assignment on a column, which is not guaranteed to update the frame.
residual_predict = np.full(len(final_df_fifa), np.nan)
residual_predict[look_back:look_back + len(fifa_combine_predict)] = fifa_combine_predict[:, 0]
final_df_fifa['residual_predict'] = residual_predict
# Double quotes inside the single-quoted f-string keep this valid before Python 3.12.
print(f'FIFA DATASET: Total Output: {final_df_fifa.shape[0]} | Loss: {final_df_fifa["residual_predict"].isna().sum()}')

# Rows with no prediction (the first `look_back` rows, which have no full
# window before them) fall back to the observed residual.
final_df_fifa['residual_predict'] = final_df_fifa['residual_predict'].fillna(final_df_fifa['residual'])

# Hybrid forecast: Prophet's forecast plus the LSTM's predicted residual.
final_df_fifa['y_predicted'] = final_df_fifa['yhat'] + final_df_fifa['residual_predict']

final_df_fifa.head()

### Extract Test Dataset Related Dataframe & Calculate Metrics

In [None]:
# Keep only the rows belonging to the test split. The start position is
# derived from the test-set size, not from train_df_fifa.shape[0]: the training
# frame may have had duplicate timestamps dropped, which would shift the
# boundary and pull training rows into the evaluation window.
test_start = len(final_df_fifa) - len(test_df_fifa)
final_df_fifa_analysis = final_df_fifa.iloc[test_start:]

print(f'FIFA DATASET: Testing Output: {final_df_fifa_analysis.shape}')

# Last expression of the cell so the notebook actually displays it.
final_df_fifa_analysis.tail()

In [None]:
# Error metrics of the combined (Prophet + LSTM residual) forecast on the
# test rows, computed on the scaled target 'y'.
mse = mean_squared_error(final_df_fifa_analysis['y'], final_df_fifa_analysis['y_predicted'])
rmse = np.sqrt(mse)
mae = mean_absolute_error(final_df_fifa_analysis['y'], final_df_fifa_analysis['y_predicted'])
r2 = r2_score(final_df_fifa_analysis['y'], final_df_fifa_analysis['y_predicted'])

print('=== Hybrid Model Metrics ===')
print(f'MSE: {mse:.6f}')
print(f'RMSE: {rmse:.6f}')
print(f'MAE: {mae:.6f}')
print(f'R²: {r2:.6f}')
# Total prediction time is the sum of both models' test-set prediction times (ms).
print(f'Prediction Time: {model_one_time + model_two_time}ms')

In [None]:
# Overlay the actual test series and the hybrid forecast, with the x-axis
# limited to 2-31 July 1998, and save the figure.
plt.figure(figsize=(14, 5))
# plt.plot(train_df_fifa['ds'], train_df_fifa['y'], label='Train', color='green')
plt.plot(test_df_fifa['ds'], test_df_fifa['y'], label='Actual', color='#1f77b4')
plt.plot(final_df_fifa_analysis['ds'], final_df_fifa_analysis['y_predicted'], label='Prediction', color='red')
plt.xlim(pd.to_datetime('1998-07-02'), pd.to_datetime('1998-07-31'))
plt.title('World Cup 98 Original and Predicted Datasets')
plt.xlabel('Time')
plt.ylabel('Workload')
plt.grid(True)
plt.legend()
plt.savefig("images/fifa-final-output.jpeg", dpi=300, bbox_inches='tight', pad_inches=0)
plt.show()

## Save Models

In [None]:
import os

# One timestamp shared by both artefacts so the saved pair can be matched up.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Make sure the output directory exists so a fresh checkout does not fail
# after the whole pipeline has already run.
os.makedirs('models', exist_ok=True)

# Prophet model, serialised to JSON.
filename = f'models/fbprophet-fifa-{timestamp}.json'
with open(filename, 'w') as fout:
    fout.write(model_to_json(model_one))

# LSTM model, saved through the model's own save() to a .keras file.
filename = f'models/lstm-fifa-{timestamp}.keras'
model_two.save(filename)