# Requirements Setup

In [None]:
!pip install prophet
!pip install scikit-learn
!pip install tensorflow
!pip install setuptools

In [None]:
import pandas as pd
from prophet import Prophet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, GRU
import numpy as np
import time
import matplotlib.pyplot as plt

# Data Loading

In [None]:
# Load the NASA series, rename its two columns to ds / y and parse ds as datetimes.
df_nasa = (
    pd.read_csv('datasets/nasa.csv')
    .set_axis(['ds', 'y'], axis=1)
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

# Quick sanity summary: number of timestamps and the sum of the target column.
print(f'NASA DATASET - Dates: {df_nasa["ds"].count()} | Total: {df_nasa["y"].sum()}')
df_nasa.head()

In [None]:
# Load the FIFA series, rename its two columns to ds / y and parse ds as datetimes.
df_fifa = (
    pd.read_csv('datasets/fifa.csv')
    .set_axis(['ds', 'y'], axis=1)
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))
)

# Quick sanity summary: number of timestamps and the sum of the target column.
print(f'FIFA DATASET - Dates: {df_fifa["ds"].count()} | Total: {df_fifa["y"].sum()}')
df_fifa.head()

In [None]:
# Line plot of the complete NASA series over time.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(df_nasa['ds'], df_nasa['y'])
ax.set_title('NASA Original Dataset Distribution')
ax.set_xlabel('Time')
ax.set_ylabel('Count')
ax.grid(True)
plt.show()

In [None]:
# Line plot of the complete FIFA series over time.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(df_fifa['ds'], df_fifa['y'])
ax.set_title('FIFA Original Dataset Distribution')
ax.set_xlabel('Time')
ax.set_ylabel('Count')
ax.grid(True)
plt.show()

# Data Splitting & Preprocessing

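1.   Split each dataset chronologically (no shuffling) into 70% training and 30% testing
2.   Forward-fill missing values in the training sets
3.   Drop rows with duplicate timestamps from the training sets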





In [None]:
# Chronological 70/30 split: shuffle=False keeps the row order, so the test
# split is the trailing 30% of each series.
train_df_nasa, test_df_nasa = train_test_split(
    df_nasa, shuffle=False, test_size=0.3, random_state=42
)
train_df_fifa, test_df_fifa = train_test_split(
    df_fifa, shuffle=False, test_size=0.3, random_state=42
)

# Report the resulting split sizes.
print(f'NASA DATASET: Train Set: {train_df_nasa.shape} | Test Set: {test_df_nasa.shape}')
print(f'FIFA DATASET: Train Set: {train_df_fifa.shape} | Test Set: {test_df_fifa.shape}')

In [None]:
# Forward-fill gaps in the target and keep the first row per timestamp.
#
# Each training split is rebuilt and re-bound instead of being mutated through
# `df['y'].fillna(method='ffill', inplace=True)`: that chained in-place call
# operates on a temporary Series (so the frame is not updated under pandas
# Copy-on-Write), and `fillna(method=...)` is deprecated in favour of `ffill()`.
# Re-binding also keeps this cell idempotent when it is re-run.
#
# NOTE(review): only the training splits are cleaned; the test splits are
# left untouched by this cell.
train_df_nasa = (
    train_df_nasa
    .assign(y=train_df_nasa['y'].ffill())
    .drop_duplicates(subset='ds')
)

train_df_fifa = (
    train_df_fifa
    .assign(y=train_df_fifa['y'].ffill())
    .drop_duplicates(subset='ds')
)

print(f'NASA DATASET: Train Set: {train_df_nasa.shape} | Test Set: {test_df_nasa.shape}')
print(f'FIFA DATASET: Train Set: {train_df_fifa.shape} | Test Set: {test_df_fifa.shape}')

In [None]:
# Overlay the NASA training and test splits on one time axis.
fig, ax = plt.subplots(figsize=(14, 5))
for split_frame, split_label, split_colour in (
    (train_df_nasa, 'Train', 'green'),
    (test_df_nasa, 'Test', 'red'),
):
    ax.plot(split_frame['ds'], split_frame['y'], label=split_label, color=split_colour)
ax.set_title('NASA Train and Test Datasets')
ax.set_xlabel('Time')
ax.set_ylabel('Count')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Overlay the FIFA training and test splits on one time axis.
fig, ax = plt.subplots(figsize=(14, 5))
for split_frame, split_label, split_colour in (
    (train_df_fifa, 'Train', 'green'),
    (test_df_fifa, 'Test', 'red'),
):
    ax.plot(split_frame['ds'], split_frame['y'], label=split_label, color=split_colour)
ax.set_title('FIFA Train and Test Datasets')
ax.set_xlabel('Time')
ax.set_ylabel('Count')
ax.grid(True)
ax.legend()
plt.show()

# Model Training - NASA

## Seasonality Capturing with Prophet

### Train Model

In [None]:
# Prophet baseline for the NASA series. The settings are collected in one
# mapping and unpacked into the constructor, then the model is fitted on the
# NASA training split.
nasa_prophet_settings = {
    'growth': 'linear',
    'changepoint_prior_scale': 5.1,
    'seasonality_prior_scale': 30,
    'yearly_seasonality': False,
    'weekly_seasonality': 20,
    'daily_seasonality': 50,
}
model = Prophet(**nasa_prophet_settings)
model.fit(train_df_nasa)

### Predict Dataset

In [None]:
# In-sample forecast over the training timestamps (not timed).
forecast_train_df_nasa = model.predict(train_df_nasa[['ds']].copy())

# Time only the test-set prediction. perf_counter() is a monotonic,
# high-resolution clock meant for measuring durations; time.time() is a wall
# clock that can jump and may be too coarse for short intervals.
start_time = time.perf_counter()
forecast_test_df_nasa = model.predict(test_df_nasa[['ds']].copy())
end_time = time.perf_counter()

In [None]:
# Attach the Prophet forecast (yhat) to a copy of the held-out rows by timestamp.
forecast_test_df_nasa_analysis = test_df_nasa.copy().merge(
    forecast_test_df_nasa[['ds', 'yhat']], how='left', on='ds'
)

# Score the forecast against the observed test values.
nasa_observed = forecast_test_df_nasa_analysis['y']
nasa_forecast = forecast_test_df_nasa_analysis['yhat']

mse = mean_squared_error(nasa_observed, nasa_forecast)
mae = mean_absolute_error(nasa_observed, nasa_forecast)
r2 = r2_score(nasa_observed, nasa_forecast)
rmse = np.sqrt(mse)

print('=== Facebook Prophet Model Metrics ===')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R²: {r2}')
print(f'Prediction Time: {(end_time - start_time) * 1000}ms')

In [None]:
# Train / test splits with the Prophet test forecast drawn on top.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(train_df_nasa['ds'], train_df_nasa['y'], color='green', label='Train')
ax.plot(test_df_nasa['ds'], test_df_nasa['y'], color='red', label='Test')
ax.plot(forecast_test_df_nasa['ds'], forecast_test_df_nasa['yhat'], color='blue', label='Prediction')
ax.set_title('NASA Facebook Prophet Predictions')
ax.set_xlabel('Time')
ax.set_ylabel('Count')
ax.grid(True)
ax.legend()
plt.show()

In [None]:
# Plot the components of the test forecast using the fitted Prophet model.
# The trailing ';' suppresses the cell's output repr.
# NOTE(review): `model` is rebound to the Keras network in a later cell, so
# this cell must run while `model` still refers to the Prophet instance.
model.plot_components(forecast_test_df_nasa);

## Merge Forecasted Data

In [None]:
# Stack the train-period and test-period Prophet forecasts into one frame
# (train rows first); the original index labels of both frames are kept.
forecast_df_nasa = pd.concat([forecast_train_df_nasa, forecast_test_df_nasa])

## Residual Analysis

### Calculate Residuals

In [None]:
# Join the Prophet forecast onto the full series by timestamp and derive the
# residual (observed value minus forecast) that the second model will learn.
df_nasa_residual = (
    df_nasa
    .merge(forecast_df_nasa[['ds', 'yhat']], how='left', on='ds')
    .assign(residual=lambda frame: frame['y'] - frame['yhat'])
)
df_nasa_residual.head()

### Prepare Dataset for the Recurrent Model (GRU)

In [None]:
# Rescale the residual column to the [0, 1] range before windowing.
# NOTE(review): the scaler is fitted on the whole residual series, test
# period included — confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
residual_column = df_nasa_residual[['residual']]
scaled_df_nasa = scaler.fit(residual_column).transform(residual_column)

In [None]:
def create_dataset(dataset, look_back):
    """Turn a single-feature series into supervised sliding-window samples.

    Sample ``i`` uses the ``look_back`` consecutive values starting at row
    ``i`` (first column of ``dataset``) as input and the value at row
    ``i + look_back`` as target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_features)``; only
            column 0 is used.
        look_back: Number of past values in each input window (>= 0).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays, where ``X`` has shape
        ``(n_samples, look_back)`` and ``y`` has shape ``(n_samples,)``, with
        ``n_samples = max(n_rows - look_back, 0)``.

    Raises:
        ValueError: If ``look_back`` is negative.
    """
    if look_back < 0:
        raise ValueError('look_back must be non-negative')

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - look_back, 0)

    # Allocate X up front so it is always 2-D, even when the series is too
    # short to yield a single window (callers index X.shape[1] afterwards).
    X = np.empty((n_samples, look_back), dtype=series.dtype)
    for offset in range(look_back):
        # Column `offset` holds the value `offset` steps into every window.
        X[:, offset] = series[offset:offset + n_samples]

    y = series[look_back:look_back + n_samples].copy()
    return X, y

In [None]:
# Window length: how many past scaled residuals form each input sample.
look_back = 1
# Build (window, next value) pairs from the scaled residual series.
X_nasa, y_nasa = create_dataset(scaled_df_nasa, look_back)

In [None]:
# Chronological 70/30 split of the windowed residual samples (no shuffling).
X_train_nasa, X_test_nasa, y_train_nasa, y_test_nasa = train_test_split(
    X_nasa, y_nasa, shuffle=False, test_size=0.3, random_state=42
)

# Append a trailing axis of size 1: (samples, look_back) -> (samples, look_back, 1),
# matching the input_shape=(look_back, 1) declared on the first recurrent layer.
X_train_nasa = np.expand_dims(X_train_nasa, axis=2)
X_test_nasa = np.expand_dims(X_test_nasa, axis=2)

print(f'NASA DATASET: Train Set: {X_train_nasa.shape} | Test Set: {X_test_nasa.shape}')

### Train Model

In [None]:
# Two stacked GRU layers (50 units each) followed by a single-unit dense
# output, trained with Adam on mean squared error over the residual windows.
# NOTE(review): this rebinds `model`, which earlier cells bound to the
# Prophet instance.
model = Sequential([
    GRU(50, return_sequences=True, input_shape=(look_back, 1)),
    GRU(50, return_sequences=False),
    Dense(1),
])
model.compile(loss='mean_squared_error', optimizer='adam')

model.fit(X_train_nasa, y_train_nasa, batch_size=16, epochs=20, verbose=1)

### Predict Dataset

In [None]:
# Predict the training windows and map predictions and targets back to the
# original residual scale. The targets are wrapped in a list, so the
# inverse-transformed result is 2-D with a single row (read later via [0]).
nasa_train_predict = model.predict(X_train_nasa)
nasa_train_predict = scaler.inverse_transform(nasa_train_predict)
y_train_nasa_org = scaler.inverse_transform([y_train_nasa])

# Time only the test-set inference. perf_counter() is a monotonic,
# high-resolution clock meant for measuring durations; time.time() is a wall
# clock that can jump and may be too coarse for short intervals.
start_time = time.perf_counter()
nasa_test_predict = model.predict(X_test_nasa)
end_time = time.perf_counter()
nasa_test_predict = scaler.inverse_transform(nasa_test_predict)
y_test_nasa_org = scaler.inverse_transform([y_test_nasa])

print(f'NASA DATASET - Prediction Size Train: {len(nasa_train_predict)} | Prediction Size Test: {len(nasa_test_predict)}')

In [None]:
# Actual vs. predicted residuals for the test windows. Both series are plotted
# against their position in the test set, so the x-axis is the sample index
# and the y-axis is the residual value (the original labels had the residual
# on the x-axis and a placeholder 'X' on the y-axis).
plt.figure(figsize=(10,6))
plt.plot(y_test_nasa_org[0], label='Test', color='red')
plt.plot(nasa_test_predict, label='Prediction', color='blue')
plt.xlabel('Sample Index')
plt.ylabel('Residual Value')
plt.title('NASA Residual Prediction')
plt.legend()
plt.show()

In [None]:
# Score the residual model on the test windows, in original residual units.
# y_test_nasa_org holds the targets as a single row, so [0] selects the 1-D
# series; the predictions are flattened to the same 1-D layout.
nasa_residual_actual = y_test_nasa_org[0]
nasa_residual_forecast = np.ravel(nasa_test_predict)

mse = mean_squared_error(nasa_residual_actual, nasa_residual_forecast)
rmse = np.sqrt(mse)
mae = mean_absolute_error(nasa_residual_actual, nasa_residual_forecast)
r2 = r2_score(nasa_residual_actual, nasa_residual_forecast)

# The residual network is built from GRU layers, so the banner names GRU
# (it previously said LSTM); 'R²' matches the other metric reports.
print('=== GRU Residual Analysis Model Metrics ===')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R²: {r2}')
print(f'Prediction Time: {(end_time - start_time) * 1000}ms')

## Combine Results from Both Models

In [None]:
# Residual predictions for the whole series in chronological order
# (training windows first, then test windows).
nasa_combine_predict = np.concatenate((nasa_train_predict, nasa_test_predict))

final_df_nasa = df_nasa_residual.copy()

# create_dataset() pairs window i with the value at row i + look_back, so
# prediction i belongs to row i + look_back, not row i. Build the column as an
# array and assign it in one step: writing through
# `df['col'].iloc[...] = ...` is chained assignment and does not update the
# frame under pandas Copy-on-Write.
residual_predict = np.full(len(final_df_nasa), np.nan)
residual_predict[look_back:look_back + len(nasa_combine_predict)] = nasa_combine_predict[:, 0]
final_df_nasa['residual_predict'] = residual_predict

# Inner double quotes keep this f-string valid on Python versions before 3.12.
print(f'NASA DATASET: Total Output: {final_df_nasa.shape[0]} | Loss: {final_df_nasa["residual_predict"].isna().sum()}')

# Rows without a prediction (the leading look_back rows, which have no
# complete window before them) fall back to the observed residual.
final_df_nasa['residual_predict'] = final_df_nasa['residual_predict'].fillna(final_df_nasa['residual'])

# Hybrid forecast = Prophet forecast + predicted residual.
final_df_nasa['y_predicted'] = final_df_nasa['yhat'] + final_df_nasa['residual_predict']

final_df_nasa.head()

### Extract Test Dataset Related Dataframe & Calculate Metrics

In [None]:
# The test period is the trailing len(test_df_nasa) rows of the combined
# frame. The offset is derived from the test split rather than from
# train_df_nasa.shape[0], because the training split can lose rows when
# duplicates are dropped, which would pull training rows into this slice.
nasa_test_start = len(final_df_nasa) - len(test_df_nasa)
final_df_nasa_analysis = final_df_nasa.iloc[nasa_test_start:]

print(f'NASA DATASET: Testing Output: {final_df_nasa_analysis.shape}')

In [None]:
# Score the combined (Prophet + residual model) forecast on the test rows.
nasa_hybrid_actual = final_df_nasa_analysis['y']
nasa_hybrid_forecast = final_df_nasa_analysis['y_predicted']

mse = mean_squared_error(nasa_hybrid_actual, nasa_hybrid_forecast)
mae = mean_absolute_error(nasa_hybrid_actual, nasa_hybrid_forecast)
r2 = r2_score(nasa_hybrid_actual, nasa_hybrid_forecast)
rmse = np.sqrt(mse)

print('=== Hybrid Model Metrics ===')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')
print(f'MAE: {mae}')
print(f'R²: {r2}')

In [None]:
# Train / test splits with the hybrid forecast for the test rows drawn on top.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(train_df_nasa['ds'], train_df_nasa['y'], color='green', label='Train')
ax.plot(test_df_nasa['ds'], test_df_nasa['y'], color='red', label='Test')
ax.plot(final_df_nasa_analysis['ds'], final_df_nasa_analysis['y_predicted'], color='blue', label='Prediction')
ax.set_title('NASA Original and Predicted Datasets')
ax.set_xlabel('Time')
ax.set_ylabel('Count')
ax.grid(True)
ax.legend()
plt.show()