# Requirement Setup

In [None]:
!pip install prophet
!pip install scikit-learn
!pip install tensorflow
!pip install setuptools

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from prophet.serialize import model_from_json
from keras.models import load_model
import tensorflow as tf
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [None]:
# Seed NumPy's global RNG and TensorFlow's global RNG with the same value.
seed_value = 42
np.random.seed(seed_value)
tf.random.set_seed(seed_value)

# Data Loading

Original Dataset

In [None]:
# Read the raw CSV and rename its two columns: `ds` (timestamp) and `y_us`
# (the unscaled workload value).
fifa_csv_path = 'datasets/fifa.csv'
df_fifa = pd.read_csv(fifa_csv_path)
df_fifa.columns = ['ds', 'y_us']

# Parse the timestamp column so it can be compared against Timestamp bounds.
parsed_timestamps = pd.to_datetime(df_fifa['ds'])
df_fifa['ds'] = parsed_timestamps

print(f'FIFA DATASET - Dates: {df_fifa["ds"].count()} | Total: {df_fifa["y_us"].sum()}')
df_fifa.head()

Split Dataset

In [None]:
# Keep only the rows inside the evaluation window (both bounds inclusive).
start_time = pd.to_datetime("1998-07-07 09:51:00")
end_time = pd.to_datetime("1998-07-07 16:31:00")
in_window = (df_fifa['ds'] >= start_time) & (df_fifa['ds'] <= end_time)

# .copy() detaches the filtered rows from the full frame, so columns added to
# df_fifa afterwards do not raise SettingWithCopyWarning.
df_fifa = df_fifa[in_window].copy()

print(f'FIFA DATASET - Dates: {df_fifa["ds"].count()} | Total: {df_fifa["y_us"].sum()}')
df_fifa.head()

Scale the dataset, because the models were trained on scaled data.

In [None]:
# Min-max scale the raw workload into [0, 1] and store it as column `y`.
# NOTE(review): the scaler is fit on the rows currently in df_fifa — confirm
# this matches the scaling that was used when the models were trained.
scaler = MinMaxScaler(feature_range=(0, 1))
raw_workload = df_fifa[['y_us']]
df_fifa['y'] = scaler.fit_transform(raw_workload)

# Load Trained Models

In [None]:
# Paths of the serialized models.
prophet_model_path = 'models/fbprophet-fifa-20240911_180633.json'
lstm_model_path = 'models/lstm-fifa-20240911_180633.keras'

# The Prophet model is stored as JSON text and rebuilt with model_from_json.
with open(prophet_model_path, 'r') as model_file:
    serialized_prophet = model_file.read()
prophet_model = model_from_json(serialized_prophet)

# The LSTM is stored in Keras' native format.
lstm_model = load_model(lstm_model_path)

# Prophet Prediction

In [None]:
# Ask Prophet for a forecast at exactly the timestamps present in df_fifa.
# A copy of the `ds` column is passed so df_fifa itself is not handed to Prophet.
prophet_input = df_fifa[['ds']].copy()
forecast_df_fifa = prophet_model.predict(prophet_input)

# Residual Calculation & Preparation

In [None]:
# Attach Prophet's point forecast (`yhat`) to every observed row by timestamp.
prophet_yhat = forecast_df_fifa[['ds', 'yhat']]
df_fifa_residual = df_fifa.merge(prophet_yhat, on='ds', how='left')

# Residual = scaled observation minus Prophet forecast.
scaled_actual = df_fifa_residual['y']
df_fifa_residual['residual'] = scaled_actual - df_fifa_residual['yhat']

In [None]:
def create_dataset(dataset, look_back):
    """Build sliding-window samples from the first column of `dataset`.

    Args:
        dataset: 2-D array-like (it is converted with ``np.array``); only
            column 0 is read.
        look_back: Number of consecutive rows that form one input window.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X[k]`` holds rows
        ``k .. k + look_back - 1`` of column 0 and ``y[k]`` holds the value at
        row ``k + look_back``. Both are empty when ``dataset`` has no more
        than ``look_back`` rows.
    """
    values = np.array(dataset)
    n_samples = len(values) - look_back

    windows = [values[start:start + look_back, 0] for start in range(n_samples)]
    targets = [values[start + look_back, 0] for start in range(n_samples)]
    return np.array(windows), np.array(targets)

In [None]:
# Window length used to turn the residual series into LSTM samples.
look_back = 1

# A one-column frame is passed because create_dataset reads column 0 of a
# 2-D input.
residual_frame = df_fifa_residual[['residual']]
X_fifa, y_fifa = create_dataset(residual_frame, look_back)

# LSTM Prediction

In [None]:
# Predict the residuals from the windowed inputs; the first output column is
# used as the residual prediction further down.
# NOTE(review): X_fifa is 2-D (samples, look_back) — confirm that this is the
# input shape the saved model expects.
fifa_predict = lstm_model.predict(x=X_fifa)

# Calculate Final Prediction

In [None]:
# Combine the Prophet forecast with the LSTM residual prediction and convert
# the result back to the original workload scale.
final_df_fifa = df_fifa_residual.copy()

final_df_fifa['residual_predict'] = np.nan

# Write the predictions with a single positional indexer on the frame itself.
# The previous chained form (`df[col].iloc[...] = ...`) assigns into a
# temporary Series: it warns in pandas 2.x and does not update the frame when
# Copy-on-Write is enabled.
# NOTE(review): create_dataset builds prediction k from residual row k with a
# target at row k + look_back, yet prediction k is stored at row k here —
# confirm this row alignment is intended.
n_predictions = len(fifa_predict)
residual_predict_col = final_df_fifa.columns.get_loc('residual_predict')
final_df_fifa.iloc[:n_predictions, residual_predict_col] = fifa_predict[:, 0]

# Double quotes inside the f-string keep this line valid before Python 3.12.
print(f'FIFA DATASET: Total Output: {final_df_fifa.shape[0]} | Loss: {final_df_fifa["residual_predict"].isna().sum()}')

# Rows that received no prediction (the trailing `look_back` rows) fall back
# to the actual residual.
final_df_fifa['residual_predict'] = final_df_fifa['residual_predict'].fillna(final_df_fifa['residual'])

# Hybrid prediction in scaled units, then mapped back through the scaler.
final_df_fifa['y_predicted'] = final_df_fifa['yhat'] + final_df_fifa['residual_predict']
final_df_fifa['y_predicted_us'] = scaler.inverse_transform(final_df_fifa[['y_predicted']])

final_df_fifa.head()

In [None]:
# Overlay the observed workload and the hybrid prediction on one axis.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(df_fifa['ds'], df_fifa['y_us'], label='Actual', color='#1f77b4')
ax.plot(final_df_fifa['ds'], final_df_fifa['y_predicted_us'], label='Prediction', color='red')
ax.set_title('World Cup 98 Original and Predicted Datasets')
ax.set_xlabel('Time')
ax.set_ylabel('Workload')
ax.grid(True)
ax.legend()
plt.show()

# Pod Calculation

Pods from Prediction Based Autoscaler

Constants
- Workload Per Pod: 100
- RRS: 0.6
- Minimum Pods: 1

In [None]:
# Lower bound applied to the pod count when scaling down.
pods_min = 1
# Fraction of the surplus pods that is removed on a scale-down step.
rrs = 0.6
# Workload a single pod is assumed to handle per time step.
workload_per_pod = 100

In [None]:
# Work on an independent (deep) copy so the pod columns added below do not
# touch final_df_fifa.
pod_df_fifa = final_df_fifa.copy(deep=True)

In [None]:
# Simulate the prediction-based autoscaler row by row. Each row depends on the
# pod count of the previous row, so the loop is sequential.
for pod_column in ('pods_t', 'pods_t+1'):
    pod_df_fifa[pod_column] = 0

# The simulation starts from 100 pods at the first row.
pod_df_fifa.loc[0, 'pods_t'] = 100

for row in range(1, len(pod_df_fifa)):
    predicted_load = pod_df_fifa.loc[row, 'y_predicted_us']
    current_pods = pod_df_fifa.loc[row - 1, 'pods_t']

    # Pods needed for the predicted workload: whole pods plus one more when
    # there is a remainder.
    target_pods = int(predicted_load / workload_per_pod) + (1 if predicted_load % workload_per_pod != 0 else 0)

    if target_pods < current_pods:
        # Scale down: remove only the `rrs` share of the surplus (truncated to
        # whole pods) and never go below pods_min.
        removable = (current_pods - target_pods) * rrs
        target_pods = max(pods_min, current_pods - int(removable))
    # Scale up (target above current) adopts the target as is; when both are
    # equal the count is unchanged. In every case the row ends up with
    # target_pods in both columns.

    pod_df_fifa.loc[row, 'pods_t'] = target_pods
    pod_df_fifa.loc[row, 'pods_t+1'] = target_pods

Pods from Horizontal Pod Autoscaler

Constants
- CPU Threshold: 0.7
- Scale Down Delay: 5 min

In [None]:
# Minutes that must pass after the scale-down timer starts before the pod
# count may be reduced.
scale_down_delay = 5
# Share of workload_per_pod at which a pod counts as fully used.
cpu_threshold = 0.7

In [None]:
# Simulate a reactive autoscaler on the observed workload: scale up
# immediately, scale down only after `scale_down_delay` minutes have passed
# since the scale-down timer was started.
pod_df_fifa['hpa_pods'] = 0
last_scale_down_time = None

# The simulation starts from 100 pods at the first row.
pod_df_fifa.loc[0, 'hpa_pods'] = 100

for i in range(1, len(pod_df_fifa)):
    now = pod_df_fifa.loc[i, 'ds']
    workload = pod_df_fifa.loc[i, 'y_us']
    running_pods = pod_df_fifa.loc[i - 1, 'hpa_pods']

    # Pods needed so that each one stays at or below the threshold share of
    # its capacity: whole pods plus one more when there is a remainder.
    required_pods = int(workload / (workload_per_pod * cpu_threshold)) + (1 if workload % (workload_per_pod * cpu_threshold) != 0 else 0)

    # Default: keep the previous count (no change needed, timer just started,
    # or still inside the delay).
    next_pods = running_pods

    if required_pods > running_pods:
        # Scale up right away and cancel any pending scale-down timer.
        next_pods = required_pods
        last_scale_down_time = None
    elif required_pods < running_pods:
        if last_scale_down_time is None:
            # First request to shrink: start the timer, do not scale yet.
            last_scale_down_time = now
        elif (now - last_scale_down_time).total_seconds() / 60 >= scale_down_delay:
            # Delay elapsed: scale down and restart the timer from this row.
            next_pods = required_pods
            last_scale_down_time = now

    pod_df_fifa.loc[i, 'hpa_pods'] = next_pods

In [None]:
# Show the first five rows, including the simulated pod columns.
pod_df_fifa.head(n=5)

In [None]:
# Compare both simulated pod counts on one shared axis.
fig, ax = plt.subplots(figsize=(14, 5))

pod_series = (
    ('pods_t', 'PBA Pods Count', 'red'),
    ('hpa_pods', 'HPA Pods Count', 'green'),
)
for column, label, color in pod_series:
    ax.plot(pod_df_fifa['ds'], pod_df_fifa[column], label=label, color=color)

ax.set_title('Pod Count Over Time - PBA vs HPA ')
ax.set_xlabel('Time')
ax.set_ylabel('Number of Pods')
ax.grid(True)
ax.legend()

# Full timestamps on the x-axis, rotated so they do not overlap.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
fig.autofmt_xdate()

plt.show()

In [None]:
# Pod counts (left axis) together with the observed workload (right axis).
fig, ax1 = plt.subplots(figsize=(14, 5))

# Both pod series are drawn on ax1 so they share one y-scale. Previously the
# HPA curve lived on its own auto-scaled twin axis with the y-axis hidden, so
# the two curves could not be compared against the "Number of Pods" ticks.
line1 = ax1.plot(pod_df_fifa['ds'], pod_df_fifa['pods_t'], label='PBA Pods Count', color='blue')
line2 = ax1.plot(pod_df_fifa['ds'], pod_df_fifa['hpa_pods'], label='HPA Pods Count', color='green')
ax1.set_title('Pod Count and Workload Over Time')
ax1.set_xlabel('Time')
ax1.set_ylabel('Number of Pods')
ax1.grid(True)

# One tick per hour with full timestamps, rotated so they do not overlap.
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d %H:%M:%S'))
ax1.xaxis.set_major_locator(mdates.HourLocator(interval=1))
fig.autofmt_xdate()

# The workload has a different unit, so it gets its own (labelled) right axis.
ax3 = ax1.twinx()
line3 = ax3.plot(pod_df_fifa['ds'], pod_df_fifa['y_us'], label='Workload', color='red')
ax3.set_ylabel('Workload')

# Merge the handles from both axes into a single legend.
lines = line1 + line2 + line3
labels = [line.get_label() for line in lines]
ax3.legend(lines, labels, loc=0)

plt.show()

# Evaluation Metrics

In [None]:
# Provisioning metrics for the prediction-based autoscaler (`pods_t`).
df_fifa_analysis = pod_df_fifa.copy()

# r_t: observed workload; p_t: workload the provisioned pods can take
# (pod count * workload_per_pod).
df_fifa_analysis['r_t'] = df_fifa_analysis['y_us']
df_fifa_analysis['p_t'] = df_fifa_analysis['pods_t'] * workload_per_pod

demand = df_fifa_analysis['r_t']
supply = df_fifa_analysis['p_t']

# Amount of missing / excess capacity per row, floored at zero.
df_fifa_analysis['under_provisioning'] = np.maximum(demand - supply, 0)
df_fifa_analysis['over_provisioning'] = np.maximum(supply - demand, 0)
# 1 when the row is under- / over-provisioned, otherwise 0.
df_fifa_analysis['sgn_under'] = np.where(demand > supply, 1, 0)
df_fifa_analysis['sgn_over'] = np.where(supply > demand, 1, 0)

# Data Range Time in Minutes
total_time = 400
percent_scale = 100 / total_time

# Θ: summed relative under/over-provisioning; T: share of rows spent in the
# under/over-provisioned state. Both are scaled by 100 / total_time.
Θ_U = percent_scale * (df_fifa_analysis['under_provisioning'] / demand).sum()
Θ_O = percent_scale * (df_fifa_analysis['over_provisioning'] / demand).sum()

T_U = percent_scale * df_fifa_analysis['sgn_under'].sum()
T_O = percent_scale * df_fifa_analysis['sgn_over'].sum()

for metric_name, metric_value in (('Θ_O', Θ_O), ('T_O', T_O), ('Θ_U', Θ_U), ('T_U', T_U)):
    print(f'{metric_name}: {metric_value}')

# Fourth root of (reference product / measured product).
# NOTE(review): the four literals look like reference Θ_U, Θ_O, T_U and T_O
# values (same order as the denominator) — confirm where they come from.
# NOTE(review): the ratio is not finite when any measured metric is 0.
reference_product = 0.189 * 593.755 * 2.2 * 97.6
ε_n = (reference_product / (Θ_U * Θ_O * T_U * T_O)) ** 0.25

print(f'Elastic Speed Up: {ε_n}')