<a href="https://colab.research.google.com/github/RifatMuhtasim/Data_Science_Workflow/blob/main/4.5.Time_Series_Forecast_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. ARIMA

## Find the p, d, q values

In [None]:
# Test the ARIMA (p, d, q) value

# Adfuller Test
from statsmodels.tsa.stattools import adfuller

# H0: the series is non-stationary
# H1: the series is stationary

def Adfuller_test(data):
    """Run ``adfuller`` on ``data``, print its leading statistics and a verdict.

    The first four entries of the ``adfuller`` result are printed under the
    labels below, followed by a stationarity verdict based on comparing the
    second entry (the p-value) with the 0.05 significance level.

    Args:
        data: The series handed straight to ``adfuller``.

    Returns:
        None. All output is printed.
    """
    adf_output = adfuller(data)
    field_names = ("ADF Test Statistic", "P-Value", "Lags Used", "Number of Observation Used")

    # zip() stops at the shorter sequence, so only the first four result
    # entries are reported even though adfuller returns more.
    for stat, name in zip(adf_output, field_names):
        print(name + " : " + str(stat))

    # Written as a negation so the branch taken matches the original
    # comparison for every possible p-value.
    rejects_null = not (adf_output[1] > 0.05)
    if rejects_null:
        print("Reject the Null Hypothesis. Data is Stationarity.")
    else:
        print("Fail to Reject the Null Hypothesis. Data is non Stationarity.")

In [None]:
# Find D Value

def Find_d_value_using_Adfuller_test(dataset, target, max_d=None):
    """Find the smallest differencing order ``d`` that passes the ADF test.

    The ``target`` column is tested as-is first (``d = 0``) and is then
    differenced repeatedly until ``adfuller`` reports a p-value of at most
    0.05. The previous implementation started at ``d = 1``, so an already
    stationary series was reported as needing one difference.

    Args:
        dataset: DataFrame holding the series.
        target: Name of the column to test.
        max_d: Highest differencing order to try. Defaults to
            ``dataset.shape[0] - 1``, the same upper bound as before.

    Returns:
        The differencing order that was found (also printed as
        ``d-value:  <d>``), or ``None`` when no order up to ``max_d``
        passes the test.
    """
    series = dataset[target]
    upper_bound = dataset.shape[0] - 1 if max_d is None else max_d

    for d in range(upper_bound + 1):
        # Each diff() introduces a leading NaN that adfuller cannot handle.
        p_value = adfuller(series.dropna())[1]

        if p_value <= 0.05:
            print("d-value: ", d)
            return d

        # Equivalent to series - series.shift(1); works on a Series so the
        # caller's frame is never copied or widened.
        series = series.diff()

    return None

In [None]:
# Find P, Q Value

try:
    import pmdarima as pm
except ImportError:
    !pip install --quiet pmdarima
    import pmdarima as pm

def Find_best_arima_model_params(data, d=4):
    """Search for the best non-seasonal ARIMA (p, q) with ``pm.auto_arima``.

    The search space is controlled by ``start_p``/``max_p`` and
    ``start_q``/``max_q``. The former ``p_values`` / ``q_values`` keyword
    arguments were dropped: they are not search-space parameters of
    ``auto_arima`` (unknown keywords are treated as extra fit arguments), and
    their comments did not match the ranges they held. ``n_jobs`` was dropped
    as well because it only applies to the non-stepwise grid search.

    Args:
        data: The series to model.
        d: Differencing order to use. Defaults to 4, the value that used to
            be hard-coded; pass the order found by the ADF-based search.

    Returns:
        The fitted model returned by ``pm.auto_arima``. Its summary is also
        printed.
    """
    model = pm.auto_arima(
        data,
        # p is searched in [0, 15] and q in [0, 10].
        start_p=0,
        start_q=0,
        max_p=15,
        max_q=10,
        d=d,
        test='adf',
        # Non-seasonal search.
        m=0,
        seasonal=False,
        start_P=0,
        D=1,
        trace=True,
        error_action='ignore',
        suppress_warnings=True,
        stepwise=True,
        scoring='mse',
    )

    # Print the summary of the best model found
    print(model.summary())
    return model

## Model

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# Fit an ARIMA model with order (p, d, q) = (2, 4, 0) on the co2_emissions column.
# NOTE(review): `train_dataset` is not created in any visible cell; it has to be
# loaded earlier for this cell to run on a fresh kernel.
model = ARIMA(train_dataset['co2_emissions'], order=(2, 4, 0))
model_fit = model.fit()

In [None]:
# Forecast 10 steps past the end of the fitted series and keep them as a plain list.
forecast_output = model_fit.forecast(steps=10).tolist()
# Bare expression so the notebook displays the forecast values.
forecast_output

In [None]:
# Build the prediction frame in one shot instead of concatenating one-row
# frames in a loop (quadratic, and it starts from an empty object-dtype frame).
# Forecast i is labelled with year 2016 + i, as before.
pred_result_df = pd.DataFrame({
    "year": list(range(2016, 2016 + len(forecast_output))),
    "co2_emissions": forecast_output,
})

# NOTE(review): this rebinds `train_dataset` to history + forecasts, so the cell
# is not idempotent -- re-running it appends the forecasts a second time.
train_dataset = pd.concat([train_dataset, pred_result_df], ignore_index=True)

# 2. SARIMAX

In [None]:
# Reuse the (p, d, q) order determined in the ARIMA section

from statsmodels.tsa.statespace.sarimax import SARIMAX

# Seasonal ARIMA on the sales column: order (0, 1, 0) with seasonal order
# (0, 1, 0, 12), i.e. a seasonal period of 12 observations.
# NOTE(review): `df` is not created in any visible cell.
model = SARIMAX(df['sales'], order=(0, 1, 0), seasonal_order=(0, 1, 0, 12))
model_fit = model.fit()

# Forecast horizon; the message below is derived from it so the two cannot drift.
forecast_steps = 10
forecast = model_fit.forecast(steps=forecast_steps)

# Print the forecasted values
print(f"Forecasted values for the next {forecast_steps} steps:")
print(forecast)

# 3. FBProphet

In [None]:
from prophet import Prophet
import logging


def FBProphet(df, timestamp, target, freq, periods):
    """Fit a default Prophet model and return the next ``periods`` predictions.

    Args:
        df: DataFrame holding the history.
        timestamp: Name of the column with the timestamps; it is converted
            with ``pd.to_datetime``.
        target: Name of the column with the values to forecast.
        freq: Frequency string passed to ``make_future_dataframe``
            (for example ``"M"`` or ``"Y"``). A falsy value means daily.
            Previously every value other than ``"M"`` / ``"Y"`` silently
            fell back to daily.
        periods: Number of future steps to forecast.

    Returns:
        A list with the last ``periods`` ``yhat`` values of the prediction.
        ``periods == 0`` now yields an empty list; the former ``[-periods:]``
        slice returned the whole fitted series in that case.
    """
    # Suppress INFO messages from Prophet and its backends.
    for logger_name in ('fbprophet', 'cmdstanpy', 'prophet'):
        logging.getLogger(logger_name).setLevel(logging.WARNING)

    # Prophet expects the history in columns named 'ds' and 'y'.
    history = pd.DataFrame({
        'y': df[target],
        'ds': pd.to_datetime(df[timestamp]),
    })

    forecast_model = Prophet()
    forecast_model.fit(history)

    future = forecast_model.make_future_dataframe(
        periods=periods,
        freq=freq if freq else 'D',
    )
    prediction = forecast_model.predict(future)

    # The future frame includes the history, so the forecasts are its tail.
    return prediction['yhat'].tail(periods).tolist()


# Forecast 10 periods of `sales` at freq "M", using the `month` column as timestamps.
# NOTE(review): this rebinds `forecast`, which the SARIMAX cell also assigns.
forecast = FBProphet(df, timestamp= "month", target= "sales", freq="M", periods=10)
# Bare expression so the notebook displays the returned list.
forecast

In [None]:
# Label each forecast value with the month that follows the last observed one:
# the first forecast is last_month + 1 month, the second + 2 months, and so on.
last_month = df['month'].iloc[-1]

forecast_values = list(forecast)
forecast_months = [
    last_month + pd.DateOffset(months=1 + step)
    for step in range(len(forecast_values))
]

forecast_data = pd.DataFrame({'month': forecast_months, 'sales': forecast_values})

# 4. LSTM Univariate

## Prepare Data

In [None]:
def Prepare_data(timeseries_data, n_features):
    """Split a sequence into sliding input windows and next-step targets.

    Args:
        timeseries_data: Ordered observations (list, array or pandas Series).
            The data is converted to a NumPy array first, so windows are
            always taken by position. Indexing a pandas Series directly did
            a label lookup for the target value, which fails or picks the
            wrong row when the index is not 0..n-1.
        n_features: Length of each input window (number of past
            observations per sample). The name is kept for compatibility.

    Returns:
        ``(X, y)`` as NumPy arrays. ``X[k]`` holds observations
        ``k .. k + n_features - 1`` and ``y[k]`` is the observation that
        follows them. Both are empty when the data is not longer than the
        window.

    Raises:
        ValueError: If ``n_features`` is smaller than 1.
    """
    if n_features < 1:
        raise ValueError("n_features (window length) must be at least 1")

    values = np.asarray(timeseries_data)
    # A window starting at i needs a target at i + n_features, so the last
    # valid start is len(values) - n_features - 1.
    n_windows = max(len(values) - n_features, 0)

    X = [values[start: start + n_features] for start in range(n_windows)]
    y = [values[start + n_features] for start in range(n_windows)]

    return np.array(X), np.array(y)

In [None]:
# Univariate series used to train the LSTM.
# NOTE(review): if the ARIMA section was run first, `train_dataset` was rebound
# there to include the ARIMA forecasts -- confirm that is intended.
timeseries_data = train_dataset["co2_emissions"]
# Window length: each sample uses 5 consecutive observations as input.
n_steps = 5
X_train, y_train = Prepare_data(timeseries_data, n_steps)
# One value per time step (univariate).
n_features = 1
# Reshape to the 3-D layout matching input_shape=(n_steps, n_features)
# of the LSTM model: (samples, n_steps, n_features).
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], n_features ))

## Building LSTM Model

In [None]:
# Two stacked LSTM layers followed by a single-unit regression head.
# The first layer returns the full sequence so the second LSTM receives one
# vector per time step.
lstm_model = Sequential([
    LSTM(64, input_shape=(n_steps, n_features), activation="relu", return_sequences=True),
    LSTM(32, activation="relu"),
    Dense(1)
])

lstm_model.compile(optimizer="adam", loss="mse")
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
lstm_model.summary()

In [None]:
# Train silently for 1000 epochs.
# NOTE(review): no random seed is set in the visible cells, so the result will
# vary between runs.
history = lstm_model.fit(X_train, y_train, epochs=1000, verbose=0)

# Extract the loss of the last epoch (the model is compiled without extra
# metrics, so loss is the only tracked value).
last_epoch_loss = history.history['loss'][-1]
print("Last Epoch Loss:", last_epoch_loss)

## Predictions

In [None]:
def LSTM_Prediction(lstm_model, timeseries_data, n_steps, n_features, num_of_predictions, start_year=2016):
    """Forecast recursively, feeding each prediction back in as input.

    At every step the last ``n_steps`` values of the running series are
    reshaped to ``(1, n_steps, n_features)``, passed to
    ``lstm_model.predict`` and the first value of the result is appended to
    the series so it becomes part of the next input window.

    Args:
        lstm_model: Object exposing ``predict(x, verbose=0)``.
        timeseries_data: Observed series that seeds the first window.
        n_steps: Window length the model was trained with.
        n_features: Number of features per time step.
        num_of_predictions: How many future values to generate.
        start_year: Year used to label the first prediction in the log
            lines. Defaults to 2016, the value that used to be hard-coded.

    Returns:
        List with the ``num_of_predictions`` predicted values.

    Raises:
        ValueError: If predictions are requested but the series is shorter
            than ``n_steps``.
    """
    running_series = np.array(timeseries_data)
    if num_of_predictions > 0 and len(running_series) < n_steps:
        raise ValueError(
            f"Need at least n_steps={n_steps} observations, got {len(running_series)}"
        )

    lstm_output = []
    for i in range(num_of_predictions):
        x_input = running_series[-n_steps:]
        print(f"{i+start_year} year input: {x_input}")

        y_pred = lstm_model.predict(x_input.reshape((1, n_steps, n_features)), verbose=0)
        next_value = y_pred[0][0]
        print(f"{i+start_year} year output: {next_value}")

        lstm_output.append(next_value)
        # Extend the series so the next window includes this prediction.
        running_series = np.append(running_series, next_value)

    return lstm_output

In [None]:
lstm_output = LSTM_Prediction(lstm_model, timeseries_data, n_steps, n_features, 10)

# Build the prediction frame in one shot instead of concatenating one-row
# frames in a loop (quadratic, and it starts from an empty object-dtype frame).
# Prediction i is labelled with year 2016 + i, as before.
pred_result_df = pd.DataFrame({
    "year": list(range(2016, 2016 + len(lstm_output))),
    "co2_emissions": lstm_output,
})

# NOTE(review): this rebinds `train_dataset` to history + predictions, so the
# cell is not idempotent -- re-running it appends the predictions again.
train_dataset = pd.concat([train_dataset, pred_result_df], ignore_index=True)

# 5. LSTM Multivariate

In [None]:
# Time Series Split

# Window configuration for the multivariate model.
win_length = 3      # time steps per input window
num_features = 12   # features per time step expected by the model below
batch_size = 32

# NOTE(review): `X_train` / `y_train` must hold the multivariate data here; the
# visible cells only create the univariate versions.
# batch_size is now passed through; it was defined but unused, so the
# generator silently used its own default.
train_generator = TimeseriesGenerator(
    X_train, y_train, length=win_length, batch_size=batch_size
)

# Collect every batch. Indexing only train_generator[0] kept just the first
# batch and dropped the remaining windows from training.
X_train_gen = np.concatenate(
    [train_generator[batch_idx][0] for batch_idx in range(len(train_generator))]
)
y_train_gen = np.concatenate(
    [train_generator[batch_idx][1] for batch_idx in range(len(train_generator))]
)

In [None]:
# Same architecture as the univariate model, with an input of
# (win_length, num_features) per sample.
# NOTE(review): this rebinds `lstm_model`, replacing the univariate model.
lstm_model = Sequential([
    LSTM(64, input_shape=(win_length, num_features), activation="relu", return_sequences=True),
    LSTM(32, activation="relu"),
    Dense(1)
])

lstm_model.compile(optimizer="adam", loss="mse")
# summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line.
lstm_model.summary()

In [None]:
# Train silently for 1000 epochs on the windowed arrays.
history = lstm_model.fit(X_train_gen, y_train_gen, epochs=1000, verbose=0)

# Extract the loss of the last epoch (the model is compiled without extra
# metrics, so loss is the only tracked value).
last_epoch_loss = history.history['loss'][-1]
print("Last Epoch Loss:", last_epoch_loss)

In [None]:
future_step = 10
# Predict on the last `future_step` windows of the training inputs.
# NOTE(review): these are in-sample predictions for windows the model was
# trained on, not forecasts beyond the end of the data -- confirm intent.
forecast = lstm_model.predict(X_train_gen[-future_step: ])
# Bare expression so the notebook displays the predictions.
forecast