<a href="https://colab.research.google.com/github/GaneshiUmayangana/Average_Fare_Forecasting/blob/main/AutoARIMA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
from statsmodels.tsa.stattools import adfuller
# Import the 'auto_arima' function from 'pmdarima'
from pmdarima import auto_arima
import numpy as np

In [8]:
# Streamlit page configuration
st.set_page_config(
    page_title="Forecasting Dashboard",
    page_icon=":chart_with_upwards_trend:",
    layout="wide"
)
st.header("Average YLD Prediction")

# Load your dataset
df = pd.read_excel('Daily Yield_Nov24_12M&6M.xlsx', sheet_name='6M')

# Get unique sectors from the 'Sector' column
sectors = df['Sector'].unique()

# Create a selectbox for the user to choose a sector
selected_sector = st.selectbox('Select Sector', sectors)

# Convert 'Sale Date' to datetime format
df["Sale Date"] = pd.to_datetime(df["Sale Date"])

# Input forecast period start and end dates
forecast_period_start = st.date_input("Forecast Period: Start", key="start_pax_count")
forecast_period_end = st.date_input("Forecast Period End", key="end_pax_count")
forecast_period_start = pd.to_datetime(forecast_period_start)
forecast_period_end = pd.to_datetime(forecast_period_end)

# Filter data for the selected sector
df_filtered = df[df['Sector'] == selected_sector]

# Group by 'Sale Date' and calculate the average YLD USD and sum of PAX COUNT
df_grouped = df_filtered.groupby("Sale Date", as_index=False).agg(
    Avg_YLD_USD=("YLD USD", "mean"),
    Sum_PAX=("PAX COUNT", "sum")
)
st.write(df_grouped)

# Create the time series plot of the average yield
fig = go.Figure()

# Add line plot for Average YLD USD
fig.add_trace(go.Scatter(
    x=df_grouped['Sale Date'],
    y=df_grouped['Avg_YLD_USD'],
    mode='lines',
    name='Average YLD USD'
))

# Update the layout of the plot
fig.update_layout(
    title=f"Time Series of Average Yield for {selected_sector}",
    xaxis_title="Sale Date",
    yaxis_title="Average YLD USD",
    template="plotly_dark"
)

# Display the plot in Streamlit
st.plotly_chart(fig)

# Stationarity Test: Augmented Dickey-Fuller (ADF) Test
def adf_test(series):
    result = adfuller(series)
    return result

# Perform the ADF test on the 'Avg_YLD_USD' series
adf_result = adf_test(df_grouped['Avg_YLD_USD'])

# Display ADF test results
adf_statistic = adf_result[0]
adf_p_value = adf_result[1]
adf_critical_values = adf_result[4]

# Show the test results in Streamlit
st.write(f"ADF Statistic: {adf_statistic}")
st.write(f"p-value: {adf_p_value}")
st.write("Critical Values:")
for key, value in adf_critical_values.items():
    st.write(f"{key}: {value}")

# Interpret the result
if adf_p_value < 0.05:
    st.write("The time series is **stationary** (p-value < 0.05).")
else:
    st.write("The time series is **not stationary** (p-value >= 0.05).")
    # If the series is not stationary, apply differencing
    df_grouped['Diff_Avg_YLD_USD'] = df_grouped['Avg_YLD_USD'].diff().dropna()

    # Plot the differenced time series
    fig_diff = go.Figure()

    # Add line plot for differenced Average YLD USD
    fig_diff.add_trace(go.Scatter(
        x=df_grouped['Sale Date'][1:],  # Start from the second point after differencing
        y=df_grouped['Diff_Avg_YLD_USD'],
        mode='lines',
        name='Differenced Avg YLD USD'
    ))

    # Update the layout of the plot
    fig_diff.update_layout(
        title=f"Time Series of Differenced Average Yield for {selected_sector}",
        xaxis_title="Sale Date",
        yaxis_title="Differenced Avg YLD USD",
        template="plotly_dark"
    )

    # Display the differenced plot in Streamlit
    st.plotly_chart(fig_diff)

    # Perform the ADF test on the differenced series
    adf_result_diff = adf_test(df_grouped['Diff_Avg_YLD_USD'].dropna())

    # Show the test results for the differenced series
    st.write(f"ADF Statistic (Differenced): {adf_result_diff[0]}")
    st.write(f"p-value (Differenced): {adf_result_diff[1]}")
    st.write("Critical Values (Differenced):")
    for key, value in adf_result_diff[4].items():
        st.write(f"{key}: {value}")

    # Interpret the result for the differenced series
    if adf_result_diff[1] < 0.05:
        st.write("The differenced time series is **stationary** (p-value < 0.05).")
    else:
        st.write("The differenced time series is **not stationary** (p-value >= 0.05).")


2025-01-22 09:46:07.503 Session state does not function when running a script without `streamlit run`


In [9]:
# Create training and test sets
train_data = df_grouped[(df_grouped['Sale Date'] < forecast_period_start) ]
test_data = df_grouped[df_grouped['Sale Date'] >= forecast_period_start]

#st.write(forecast_period_start)

# Define feature columns (adjust based on your data)
X_train = train_data[['Sum_PAX']]  # Example feature column
y_train = train_data['Avg_YLD_USD']

X_test = test_data[['Sum_PAX']]  # Example feature column
y_test = test_data['Avg_YLD_USD']

# Show train and test set sizes
st.write(f"Training Data Size: {len(X_train)}")
st.write(f"Testing Data Size: {len(X_test)}")



In [20]:
#Fit Auto ARIMA Model
# Fit Auto ARIMA Model
st.subheader("Auto ARIMA Model")

# Initialize the AutoARIMA model, passing y_train as the first argument
arima_model = auto_arima(
    y_train,  # Pass the time series data here
    start_p=0, d=1, start_q=0,
    max_p=5, max_d=5, max_q=5,
    start_P=0, D=1, start_Q=0, max_P=5, max_D=5,
    max_Q=5, sp=12, seasonal=True,
    stationary=False,
    error_action='warn', trace=True,
    suppress_warnings=True, stepwise=True,
    random_state=20, n_fits=50
)

# Fit the model (no need to call fit again since it's done in auto_arima)
#arima_model.fit(y_train)  # Remove this line

# Print AIC value and model summary in Streamlit
# Access the AIC value directly using the 'aic' attribute
st.write(f"ARIMA Model AIC: {arima_model.aic()}")
st.write("ARIMA Model Summary:")
st.text(arima_model.summary())



Performing stepwise search to minimize aic
 ARIMA(0,1,0)(0,0,0)[0] intercept   : AIC=2020.436, Time=0.02 sec
 ARIMA(1,1,0)(0,0,0)[0] intercept   : AIC=2001.907, Time=0.08 sec
 ARIMA(0,1,1)(0,0,0)[0] intercept   : AIC=1991.721, Time=0.09 sec
 ARIMA(0,1,0)(0,0,0)[0]             : AIC=2018.588, Time=0.02 sec
 ARIMA(1,1,1)(0,0,0)[0] intercept   : AIC=1991.178, Time=0.12 sec
 ARIMA(2,1,1)(0,0,0)[0] intercept   : AIC=1992.803, Time=0.19 sec
 ARIMA(1,1,2)(0,0,0)[0] intercept   : AIC=1995.064, Time=0.25 sec
 ARIMA(0,1,2)(0,0,0)[0] intercept   : AIC=1991.730, Time=0.16 sec
 ARIMA(2,1,0)(0,0,0)[0] intercept   : AIC=2003.445, Time=0.17 sec
 ARIMA(2,1,2)(0,0,0)[0] intercept   : AIC=1994.807, Time=0.48 sec
 ARIMA(1,1,1)(0,0,0)[0]             : AIC=1989.806, Time=0.07 sec
 ARIMA(0,1,1)(0,0,0)[0]             : AIC=1990.177, Time=0.05 sec
 ARIMA(1,1,0)(0,0,0)[0]             : AIC=1999.965, Time=0.04 sec
 ARIMA(2,1,1)(0,0,0)[0]             : AIC=1991.408, Time=0.12 sec
 ARIMA(1,1,2)(0,0,0)[0]          



 ARIMA(2,1,2)(0,0,0)[0]             : AIC=1992.453, Time=0.16 sec

Best model:  ARIMA(1,1,1)(0,0,0)[0]          
Total fit time: 2.318 seconds


DeltaGenerator()