In [74]:
#Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib       
matplotlib.use('Qt5Agg')
sns.set_style("whitegrid")
import calendar

# DATA PREPROCESSING

### MSTA

In [75]:
MSTA= pd.read_excel('Data_36544086.xlsx', sheet_name='MSTA',
                                header=0, 
                                usecols=['Time', 'Anomaly (deg C)'],
                                parse_dates=True).squeeze()

In [76]:
MSTA['Time'] = pd.to_datetime(MSTA['Time'])
MSTA = MSTA.set_index('Time')

In [77]:
MSTA.describe()

Unnamed: 0,Anomaly (deg C)
count,2100.0
mean,-0.065022
std,0.406237
min,-1.044895
25%,-0.345928
50%,-0.153942
75%,0.122397
max,1.352173


In [78]:
MSTA.isna().sum()

Anomaly (deg C)    0
dtype: int64

In [79]:
# Extract data from the year 1950 onwards
MSTA_1950_onwards = MSTA[MSTA.index >= '1950-01-01']
MSTA_1950_onwards

Unnamed: 0_level_0,Anomaly (deg C)
Time,Unnamed: 1_level_1
1950-01-01,-0.300044
1950-02-01,-0.370036
1950-03-01,-0.216438
1950-04-01,-0.243616
1950-05-01,-0.137298
...,...
2024-08-01,1.239584
2024-09-01,1.144937
2024-10-01,1.199982
2024-11-01,1.225049


### CH4

In [80]:
CH4 = pd.read_excel('Data_36544086.xlsx', sheet_name='CH4',
                header=0, 
                usecols=['Year', 'Month', 'NOAA CH4 (ppb)'], 
                parse_dates=True).squeeze()

In [81]:
# Convert 'Year' and 'Month' to strings and concatenate them
CH4['Time'] = CH4['Year'].astype(str) + '-' + CH4['Month'].astype(str).str.zfill(2)

In [82]:
CH4 = CH4.drop(columns=['Year', 'Month'])
CH4['Time'] = pd.to_datetime(CH4['Time'])
CH4 = CH4.set_index('Time')


In [83]:
CH4.describe()
CH4.isna().sum()

NOAA CH4 (ppb)    0
dtype: int64

In [84]:
CH4

Unnamed: 0_level_0,NOAA CH4 (ppb)
Time,Unnamed: 1_level_1
1983-07-01,1625.95
1983-08-01,1628.05
1983-09-01,1638.44
1983-10-01,1644.80
1983-11-01,1642.59
...,...
2024-05-01,1926.36
2024-06-01,1921.77
2024-07-01,1921.03
2024-08-01,1926.83


### GMAF

In [85]:
GMAF = pd.read_excel('Data_36544086.xlsx', sheet_name='GMAF',
                    header=None,
                    skiprows=227,
                    usecols=[0,2],
                    names=['Time', 'Passenger Count'], 
                    parse_dates=True).squeeze()

In [86]:
# Reset the index to access the 'Time' column
#GMAF = GMAF.reset_index()

# Convert the 'Time' column to datetime format
GMAF['Time'] = pd.to_datetime(GMAF['Time'], format='%Y %b')

# Format the dates to 'YYYY-MM'
GMAF['Time'] = GMAF['Time'].dt.strftime('%Y-%m')

GMAF['Time'] = pd.to_datetime(GMAF['Time'])

# Set 'Time' column back as the index
GMAF = GMAF.set_index('Time')

In [87]:
GMAF['Passenger Count'] = GMAF['Passenger Count'].astype(int)

In [88]:
GMAF.describe()

Unnamed: 0,Passenger Count
count,528.0
mean,4259.448864
std,2304.916442
min,150.0
25%,2475.5
50%,4033.0
75%,5793.25
max,11628.0


In [89]:
GMAF.isna().sum()

Passenger Count    0
dtype: int64

### ET12

In [90]:
ET12 = pd.read_excel('Data_36544086.xlsx', sheet_name='ET12',
                                    header=None,
                                    skiprows= 6,
                                    usecols=[0,1],
                                    names=['Time', 'Unadjusted Total'], parse_dates=True).squeeze()

In [91]:
ET12 = ET12.set_index('Time')

In [92]:
ET12['Unadjusted Total'] = ET12['Unadjusted Total'].round(2)

In [93]:
# Reset the index to access the 'Time' column
ET12 = ET12.reset_index()

# Clean the 'Time' column to remove unwanted characters or spaces
ET12['Time'] = ET12['Time'].str.replace(r'\[.*\]', '', regex=True).str.strip()

# Convert the 'Time' column to datetime format
ET12['Time'] = pd.to_datetime(ET12['Time'], format='%B %Y')

# Format the dates to 'YYYY-MM'
ET12['Time'] = ET12['Time'].dt.strftime('%Y-%m')

# Set 'Time' column back as the index
ET12['Time'] = pd.to_datetime(ET12['Time'])
ET12 = ET12.set_index('Time')

In [94]:
ET12.describe()

Unnamed: 0,Unadjusted Total
count,360.0
mean,17.369639
std,3.374263
min,10.02
25%,14.89
50%,16.9
75%,19.735
max,25.02


In [95]:
ET12.isna().sum()

Unadjusted Total    0
dtype: int64

### Calendar Adjustment

In [96]:
variables = [MSTA_1950_onwards, CH4, GMAF, ET12]

In [97]:
#Calendar adjustment
# Create empty column for days in the month
def calendar_adjustment(df):
    df["Days"] = np.nan
    # Fill empty columns with the days per calendar month
    for date in df.index:
        df.loc[date, "Days"] = calendar.monthrange(date.year, date.month)[1]
    # Perform calendar adjustment
    df["Adjusted Data"] = (df.iloc[:, 0] * 365.25 / (12 * df["Days"])).round(2)


#for i in variables:
for i in [MSTA_1950_onwards, GMAF, ET12]:
    calendar_adjustment(i)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Days"] = np.nan
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Adjusted Data"] = (df.iloc[:, 0] * 365.25 / (12 * df["Days"])).round(2)


In [98]:
# CH4 = CH4.drop(columns=['Days', 'NOAA CH4 (ppb)'])
MSTA_1950_onwards = MSTA_1950_onwards.drop(columns=['Days', 'Anomaly (deg C)'])
GMAF = GMAF.drop(columns=['Days', 'Passenger Count'])
ET12 = ET12.drop(columns=['Days', 'Unadjusted Total'])

In [99]:
def rename_columns(df, df_name):
    df = df.rename(columns={'Adjusted Data': df_name})
    return df

In [100]:
# CH4 = rename_columns(CH4, 'CH4 (adjusted)')
MSTA_1950_onwards = rename_columns(MSTA_1950_onwards, 'Anomaly (adjusted deg C)')#
GMAF = rename_columns(GMAF, 'Passenger Count (adjusted)')
ET12 = rename_columns(ET12, 'Total ET12 (adjusted)')

In [101]:
CH4

Unnamed: 0_level_0,NOAA CH4 (ppb)
Time,Unnamed: 1_level_1
1983-07-01,1625.95
1983-08-01,1628.05
1983-09-01,1638.44
1983-10-01,1644.80
1983-11-01,1642.59
...,...
2024-05-01,1926.36
2024-06-01,1921.77
2024-07-01,1921.03
2024-08-01,1926.83


In [102]:
# Add frequency to the index
def frequency_add(i):
    i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

for i in variables:
    frequency_add(i)

In [103]:
# Set the name attribute for each DataFrame
variables[0].name = 'MSTA_1950_onwards'
variables[1].name = 'CH4'
variables[2].name = 'GMAF'
variables[3].name = 'ET12'

### Log Transformation


In [104]:
#Log transformation funciton
def log_transform(df):
    df['Log Data'] = np.log(df.iloc[:, 0])
    return df

log_transform(CH4)
log_transform(ET12)
log_transform(GMAF)

Unnamed: 0_level_0,Passenger Count (adjusted),Log Data
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
1980-01-01,808.07,6.694649
1980-02-01,818.66,6.707669
1980-03-01,1038.80,6.945821
1980-04-01,1417.37,7.256558
1980-05-01,1411.91,7.252699
...,...,...
2023-08-01,10037.50,9.214083
2023-09-01,9615.21,9.171101
2023-10-01,8339.88,9.028804
2023-11-01,5794.29,8.664628


In [105]:
#log timeplot
for i in [CH4, ET12, GMAF]:
    plt.plot(i['Log Data'], label='Log Data')
    plt.legend()
    plt.show()

# Exponential Smoothing

In [106]:
# Exponential Smoothing
from statsmodels.tsa.api import ExponentialSmoothing, Holt

# Forecasting Error
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error
from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

### Common Functions

In [107]:
#Holt-Winters Forecasting for annual seasonality
def holt_winters_M_forecast_annual(i, predict_periods):
    # Ensure the datetime index has a frequency set
    i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

    fit2 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='mul').fit()
    fit4 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='mul', damped_trend=True).fit()

    forecast2 = fit2.forecast(predict_periods).rename('HW-multiplicative seasonality')
    forecast4 = fit4.forecast(predict_periods).rename('Damped HW-multiplicative seasonality')

    return fit2, fit4, forecast2, forecast4

In [108]:
#Holt-Winters Forecasting for annual seasonality
def holt_winters_A_forecast_annual(i, predict_periods):
    # Ensure the datetime index has a frequency set
    i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

    fit1 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='add').fit(optimized = True)
    fit3 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='add', damped_trend=True).fit()

    forecast1 = fit1.forecast(predict_periods).rename('HW-additive seasonality')
    forecast3 = fit3.forecast(predict_periods).rename('Damped HW-additive seasonality')

    return fit1, fit3, forecast1, forecast3

In [109]:
#HW-M plot
def holt_winters_M_forecast_plot(i, predict_periods):
    # Ensure the datetime index has a frequency set
    i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

    fit2 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='mul').fit()
    fit4 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='mul', damped_trend=True).fit()

    forecast2 = fit2.forecast(predict_periods).rename('Model 1: HW-multiplicative seasonality')
    forecast4 = fit4.forecast(predict_periods).rename('Model 2: Damped HW-multiplicative seasonality')

    # Create figure
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))

    i['Log Data'].rename('Time plot of original').plot(color='black', legend=True, ax=ax)

    fit2.fittedvalues.plot(color='blue', ax=ax)
    forecast2.plot(color='blue', legend=True, ax=ax)

    fit4.fittedvalues.plot(color='yellow', ax=ax)
    forecast4.plot(color='yellow', legend=True, ax=ax)

    plt.xlabel('Dates')
    plt.ylabel('Values')
    plt.title('HW-M method-based forecasts')
    plt.legend(fontsize = 20)
    plt.tight_layout()
    plt.show()

In [110]:
#HW-A plot
def holt_winters_A_forecast_plot(i, predict_periods):
    # Ensure the datetime index has a frequency set
    i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

    fit1 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='add').fit(optimized = True)
    fit3 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='add', damped_trend=True).fit()


    # Forecast the next periods
    forecast1 = fit1.forecast(predict_periods).rename('Model 1: HW-additive seasonality')
    forecast3 = fit3.forecast(predict_periods).rename('Model 2: Damped HW-additive seasonality')

    # Create figure
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))

    i['Log Data'].rename('Time plot of original').plot(color='black', legend=True, ax=ax)

    fit1.fittedvalues.plot(color='red', ax=ax)
    forecast1.plot(color='red', legend=True, ax=ax)


    fit3.fittedvalues.plot(color='green', ax=ax)
    forecast3.plot(color='green', legend=True, ax=ax)


    plt.xlabel('Dates')
    plt.ylabel('Values')
    plt.title('HW-A method-based forecasts')
    plt.legend(fontsize = 20)

    plt.tight_layout()
    plt.show()

In [111]:
#Back transformation and merge (for log transformed data):
def back_transformation_merge(forecast2,forecast4):
    forecast2_backtransformed = round(np.exp(forecast2),2)
    forecast4_backtransformed = round(np.exp(forecast4),2)
    forecast_df = pd.concat([forecast2_backtransformed,  forecast4_backtransformed], axis=1)
    
    return forecast_df

In [112]:
#ME score:
def ME_results(i, model):
    i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)


    fit2 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal= model).fit()
    fit4 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal= model, damped_trend=True).fit()

    def mean_error(y_true, y_pred):
        return np.mean(y_true - y_pred)

    ME2 = mean_error(i['Log Data'], fit2.fittedvalues)
    ME4 = mean_error(i['Log Data'], fit4.fittedvalues)

        # Printing the parameters and errors for each scenario
    results = pd.DataFrame(index=[r"alpha", r"beta", r"gamma", r"l0", "b0", "ME"])
    params = ['smoothing_level', 'smoothing_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
    results["HW model 1"] = [fit2.params[p] for p in params] + [ME2]
    results["HW model 2"] = [fit4.params[p] for p in params] + [ME4]
    print(results)   

In [113]:
# MSE Score
def MSE_results(i, model):
        # Ensure the datetime index has a frequency set
        i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

        fit2 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend= 'add', seasonal= model).fit()
        fit4 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend= 'add', seasonal= model, damped_trend=True).fit()


        MSE2 = mean_squared_error(i['Log Data'], fit2.fittedvalues)
        MSE4 = mean_squared_error(i['Log Data'], fit4.fittedvalues)

        # Printing the parameters and errors for each scenario
        results = pd.DataFrame(index=[r"alpha", r"beta", r"gamma", r"l0", "b0", "MSE"])
        params = ['smoothing_level', 'smoothing_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
        results["HW model 1"] = [fit2.params[p] for p in params] + [MSE2]
        results["HW model 2"] = [fit4.params[p] for p in params] + [MSE4]
        print(results)

In [114]:
#MAE score
def MAE_results(i, model):
        # Ensure the datetime index has a frequency set
        i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

        fit2 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal= model).fit()
        fit4 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal= model, damped_trend=True).fit()


        MAE2 = mean_absolute_error(i['Log Data'], fit2.fittedvalues)
        MAE4 = mean_absolute_error(i['Log Data'], fit4.fittedvalues)

        # Printing the parameters and errors for each scenario
        results = pd.DataFrame(index=[r"alpha", r"beta", r"gamma", r"l0", "b0", "MAE"])
        params = ['smoothing_level', 'smoothing_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
        results["HW model 1"] = [fit2.params[p] for p in params] + [MAE2]
        results["HW model 2"] = [fit4.params[p] for p in params] + [MAE4]
        print(results)

In [115]:
#MAPE score
def MAPE_results(i, model):
        # Ensure the datetime index has a frequency set
        i.index = pd.DatetimeIndex(i.index.values, freq=i.index.inferred_freq)

        fit2 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='mul').fit()
        fit4 = ExponentialSmoothing(i['Log Data'], seasonal_periods=12, trend='add', seasonal='mul', damped_trend=True).fit()

        MAPE2 = mean_absolute_percentage_error(i['Log Data'], fit2.fittedvalues)
        MAPE4 = mean_absolute_percentage_error(i['Log Data'], fit4.fittedvalues)

        # Printing the parameters and errors for each scenario
        results = pd.DataFrame(index=[r"alpha", r"beta", r"gamma", r"l0", "b0", "MAPE"])
        params = ['smoothing_level', 'smoothing_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
        results["HW model 2"] = [fit2.params[p] for p in params] + [MAPE2]
        results["HW model 4"] = [fit4.params[p] for p in params] + [MAPE4]
        print(results)

### MSTA 

In [116]:
#Holt Linear Forecasting

MSTA_1950_onwards.index = pd.DatetimeIndex(MSTA_1950_onwards.index.values, freq=MSTA_1950_onwards.index.inferred_freq)
predict_periods = 12
# Fit the models without specifying initial parameters
fit1 = Holt(MSTA_1950_onwards.iloc[:, 0]).fit(optimized=True)
# Holt's method with additive trend and seasonal component, damped trend
fit3 = Holt(MSTA_1950_onwards.iloc[:, 0], damped_trend=True).fit(optimized=True)

# Create figure
fig, ax = plt.subplots(1, 1, figsize=(15, 10))

MSTA_1950_onwards.iloc[:, 0].rename('Time plot of original').plot(color='black', legend=True, ax=ax)

fit1.fittedvalues.plot(color='red', ax=ax)
fit1.forecast(predict_periods).rename("Model 1: Holt's linear trend").plot(color='red', legend=True, ax=ax)

fit3.fittedvalues.plot(color='green', ax=ax)
fit3.forecast(predict_periods).rename("Model 2: Damped Holt's linear trend").plot(color='green', legend=True, ax=ax)

plt.xlabel('Dates')
plt.ylabel('Values')
plt.title('Holt Linear Forecasting for MSTA', fontsize = 20, fontweight = 'bold')
plt.legend(fontsize = 20)
plt.tight_layout()
plt.show()

In [117]:
# Forecast the next periods
forecast1 = fit1.forecast(predict_periods).rename("Holt's linear trend")
forecast3 = fit3.forecast(predict_periods).rename("Damped Holt's linear trend")

MSTA_forecast = pd.concat([forecast1,  forecast3], axis=1)
MSTA_forecast


Unnamed: 0,Holt's linear trend,Damped Holt's linear trend
2025-01-01,1.140734,1.13744
2025-02-01,1.145052,1.13744
2025-03-01,1.14937,1.13744
2025-04-01,1.153688,1.13744
2025-05-01,1.158006,1.13744
2025-06-01,1.162323,1.13744
2025-07-01,1.166641,1.13744
2025-08-01,1.170959,1.13744
2025-09-01,1.175277,1.13744
2025-10-01,1.179595,1.13744


In [118]:
#MSE for MSTA
MSTA_1950_onwards.index = pd.DatetimeIndex(MSTA_1950_onwards.index.values, freq=MSTA_1950_onwards.index.inferred_freq)


fit1 = Holt(MSTA_1950_onwards.iloc[:, 0]).fit(optimized=True)
fit3 = Holt(MSTA_1950_onwards.iloc[:, 0], damped_trend=True).fit(optimized=True)

MSE1 = mean_squared_error(MSTA_1950_onwards.iloc[:, 0], fit1.fittedvalues)
MSE3 = mean_squared_error(MSTA_1950_onwards.iloc[:, 0], fit3.fittedvalues)

results = pd.DataFrame(index=[r"alpha", r"beta", r"gamma", r"l0", "b0", "MSE"])
params = ['smoothing_level', 'smoothing_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
results["Holt's linear trend"] = [fit1.params[p] for p in params] + [MSE1]
results["Damped Holt's linear trend"] = [fit3.params[p] for p in params] + [MSE3]
print(results)

       Holt's linear trend  Damped Holt's linear trend
alpha             0.620077                5.633594e-01
beta              0.046042                1.254498e-15
gamma                  NaN                         NaN
l0               -0.290000               -2.900000e-01
b0               -0.110000               -1.100000e-01
MSE               0.011956                1.113280e-02


In [119]:
#MAE for MSTA
MSTA_1950_onwards.index = pd.DatetimeIndex(MSTA_1950_onwards.index.values, freq=MSTA_1950_onwards.index.inferred_freq)


fit1 = Holt(MSTA_1950_onwards.iloc[:, 0]).fit(optimized=True)
fit3 = Holt(MSTA_1950_onwards.iloc[:, 0], damped_trend=True).fit(optimized=True)

MAE1 = mean_absolute_error(MSTA_1950_onwards.iloc[:, 0], fit1.fittedvalues)
MAE3 = mean_absolute_error(MSTA_1950_onwards.iloc[:, 0], fit3.fittedvalues)

results = pd.DataFrame(index=[r"alpha", r"beta", r"gamma", r"l0", "b0", "MAE"])
params = ['smoothing_level', 'smoothing_trend', 'smoothing_seasonal', 'initial_level', 'initial_trend']
results["Holt's linear trend"] = [fit1.params[p] for p in params] + [MAE1]
results["Damped Holt's linear trend"] = [fit3.params[p] for p in params] + [MAE3]
print(results)

       Holt's linear trend  Damped Holt's linear trend
alpha             0.620077                5.633594e-01
beta              0.046042                1.254498e-15
gamma                  NaN                         NaN
l0               -0.290000               -2.900000e-01
b0               -0.110000               -1.100000e-01
MAE               0.085863                8.228074e-02


In [120]:
#AIC and BIC of model 2
aic = fit3.aic
bic = fit3.bic

# Print the results
print("AIC:", aic)
print("BIC:", bic)

AIC: -4038.0733174231777
BIC: -4014.061343606556


In [121]:
#Checking Residuals behaviour

residuals = MSTA_1950_onwards.iloc[:, 0] - fit3.fittedvalues

residuals_flattened = residuals.values.flatten()

autocorrelation_plot(residuals_flattened)
plot_acf(residuals_flattened, lags=20)  
plt.title('ACF of the Model 2 Residuals', fontsize = 20, fontweight = "bold")
plt.show()

### CH4

In [122]:
fit2, fit4, forecast2, forecast4 = holt_winters_A_forecast_annual(CH4, 15)
CH4_forecast = back_transformation_merge(forecast2, forecast4)
CH4_forecast

Unnamed: 0,HW-additive seasonality,Damped HW-additive seasonality
2024-10-01,1941.5,1941.22
2024-11-01,1943.26,1942.74
2024-12-01,1943.01,1942.26
2025-01-01,1941.56,1940.59
2025-02-01,1941.56,1940.37
2025-03-01,1943.45,1942.02
2025-04-01,1944.59,1942.91
2025-05-01,1942.93,1941.0
2025-06-01,1939.26,1937.09
2025-07-01,1937.28,1934.85


In [123]:
holt_winters_A_forecast_plot(CH4,15)

In [124]:
ME_results(CH4, "add")

       HW model 1  HW model 2
alpha    0.889084    0.889501
beta     0.032915    0.033035
gamma    0.110915    0.110497
l0       7.398303    7.399062
b0       0.000597    0.000776
ME      -0.000004    0.000064


In [125]:
MSE_results(CH4, "add")

         HW model 1    HW model 2
alpha  8.890841e-01  8.895012e-01
beta   3.291542e-02  3.303478e-02
gamma  1.109152e-01  1.104965e-01
l0     7.398303e+00  7.399062e+00
b0     5.971599e-04  7.764085e-04
MSE    4.842070e-07  4.892355e-07


In [126]:
MAE_results(CH4, "add")

       HW model 1  HW model 2
alpha    0.889084    0.889501
beta     0.032915    0.033035
gamma    0.110915    0.110497
l0       7.398303    7.399062
b0       0.000597    0.000776
MAE      0.000541    0.000547


In [127]:
# Residuals behaviour
residualsCH4 = CH4.iloc[:, 0] - np.exp(fit2.fittedvalues)

residualsCH4_flattened = residualsCH4.values.flatten()

autocorrelation_plot(residualsCH4_flattened)
plot_acf(residualsCH4_flattened, lags=100)
plt.title("ACF of the CH4 Model 1 Residuals", fontsize = 20, fontweight = "bold")
plt.show()

### GMAF

In [128]:
holt_winters_M_forecast_plot(GMAF, 24)

In [129]:
fit2, fit4, forecast2, forecast4 = holt_winters_M_forecast_annual(GMAF, 24)
GMAF_forecast = back_transformation_merge(forecast2, forecast4)
GMAF_forecast

Unnamed: 0,HW-multiplicative seasonality,Damped HW-multiplicative seasonality
2024-01-01,5490.98,5471.13
2024-02-01,5854.74,5804.05
2024-03-01,6376.79,6291.6
2024-04-01,7841.31,7700.31
2024-05-01,7849.3,7675.94
2024-06-01,10162.06,9892.71
2024-07-01,10453.68,10137.84
2024-08-01,13985.51,13503.1
2024-09-01,12449.13,11983.4
2024-10-01,9948.6,9552.45


In [130]:
MSE_results(GMAF,'mul')

         HW model 1    HW model 2
alpha  1.000000e+00  1.000000e+00
beta   1.282217e-12  0.000000e+00
gamma  1.369026e-08  7.159591e-09
l0     6.263109e+00  6.051598e+00
b0     3.534649e-03  1.050869e-01
MSE    3.834569e-02  3.833131e-02


In [131]:
MAE_results(GMAF,"mul")

         HW model 1    HW model 2
alpha  1.000000e+00  1.000000e+00
beta   1.282217e-12  0.000000e+00
gamma  1.369026e-08  7.159591e-09
l0     6.263109e+00  6.051598e+00
b0     3.534649e-03  1.050869e-01
MAE    9.241076e-02  9.226575e-02


In [132]:
MAPE_results(GMAF,"mul")

         HW model 2    HW model 4
alpha  1.000000e+00  1.000000e+00
beta   1.282217e-12  0.000000e+00
gamma  1.369026e-08  7.159591e-09
l0     6.263109e+00  6.051598e+00
b0     3.534649e-03  1.050869e-01
MAPE   1.239660e-02  1.238172e-02


In [133]:
# Residuals behaviour
residualsGMAF = GMAF['Log Data'] - fit4.fittedvalues
residualsGMAF_flattened = residualsGMAF.values.flatten()

autocorrelation_plot(residualsGMAF_flattened)
plot_acf(residualsCH4_flattened, lags=20) 
plt.title('ACF of the GMAF Model 2 Residuals', fontsize = 20, fontweight = "bold")
plt.show()

### ET12

In [134]:
holt_winters_M_forecast_plot(ET12, 12)

In [135]:
fit2, fit4,forecast2, forecast4 = holt_winters_M_forecast_annual(ET12, 12)
ET12_forecast = back_transformation_merge(forecast2, forecast4)
ET12_forecast

Unnamed: 0,HW-multiplicative seasonality,Damped HW-multiplicative seasonality
2025-01-01,16.04,16.1
2025-02-01,16.21,16.27
2025-03-01,15.15,15.21
2025-04-01,13.54,13.61
2025-05-01,12.12,12.19
2025-06-01,11.68,11.75
2025-07-01,11.15,11.23
2025-08-01,11.03,11.12
2025-09-01,12.04,12.14
2025-10-01,13.1,13.23


In [136]:
MSE_results(ET12,"mul")

       HW model 1    HW model 2
alpha    0.292059  2.996873e-01
beta     0.000000  3.527778e-13
gamma    0.000000  4.181360e-12
l0       2.883013  2.889285e+00
b0      -0.000781  1.076465e-02
MSE      0.001897  1.896529e-03


In [137]:
MAE_results(ET12, "mul")

       HW model 1    HW model 2
alpha    0.292059  2.996873e-01
beta     0.000000  3.527778e-13
gamma    0.000000  4.181360e-12
l0       2.883013  2.889285e+00
b0      -0.000781  1.076465e-02
MAE      0.031571  3.170332e-02


In [138]:
MAPE_results(ET12, "mul")

       HW model 2    HW model 4
alpha    0.292059  2.996873e-01
beta     0.000000  3.527778e-13
gamma    0.000000  4.181360e-12
l0       2.883013  2.889285e+00
b0      -0.000781  1.076465e-02
MAPE     0.011180  1.123794e-02


In [139]:
# Residuals behaviour
residualsET12 = ET12['Log Data'] - fit2.fittedvalues
residualsET12_flattened = residualsET12.values.flatten()
autocorrelation_plot(residualsET12_flattened)
plot_acf(residualsCH4_flattened, lags=20)  # You can adjust the number of lags as needed
plt.title('ACF of the ET12 Model 1 Residuals', fontsize = 20, fontweight = "bold")
plt.show()