In [1]:
#Loading the necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
#Importing the quartely divided dataset
quarter_dom_ES = pd.read_csv('C:\\Users\\User\\Desktop\\Research_code\\quarter_data_domestic.csv')
quarter_dom_ES.head(10)

Unnamed: 0,Year,DomesticDebt_adjust
0,1990-03-31,14337.301563
1,1990-06-30,14002.819688
2,1990-09-30,14331.459062
3,1990-12-31,15323.219687
4,1991-03-31,16978.101563
5,1991-06-30,15833.919688
6,1991-09-30,15352.859062
7,1991-12-31,15534.919687
8,1992-03-31,16380.101563
9,1992-06-30,17557.744688


In [3]:
#Checking for null values
quarter_dom_ES.isnull().sum()

Year                   0
DomesticDebt_adjust    0
dtype: int64

* As per the results above there are no null values in the decomposed dataset.

In [4]:
#Checking for duplicates
quarter_dom_ES.duplicated().sum()

0

* As shown above there are no duplicate values.

In [5]:
quarter_dom_ES.shape

(125, 2)

In [6]:
quarter_dom_ES.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 2 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Year                 125 non-null    object 
 1   DomesticDebt_adjust  125 non-null    float64
dtypes: float64(1), object(1)
memory usage: 2.1+ KB


In [7]:
quarter_dom_ES=quarter_dom_ES.set_index('Year ')
is_year_index = 'Year ' in quarter_dom_ES.index.names
print(is_year_index)

True


In [8]:
train, test = train_test_split(quarter_dom_ES, train_size = 0.8, random_state = 42) 

In [9]:
scaler = StandardScaler()
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.fit_transform(test)

In [10]:
#Creating the exponential smoothing model
model = ExponentialSmoothing(train_scaled, trend='add', seasonal='add', seasonal_periods=12)

In [11]:
#Fitting the model
model_fit = model.fit()

# Split the data into training and testing sets
train_data, test_data = train_test_split(quarter_dom_ES['DomesticDebt_adjust'], train_size=0.8)

In [12]:
#Making predcitions to check the model accuracy
forecast = model.predict(start=len(train_scaled), end=len(train_scaled)+len(test_scaled)-1,params=model_fit.params)
forecast_original = scaler.inverse_transform([forecast])
forecast_original
#Making predictions from the ES model to check model accuracy
#preds = model_fit.forecast(71)
#preds

array([[294437.91727491, 255242.14210986, 222630.28121432,
        407332.78836486, 392110.06243457, 369741.61448373,
        269208.00630234, 172828.43307475, 875401.09590688,
        300514.06447369, 245674.41431655, 343503.38589844,
        310918.23159004, 271722.456425  , 239110.59552945,
        423813.10267999, 408590.3767497 , 386221.92879886,
        285688.32061748, 189308.74738988, 891881.41022201,
        316994.37878882, 262154.72863169, 359983.70021358,
        327398.54590518]])

In [13]:
test_scaled.shape

(25, 1)

In [14]:
forecast.shape

(25,)

In [15]:
# Calculating mean absolute error (MAE)
mae = mean_absolute_error(test_scaled, forecast)

# Calculating mean squared error (MSE)
mse = mean_squared_error(test_scaled, forecast)

# Calculating root mean squared error (RMSE)
rmse = np.sqrt(mse)

# Calculating mean absolute percentage error (MAPE)
mape = np.mean(np.abs((test_scaled - forecast) / test_scaled)) * 100

# Printing the computed metrics
print("MAE:", mae)
print("MSE:", mse)
print("RMSE:", rmse)
print("MAPE:", mape)

MAE: 0.789574203640948
MSE: 1.3183414019908044
RMSE: 1.1481904902893092
MAPE: 155.81500329194202


In [16]:
#Making predictions from the ES model for the next 8 years
start_date = '2020'
end_date = '2028'
#date_range = pd.date_range(start=start_date, end=end_date, freq='Q')
date_range = pd.date_range(start=start_date, end=end_date, freq='Y')
forecast_mod = model.predict(start=len(train_data)+len(test_data), end=len(train_data)+len(test_data)+len(date_range)-1,params=model_fit.params)
forecast_index = pd.DatetimeIndex(date_range)
forecast_df = pd.DataFrame(forecast_mod, index=forecast_index, columns=['DomesticDebt_adjust'])
forecast_df

NameError: name 'train_data' is not defined

In [None]:
actual_values = quarter_dom_ES['DomesticDebt_adjust']

# Plotting the actual values and predicted values

#plt.figure(figsize=(20, 6)) 
plt.plot(actual_values, label='Actual')
plt.plot(forecast, label='Predicted')

#plt.plot(quarter_dom_ES, label='Actual')
#plt.plot(forecast, label='Predicted')

plt.xlabel('Time')
plt.ylabel('Value')
plt.title('Actual vs Predicted Values')
plt.legend()

plt.show()