### 1. LOAD NECESSARY LIBRARIES

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Paths of the input CSV files, one per economic indicator.
cpi = 'CPI.csv' # Consumer price index
eci = 'ECI.csv' #  Employment cost index
csi = 'CONSUMER-SENTIMENT-INDEX.csv' # Consumer sentiment index
gpd = 'GPD-PRICE-DEFLATOR.csv' # Price deflator
ipd = 'IMPORT-PRICE-INDEX.csv' # Import price index
op = 'OIL-PRICE.csv' # WPI oil prices
pce = 'PCE.csv' # Personal consumption expenditures
ppi = 'PPI.csv' # Producer price index
wg = 'WAGE-GROWTH.csv' # Wage growth
# unrate = 'UNRATE.csv' # Unemployment rate

In [3]:
# Load each indicator; unless noted, the DATE column is parsed and used as the index.
df_cpi = pd.read_csv(cpi, index_col='DATE', parse_dates=True)
df_eci = pd.read_csv(eci, index_col='DATE', parse_dates=True)
# The consumer sentiment file is read as-is; its date column is assembled in later cells.
df_csi = pd.read_csv(csi)
# Read the price deflator file (`gpd`). This line previously read `ipd`, the
# import price index file, even though the resulting column is later named
# 'price_deflator'.
df_gpd = pd.read_csv(gpd, index_col='DATE', parse_dates=True)
df_op = pd.read_csv(op, index_col='DATE', parse_dates=True)
df_pce = pd.read_csv(pce, index_col='DATE', parse_dates=True)
df_ppi = pd.read_csv(ppi, index_col='DATE', parse_dates=True)
df_wg = pd.read_csv(wg, index_col='DATE', parse_dates=True)
# df_unemply = pd.read_csv(unrate, index_col='DATE', parse_dates=True)

### 2. DATA EXPLORATION

In [4]:
def check_outliers(df, col):
    """Return the rows of ``df`` whose ``col`` value lies outside the IQR fences.

    The fences are placed 1.5 interquartile ranges below the first quartile
    and above the third quartile of ``df[col]``.

    Args:
        df: DataFrame to filter.
        col: Name of the column to test.

    Returns:
        The subset of ``df`` (all columns) whose ``col`` value is strictly
        below the lower fence or strictly above the upper fence.
    """
    values = df[col]
    lower_quartile = values.quantile(0.25)
    upper_quartile = values.quantile(0.75)
    spread = upper_quartile - lower_quartile

    upper_fence = upper_quartile + 1.5 * spread
    lower_fence = lower_quartile - 1.5 * spread

    is_outlier = (values > upper_fence) | (values < lower_fence)
    return df[is_outlier]

In [5]:
# df_unemply is never created: the read_csv call that would define it (and the
# `unrate` path it needs) is commented out, so evaluating the name here raised
# NameError. Re-enable both before restoring this cell.
# df_unemply

In [None]:
dfs = [df_eci, df_gpd, df_op, df_pce, df_ppi, df_wg, df_csi, df_cpi]

In [None]:
df

In [None]:
# Turn the '.' placeholder into NaN in every column of every loaded frame.
# The result is assigned back to the column: calling
# df[column].replace(..., inplace=True) modifies a column selection, and pandas
# does not guarantee that such a chained in-place edit reaches the parent frame.
for df in dfs:
    for column in df.columns:
        df[column] = df[column].replace('.', np.nan)

In [None]:
for df in dfs:
    for column in df.columns:
        print(column.upper())
        print(df[column].isnull().sum())
        print('*' * 130)
        print()

In [None]:
df_gpd.dropna(inplace=True)

In [None]:
df_wg.dropna(inplace=True)

In [None]:
df_csi.drop('Table 1: The Index of Consumer Sentiment', axis = 1, inplace=True)

In [None]:
df_csi

In [None]:
df_csi.reset_index(inplace=True)

In [None]:
df_csi

In [None]:
# Combine the columns into a single datetime column
df_csi['Date'] = pd.to_datetime(df_csi['level_1'] + '-' + df_csi['level_0'] + '-01', errors='coerce')

In [None]:
df_csi.dropna(inplace=True)

In [None]:
df_csi.drop(['level_0', 'level_1'], axis = 1, inplace=True)

In [None]:
df_csi.columns = ['csi', 'DATE']

In [None]:
df_csi = df_csi[['DATE', 'csi']]

In [None]:
df_csi

In [None]:
df_csi.set_index(df_csi['DATE'], inplace=True)

In [None]:
df = df_eci.join(df_gpd).join(df_op).join(df_pce).join(df_ppi).join(df_wg).join(df_csi).join(df_cpi)

In [None]:
df

In [None]:
df.drop('DATE', axis=1, inplace=True)

In [None]:
df.columns = ['eci', 'price_deflator', 'oil_price', 'pce', 'ppi', 'unemployment_rate', 'wage_growth', 'csi', 'cpi']

### 2. STATISTICAL ANALYSIS AND DATA CLEANING AND EXPLORATION

In [None]:
df.head()

In [None]:
df.dtypes

In [None]:
for column in df.columns:
    if df[column].dtype == 'object':
        df[column] = df[column].astype('float64')

In [None]:
# Print, for every column, its IQR outliers followed by how many there are.
for column in df.columns:
    outlier_rows = check_outliers(df, column)
    print(column.upper())
    print(outlier_rows[column])
    print(len(outlier_rows))
    print('*' * 150)
    print()

In [None]:
check_outliers(df, 'unemployment_rate')

In [None]:
check_outliers(df, 'ppi')

In [None]:
check_outliers(df, 'wage_growth')

In [None]:
df.columns

In [None]:
# Draw one histogram per column, each on its own figure.
for column in df.columns:
    plt.figure(figsize=(16, 8))
    sns.histplot(df[column])
    # plt.plot() with no data draws nothing; plt.show() renders the figure,
    # matching the other plotting cells in this notebook.
    plt.show()

In [None]:
from scipy.stats import skew, kurtosis

In [None]:
# check skewness and kurtosis
from scipy.stats import skew, kurtosis

def check_skewedness(df, col):
    """Print the skewness of ``df[col]`` and a coarse classification of it.

    Missing values are dropped first, because ``scipy.stats.skew`` returns NaN
    when the input contains NaN and a NaN would otherwise fall through to the
    "left skewed" branch.

    Args:
        df: DataFrame holding the data.
        col: Name of the column to analyse.

    Returns:
        None. The column name, the skewness and one classification line are
        printed, followed by a separator line.
    """
    # Computed once and reused by every comparison below.
    skewness = skew(df[col].dropna())

    print(col.upper())
    print(skewness)
    if np.isnan(skewness):
        # Still NaN after dropping missing values, so no direction can be reported.
        print('Skewness is undefined for this column.')
    elif -0.5 < skewness < 0.5:
        print('Normal distribution.')
    elif skewness >= 0.5:
        # >= so that a value of exactly 0.5 is not reported as left skewed.
        print('Right skewed detected!')
    else:
        print('Left skewed detected!')
    print('*' * 100)

def check_kurtosis(df, col, tol=0.5):
    """Print the excess kurtosis of ``df[col]`` and whether it looks normal.

    ``scipy.stats.kurtosis`` uses Fisher's definition by default, under which
    a normal distribution scores 0.0 (not 3.0), so the value is compared with
    0 within ``tol`` rather than tested for exact equality with 3.

    Args:
        df: DataFrame holding the data.
        col: Name of the column to analyse.
        tol: Largest absolute excess kurtosis still reported as normal. The
            default of 0.5 is a rule-of-thumb cut-off, not a statistical
            test; adjust it to the strictness you need.

    Returns:
        None. The column name, the excess kurtosis and one classification
        line are printed.
    """
    # Missing values are dropped because kurtosis() returns NaN if any are present.
    excess = kurtosis(df[col].dropna())

    print(col.upper())
    print(excess)
    if np.isnan(excess):
        print('Kurtosis is undefined for this column.')
    elif abs(excess) <= tol:
        print('Normal distribution.')
    else:
        print('Possible outliers detected! XXXXXXXXXXXXXXXXX')

In [None]:
# check all the skewedness value and kurtosis values:
for column in df.columns:
    check_skewedness(df, column)
    print()
    check_kurtosis(df, column)

In [None]:
# Draw one box plot per column, each on its own figure.
for column in df.columns:
    plt.figure(figsize=(16, 8))
    sns.boxplot(df[column])
    # plt.plot() with no data draws nothing; plt.show() renders the figure,
    # matching the other plotting cells in this notebook.
    plt.show()

### 3. DIVIDE THE DF INTO ONE QUARTERLY DATAFRAME AND ONE YEARLY DATAFRAME AND NORMALIZATION

In [None]:
df.head(10)

In [None]:
# Yearly means. 'YE' (year end) replaces the deprecated 'Y' alias; it belongs
# to the same alias family as the 'QE' frequency used later in this notebook.
df_yearly = df.resample('YE').mean()

In [None]:
df_yearly.plot(figsize=(16, 8))

In [None]:
df_yearly[['eci', 'price_deflator', 'oil_price', 'ppi', 'unemployment_rate', 'wage_growth', 'csi', 'cpi']].plot()

In [None]:
df_yearly.corr()

In [None]:
cpi_line = df_yearly.cpi

In [None]:
for column in df_yearly.columns:
    plt.figure(figsize=(16, 8))
    plt.plot(df_yearly[column], label=column.upper())
    plt.plot(cpi_line, label='CPI')
    plt.xlabel('TIME')
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
# Scatter every yearly indicator against CPI, one figure per indicator.
for column in df_yearly.columns:
    plt.figure(figsize=(16, 8))
    # The former `palette` argument was dropped: no `hue` variable is mapped,
    # so seaborn ignored it and emitted a warning.
    sns.scatterplot(data=df_yearly, x = df_yearly[column], y = cpi_line, legend=True, s = 100)
    plt.title(f"{column.upper()} vs. CPI", fontsize=16)
    plt.tight_layout()
    plt.show()

In [None]:
# Regression plot of every yearly indicator against CPI, one figure each.
for column in df_yearly.columns:
    plt.figure(figsize=(16, 8))
    # `cpi_line` is the yearly CPI series assigned in an earlier cell; the
    # previously referenced `csi_line` is not defined anywhere in this notebook.
    sns.regplot(data=df_yearly, x = df_yearly[column], y = cpi_line)
    plt.tight_layout()
    plt.show()

In [None]:
df_yearly.corr()

In [None]:
from scipy.stats import pearsonr

In [None]:
def find_pearsonr(df, col1, col2):
    """Print the Pearson correlation between two columns and a verdict.

    Rows where either column is missing are excluded first. ``pearsonr`` does
    not skip NaN, and a NaN p-value fails the ``> 0.05`` test, which would
    wrongly print the "There is indeed correlation" verdict.

    Args:
        df: DataFrame holding both columns.
        col1: Name of the first column.
        col2: Name of the second column.

    Returns:
        None. The coefficient, the p-value and a verdict at the 0.05 level
        are printed, followed by a separator and a blank line.
    """
    # Keep only the rows where both values are present.
    both_present = df[col1].notna() & df[col2].notna()
    x = df.loc[both_present, col1]
    y = df.loc[both_present, col2]

    if len(x) < 2:
        # pearsonr needs at least two paired observations.
        print(f"Not enough paired observations to correlate {col1.upper()} and {col2.upper()}.")
        print('*' * 100)
        print()
        return

    result = pearsonr(x, y)
    print(f"The correlation coef between {col1.upper()} and {col2.upper()} is {result[0]}")
    print(f"The p-value between the {col1.upper()} and {col2.upper()} is {result[1]}")
    if np.isnan(result[1]):
        # A NaN p-value (e.g. from a constant column) supports neither verdict.
        print('The correlation is undefined for these columns.')
    elif result[1] > 0.05:
        print('It is too big. You cannot reject the null hypothesis! Therefore, no correlation!')
    else:
        print('There is indeed correlation between the two since p-value is too small.')
    print('*' * 100)
    print()

In [None]:
for column in df_yearly.columns:
    find_pearsonr(df_yearly, column, 'cpi')

### CAN WE PREDICT WHEN INFLATION STARTS AND ENDS?

In [None]:
# Keep the yearly rows from 2020 onward. The explicit copy makes the result an
# independent frame, so columns added to it later do not write into a filtered
# selection of df_yearly.
df_yearly_2020 = df_yearly[df_yearly.index.year >= 2020].copy()

In [None]:
df_yearly_2020

In [None]:
# Year-over-year CPI change in percent. The rate is computed on the full yearly
# series and then aligned to the 2020+ rows by index, so the first kept year is
# measured against the year before it (when that year exists in df_yearly)
# instead of always being NaN. The quarterly analysis in this notebook likewise
# derives inflation before filtering by year.
yearly_cpi = df_yearly['cpi']
yearly_inflation = yearly_cpi.diff() / yearly_cpi.shift(1) * 100
# assign() returns a new frame, which avoids setting a column on a filtered selection.
df_yearly_2020 = df_yearly_2020.assign(inflation=yearly_inflation)

In [None]:
df_yearly_2020

In [None]:
def check_inflation(df, col, threshold=2.4):
    """Return the rows of ``df`` whose ``col`` value is strictly above ``threshold``.

    Args:
        df: DataFrame to filter.
        col: Name of the column holding the rate to test.
        threshold: Cut-off that a value must exceed to be kept. Defaults to
            2.4, the value this notebook previously hard-coded.

    Returns:
        The matching subset of ``df``; rows whose value is NaN are excluded
        because the comparison is False for them.
    """
    return df[df[col] > threshold]

# Inflation values of the rows flagged by check_inflation. The helper must be
# called here: calling the DataFrame itself raises TypeError.
inflation_periods = check_inflation(df_yearly_2020, 'inflation')['inflation']

# NOTE(review): the lookups below assume at least three flagged periods exist.
# Values are read with .iloc because a bare integer key on a date-indexed
# Series is treated as a label, not a position.
start_1_index = inflation_periods.index[0]
start_1 = inflation_periods.iloc[0]

start_2_index = inflation_periods.index[1]
start_2 = inflation_periods.iloc[1]

start_3_index = inflation_periods.index[2]
start_3 = inflation_periods.iloc[2]

plt.figure(figsize=(16, 8))
plt.plot(df_yearly_2020['inflation'], marker='o', linestyle='--')

# Mark and label each of the three flagged periods on the inflation line.
plt.annotate(text='inflationary period', xy=(start_1_index, start_1), arrowprops=dict(facecolor='black', arrowstyle='->'))
plt.scatter(start_1_index, start_1, color='red', marker='o', s= 100)

plt.annotate(text='inflationary period', xy=(start_2_index, start_2), arrowprops=dict(facecolor='black', arrowstyle='->'))
plt.scatter(start_2_index, start_2, color='red', marker='o', s=100)

plt.annotate(text='inflationary period', xy=(start_3_index, start_3))
plt.scatter(start_3_index, start_3, color='red', marker='o', s=100)

# plt.plot() with no data draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
def check_inflation(df, col, threshold=2.4):
    """Return the rows of ``df`` whose ``col`` value is strictly above ``threshold``.

    Args:
        df: DataFrame to filter.
        col: Name of the column holding the rate to test.
        threshold: Cut-off that a value must exceed to be kept. Defaults to
            2.4, the value this notebook previously hard-coded.

    Returns:
        The matching subset of ``df``; rows whose value is NaN are excluded
        because the comparison is False for them.
    """
    return df[df[col] > threshold]

In [None]:
# Inflation values of the rows flagged by check_inflation. The helper must be
# called here: calling the DataFrame itself raises TypeError.
inflation_periods = check_inflation(df_yearly_2020, 'inflation')['inflation']

In [None]:
# First flagged period: its date and its inflation value. .iloc reads by
# position; a bare integer key on a date-indexed Series is treated as a label.
start_1_index = inflation_periods.index[0]
start_1 = inflation_periods.iloc[0]

In [None]:
# Second flagged period: its date and its inflation value. .iloc reads by
# position; a bare integer key on a date-indexed Series is treated as a label.
start_2_index = inflation_periods.index[1]
start_2 = inflation_periods.iloc[1]

In [None]:
# Third flagged period: its date and its inflation value. .iloc reads by
# position; a bare integer key on a date-indexed Series is treated as a label.
start_3_index = inflation_periods.index[2]
start_3 = inflation_periods.iloc[2]

In [None]:
start_3_index

In [None]:
# Yearly inflation since 2020 with the three flagged periods highlighted.
plt.figure(figsize=(16, 8))
plt.plot(df_yearly_2020['inflation'], marker='o', linestyle='--')

plt.annotate(text='inflationary period', xy=(start_1_index, start_1), arrowprops=dict(facecolor='black', arrowstyle='->'))
plt.scatter(start_1_index, start_1, color='red', marker='o', s= 100)

plt.annotate(text='inflationary period', xy=(start_2_index, start_2), arrowprops=dict(facecolor='black', arrowstyle='->'))
plt.scatter(start_2_index, start_2, color='red', marker='o', s=100)

plt.annotate(text='inflationary period', xy=(start_3_index, start_3))
plt.scatter(start_3_index, start_3, color='red', marker='o', s=100)

# plt.plot() with no data draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

sns.regplot(data=df_yearly_2020, x='eci', y='cpi', ax=ax)

plt.show()

In [None]:
# ECI and CPI since 2020 drawn on the same axes.
df_yearly_2020['eci'].plot(figsize=(16, 8), label='ECI')
df_yearly_2020['cpi'].plot(label='CPI')

plt.legend()
# plt.plot() with no data draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
# Quarterly means. 'QE' (quarter end) replaces the deprecated 'Q' alias and
# matches the freq='QE' used for the forecast dates later in this notebook.
df_quarterly = df.resample('QE').mean()

In [None]:
df_quarterly[['price_deflator', 'oil_price', 'ppi', 'unemployment_rate', 'wage_growth', 'csi', 'cpi']].plot(figsize=(16, 8))

In [None]:
df_quarterly.corr()

In [None]:
for column in df_quarterly.columns:
    find_pearsonr(df_quarterly, column, 'cpi')

In [None]:
for column in df_quarterly.columns:
    plt.figure(figsize=(16, 8))
    plt.title(f"{column.upper()} vs. CPI", fontsize=24)
    sns.regplot(data=df_quarterly, x = df_quarterly[column], y = df_quarterly['cpi'])
    plt.tight_layout()
    plt.show()

In [None]:
df_quarterly['inflation'] = df_quarterly['cpi'].diff() / df_quarterly['cpi'].shift(1) * 100

In [None]:
df_quarterly_2020 = df_quarterly[df_quarterly.index.year >= 2020]

In [None]:
check_inflation(df_quarterly_2020, 'inflation')

In [None]:
df_quarterly_2020.iloc[:, [1,2,4,5,6,7,8,9]].plot(figsize=(16, 8))

In [None]:
df_quarterly_2020.inflation

In [None]:
plt.figure(figsize=(16, 8))
sns.regplot(y=df_quarterly_2020['inflation'], x=df_quarterly_2020['unemployment_rate'])
plt.title(f"UNEMPLOYMENT VS. INFLATION RATE", fontsize=24)
plt.ylabel('INFLATION')
plt.xlabel('UNEMPLOYMENT')
plt.tight_layout()
plt.show()

In [None]:
df_quarterly_2020.unemployment_rate.plot(figsize=(16, 8))

In [None]:
df_quarterly_2020['inflation'].rolling(window=3).mean()

In [None]:
df_quarterly_2020

In [None]:
# Least-squares straight line of quarterly inflation against the quarter's
# position (0, 1, 2, ...), extrapolated to the next three quarters.
quarters = np.arange(len(df_quarterly_2020))
inflation_rates = df_quarterly_2020['inflation'].values

# Calculate means
mean_quarters = np.mean(quarters)
mean_inflation = np.mean(inflation_rates)

# Calculate numerator and denominator
numerator = np.sum((quarters - mean_quarters) * (inflation_rates - mean_inflation))
denominator = np.sum((quarters - mean_quarters) ** 2)

# Calculate slope (b) and intercept (a)
b = numerator / denominator
a = mean_inflation - b * mean_quarters

# Predict inflation for the next three quarters. The observed quarters occupy
# positions 0 .. len(quarters) - 1, so the first unseen quarter is position
# len(quarters); starting at len(quarters) + 1 would skip one quarter ahead.
quarter_to_predict1 = len(quarters)
quarter_to_predict2 = len(quarters) + 1
quarter_to_predict3 = len(quarters) + 2
predicted_inflation1 = a + b * quarter_to_predict1
predicted_inflation2 = a + b * quarter_to_predict2
predicted_inflation3 = a + b * quarter_to_predict3

# Report each forecast; the un-numbered names printed before were never defined.
for quarter_to_predict, predicted_inflation in [
    (quarter_to_predict1, predicted_inflation1),
    (quarter_to_predict2, predicted_inflation2),
    (quarter_to_predict3, predicted_inflation3),
]:
    print("Predicted inflation for quarter", quarter_to_predict, ":", predicted_inflation)

In [None]:
def inflation_forecast(data, col, steps_ahead=1):
    """Extrapolate ``data[col]`` with a least-squares straight line.

    Each value is paired with its row position (0, 1, 2, ...), a line is
    fitted through the pairs, and the line is evaluated ``steps_ahead``
    positions past the last row.

    Args:
        data: DataFrame holding the series, in chronological row order.
        col: Name of the column to extrapolate.
        steps_ahead: How many positions beyond the last row to predict.
            The default of 1 gives the value for the next row.

    Returns:
        The predicted value as a float, or NaN when fewer than two non-NaN
        observations are available (a line cannot be fitted).
    """
    rates = np.asarray(data[col], dtype=float)
    orders = np.arange(len(rates), dtype=float)

    # Fit only on the observed points; a single NaN would otherwise turn
    # every mean and sum below into NaN.
    observed = ~np.isnan(rates)
    if observed.sum() < 2:
        return float('nan')
    x = orders[observed]
    y = rates[observed]

    # Means come from this call's own data, not from a notebook-level variable
    # computed for a different frame.
    mean_order = np.mean(x)
    mean_rate = np.mean(y)

    # calculate the numerator and denominator
    numerator = np.sum((x - mean_order) * (y - mean_rate))
    denominator = np.sum((x - mean_order) ** 2)

    # calculate slope b and the intercept a
    b = numerator / denominator
    a = mean_rate - b * mean_order

    # The last row sits at position len(orders) - 1, so the next one is len(orders).
    y_pred = a + b * (len(orders) - 1 + steps_ahead)

    return y_pred

In [None]:
df_quarterly_2020_inflation = df_quarterly_2020['inflation']

In [None]:
df_quarterly_2020['inflation']

In [None]:
predicted_value = [predicted_inflation1, predicted_inflation2, predicted_inflation3]
date = pd.date_range(start='2024-06-30', periods=3, freq='QE')

In [None]:
predicted = pd.DataFrame(
    predicted_value,
    date,
)

In [None]:
predicted.columns = ['inflation']

In [None]:
df_quarterly_2020['inflation'].plot()
predicted.plot()

In [None]:
# Observed quarterly inflation and the forecast drawn on the same axes.
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

sns.lineplot(data=df_quarterly_2020, x=df_quarterly_2020.index, y='inflation', ax=ax)
sns.lineplot(data=predicted, x=predicted.index, y='inflation', ax=ax)

# plt.plot() with no data draws nothing; plt.show() renders the figure.
plt.show()

In [None]:
for column in df_quarterly_2020.columns:
    print(column.upper())
    print(inflation_forecast(df_quarterly_2020, column))
    print('*' * 100)
    print()

In [None]:
df_quarterly_2020

In [None]:
# Last row of the frame. iloc positions are zero-based, so position
# len(df_quarterly_2020) is one past the end and raises IndexError.
df_quarterly_2020.iloc[-1]

In [None]:
data_next_quarter = {
    pd.to_datetime('2024-06-30') : [167.69, 149.31, 101.35, 20202.56, 285.05, 1.46, 6.31, 59.61, 320.64]
}

new_row = pd.DataFrame(data_next_quarter).transpose()

In [None]:
# Most recent ECI value. .iloc reads by position; a bare -1 key on a
# date-indexed Series is treated as a label, not as "last element".
eci_inflation = df_quarterly_2020['eci'].iloc[-1]

In [None]:
# NOTE(review): apart from eci_inflation, every *_inflation name used here is
# first assigned in a later cell of this notebook, so this line raises
# NameError when the cells are run top to bottom. A later cell computes
# inflation_rate_next_quarter again — confirm whether this cell is still needed.
inflation_rate_next_quarter = (eci_inflation) + (0.84 * price_deflator_inflation) + (0.74 * oil_price_inflation) + (0.95 * pce_inflation) +(0.9 * ppi_inflation) - (0.67 * unemployment_inflation) + (0.74 * wage_growth_inflation) - (0.6 * csi_inflation)

In [None]:
new_row

In [None]:
df_quarterly_2020

In [None]:
new_row

In [None]:
(new_row.values - df_quarterly_2020.iloc[-1, [0, 1,2,3,4,5,6,7,8]].values) / df_quarterly_2020.iloc[-1, [0, 1,2,3,4,5,6,7,8]].values * 100

In [None]:
eci_inflation = 2.31
price_deflator_inflation = 7.11
oil_price_inflation = 36.95
pce_inflation = 6.17
ppi_inflation = 13.43
unemployment_inflation = -60.54
wage_growth_inflation = 32.25
csi_inflation = -24.54
cpi_inflation = 3.53

In [None]:
# Weighted combination of the eight indicator changes, divided as a whole.
# Without the outer parentheses `/` binds tighter than `+` and `-`, so only the
# final csi term was divided by 9.
# NOTE(review): the divisor 9 is kept from the original expression although
# eight terms are combined — confirm the intended divisor.
inflation_rate_next_quarter = (
    eci_inflation
    + 0.84 * price_deflator_inflation
    + 0.74 * oil_price_inflation
    + 0.95 * pce_inflation
    + 0.9 * ppi_inflation
    - 0.67 * unemployment_inflation
    + 0.74 * wage_growth_inflation
    - 0.6 * csi_inflation
) / 9

In [None]:
inflation_rate_next_quarter / 100

In [None]:
print(f'Inflation will be {np.round(inflation_rate_next_quarter / 100, 2)}')