In [None]:
from statsmodels.tsa.ar_model import AutoReg
import pandas as pd
import compass_tools as ct
import numpy as np
import pandas as pd
import os
os.chdir(r'your directory')
from pandas.tseries.offsets import DateOffset
from datetime import datetime
import matplotlib.pyplot as plt
from getpass import getpass
import snowflake.connector
import statsmodels.api as sm
# os.listdir()

In [None]:
ROLE = 'SFK_TEAM_MAC'
WAREHOUSE = 'SFK_TEAM_MAC_XS'
db_name = 'DIG_SHARE_INFARE_AIR_TRAVEL_DATA'
## Snowflake uses okta to authenticate, so input your regular password here when prompted
## NOTE: You may need to save USERNAME as an account variable. You can also hardcode your username.
conn_args = dict(
    account='xxxx',
    authenticator='https://urnetwork.com/',
    user=os.environ['USERNAME'],  # May need to update
    password=getpass(),
    warehouse=WAREHOUSE,
    role=ROLE,
)

# Pull the full market-trends view, then keep only the United States rows.
conn = snowflake.connector.connect(**conn_args)
sql = 'select * from DIG_SHARE_INFARE_AIR_TRAVEL_DATA.PUBLIC.V_MARKET_TRENDS_MACRO_BAM'
infare_raw = pd.read_sql(sql, conn)
# .copy() so the column assignment below works on an independent frame
# instead of a filtered view (avoids SettingWithCopyWarning).
infare_raw = infare_raw[infare_raw['COUNTRY'] == 'United States'].copy()

# Series identifier: DOMINT, an underscore, then the lower-cased WBD value.
# NOTE(review): ids built here always contain '_', while the columns selected
# below ('dom3', 'dom5', ...) do not -- confirm the id format against the data.
infare_raw['id'] = infare_raw['DOMINT'] + '_' + infare_raw['WBD'].astype(str).str.lower()

# One row per observation week, one column per id, averaging PRICE_SIMPLE_AVG.
infare = infare_raw.pivot_table(values='PRICE_SIMPLE_AVG', columns='id', index='WEEK_OBS_REFDATE')
infare = infare[['dom3', 'dom5', 'dom7', 'dom9', 'dom11']]

# Keep the observation date as a regular 'Date' column and replace the index
# with a plain integer range. The original called set_index() without keys,
# which raises TypeError; later cells read 'Date' as a column, so
# reset_index(drop=True) is the intended operation.
infare['Date'] = pd.to_datetime(infare.index)
infare = infare.reset_index(drop=True)
infare.tail()

In [None]:
# Write the weekly fare table to disk so it can be edited outside the notebook.
infare.to_csv('infare_weekly.csv')

# Manual step: copy the data from the CSV into infare_weekly_interpolated.xlsx,
# then reload that workbook here. The workbook's 'Unnamed: 0' column becomes
# the index of the reloaded frame.
infare_workbook = pd.read_excel('infare_weekly_interpolated.xlsx')
infare = infare_workbook.set_index('Unnamed: 0')

In [None]:
# Spread every weekly observation over the surrounding week: each row is
# repeated at t-3 ... t+3 days so that the monthly mean below weights a week
# by the number of its days that fall inside the month.
infare_dated = infare.assign(Date=pd.to_datetime(infare['Date']))

# Offsets -1, +1, -2, +2, -3, +3 days (six shifted copies per original row).
day_offsets = [sign * i for i in range(1, 4) for sign in (-1, 1)]

# Shift whole-frame copies instead of copying row by row with iterrows():
# this keeps the numeric dtypes intact and avoids the Python-level loop.
new_rows_df = pd.concat(
    [
        infare_dated.assign(Date=infare_dated['Date'] + pd.Timedelta(days=offset))
        for offset in day_offsets
    ],
    ignore_index=True,
)

# Original observations plus their shifted copies, each included exactly once.
# (The previous version concatenated `infare` a second time here, which
# double-counted the observation day in every monthly average.)
expanded_df = pd.concat([infare_dated, new_rows_df], ignore_index=True)
df_final = expanded_df.sort_values('Date').reset_index(drop=True)

# Monthly average fare per series.
df = df_final.set_index('Date')
df = df.resample('M').mean()

# Month-over-month change as a fraction (not multiplied by 100).
# The previous `df['pct_change'] = df.pct_change()*100` was removed: it tried
# to store a multi-column frame in a single column, and any column it did add
# would have leaked into df_mom as an extra series.
df_mom = df.pct_change()

# Blend of the current (40%) and previous (60%) month-over-month change.
df_mom_shift = df_mom.shift()
df_mom_prior_mom = df_mom*0.4 + df_mom_shift*0.6

# Drop the first month (no prior month to compare against) and switch to a
# monthly PeriodIndex.
df_mom = df_mom.dropna()
df_mom.index = df_mom.index.to_period('M')
df_mom.tail()

In [None]:
# Describe the single series to fetch through compass_tools: the Haver ticker
# USECON:UTUAN, labelled 'USCPI Air'.
seriesholder_list = [
    ct.SeriesHolder(ticker="USECON:UTUAN", data_source='Haver', name='USCPI Air'),
]

# Monthly data converted to percent change (x100); fill_method=None means
# missing observations are not forward-filled before differencing.
air_monthly = ct.get_data(seriesholder_list, freq='Monthly')
air = air_monthly.pct_change(fill_method=None) * 100

# Attach the fetched series to the fare changes, keeping every df_mom row.
df = df_mom.join(air, how='left')
df.tail()

In [None]:
CPI_COL = 'CPI-U: Airline Fare (NSA, 1982-84=100)'

# Correlation of each fare series with the CPI column, normalised to sum to 1.
# Only the df_mom columns plus the CPI column are used, and the CPI entry is
# dropped by name rather than by position, so the result does not depend on
# column order and the cell can be re-run after 'weighted'/'const' exist.
fare_cols = list(df_mom.columns)
cpi_corr = df[fare_cols + [CPI_COL]].corr()[CPI_COL].drop(CPI_COL)
weight = np.array(cpi_corr / cpi_corr.sum())

# Single composite regressor: correlation-weighted sum of the fare changes.
df['weighted'] = df_mom @ weight
# 'const' is stored on df but is not part of X, so the regression below is
# fitted without an intercept.
df['const'] = 1
X = df['weighted']
Y = df[CPI_COL].dropna()

# Fit only on rows where the CPI value is available. Selecting by Y's index
# (instead of X[:-1]) keeps endog and exog aligned wherever the gaps are.
mod = sm.OLS(endog=Y, exog=X.loc[Y.index])
result = mod.fit()
print(result.summary())

# Predictions for every row of X, including rows without a CPI value.
ols = result.get_prediction(X).predicted_mean

In [None]:
# Autocorrelation (top) and partial autocorrelation (bottom) of the CPI
# airline-fare column, up to 24 lags, with the last two rows left out.
cpi_for_diagnostics = df['CPI-U: Airline Fare (NSA, 1982-84=100)'][:-2]

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 8))
fig = sm.graphics.tsa.plot_acf(cpi_for_diagnostics, lags=24, ax=ax1)
fig = sm.graphics.tsa.plot_pacf(cpi_for_diagnostics, lags=24, ax=ax2)
plt.show()

In [None]:
CPI_COL = 'CPI-U: Airline Fare (NSA, 1982-84=100)'
AR_ORDER = 2  # input picked AR-order
MAX_AR_ORDER_TO_COMPARE = 4

exog = df['weighted']


def _fit_arx(endog_train, exog_train, lags):
    """Fit an AutoReg model with 12-period seasonal dummies, exog and no trend.

    Parameters
    ----------
    endog_train : Series of the dependent variable used for fitting.
    exog_train : Series of the exogenous regressor, same rows as endog_train.
    lags : int, autoregressive order.

    Returns
    -------
    The fitted results object returned by ``AutoReg(...).fit()``.
    """
    return AutoReg(
        endog_train,
        lags=lags,
        period=12,
        old_names=False,
        exog=exog_train,
        seasonal=True,
        trend='n'
    ).fit()


# The last row is held out of the fit; it is forecast one step ahead below.
endog_train = df[CPI_COL][:-1]
exog_train = exog[:-1]

# Compare in-sample mean absolute error across AR orders 0..4.
for i in range(0, MAX_AR_ORDER_TO_COMPARE + 1):
    model = _fit_arx(endog_train, exog_train, i)
    print(f"model MAE for AR-{i} is", (model.fittedvalues - df[CPI_COL]).abs().mean())

model = _fit_arx(endog_train, exog_train, AR_ORDER)
# model.summary()

# In-sample fitted values followed by a one-step forecast that uses the last
# exog row.
final_model = pd.concat([model.fittedvalues, model.forecast(1, exog=exog.iloc[-1:])])
final_df = pd.DataFrame({'model': final_model}).join(air)
final_df['ARX_error'] = final_df[CPI_COL] - final_df['model']

# `ols` has one prediction per row of df, while final_df is shorter by
# AR_ORDER rows. Slice by AR_ORDER rather than a hard-coded 2 so the lengths
# still match when a different order is picked above.
# NOTE(review): assumes the fitted values start AR_ORDER rows in -- confirm.
final_df['ols'] = ols[AR_ORDER:]
final_df['OLS_error'] = final_df[CPI_COL] - final_df['ols']
final_df = final_df.join(df.drop(CPI_COL, axis=1))

# Export everything except the helper 'const' column, stamped with today's date.
today = datetime.today().strftime('%Y-%m-%d')
final_df = final_df.drop(['const'], axis=1)
final_df.to_excel(f'airfare_model_{today}.xlsx')