In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import sys
import altair as alt
sys.path.append('../src')
from models import plot_structure, plot_supply_and_demand, regression_discontinuity_model, differences_differences, plot_for_parallel_trends 

# Product/service -> category lookup tables (the category labels were produced by LLMs).
df_model_data_CAN, df_model_data_USA = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/CAN_Categorized_Products_and_Services_NEW.csv',
        '../data/processed/US_Categorized_Products_and_Services_NEW.csv',
    )
)

# Processed CPI series for Canada and the USA.
df_Canada_CPI, df_USA_CPI = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/processed/Canada_CPI_Processed_2018_2019.csv',
        '../data/processed/USA_CPI_Processed_2018_2019.csv',
    )
)

# Plain-text preview of the Canadian lookup; the US preview is the cell's displayed value.
print(df_model_data_CAN.head())
df_model_data_USA.head()

                                   Product_Service Category
0                                    Durable goods      ???
1                                            Goods      ???
2                             Household operations      ???
3  Household operations, furnishings and equipment      ???
4                        Housing (1986 definition)      ???


Unnamed: 0,Product_Service,Category
0,Airline fares,Transportation
1,Alcoholic beverages,Alcohol & Tobacco
2,Alcoholic beverages at home,Alcohol & Tobacco
3,All items,Uncategorized
4,All items less energy,Energy


In [3]:

# Standardise the CPI values per product, keeping an untouched (non-scaled) copy of each country's data.

def _standardize_by_product(cpi_long, product_col='Products and product groups', value_col='VALUE'):
    """Return a copy of ``cpi_long`` with ``value_col`` standardised within each product.

    Every distinct value of ``product_col`` gets its own freshly fitted
    ``StandardScaler``, so each product series is centred and scaled using
    only its own rows. The input frame is not modified.

    Parameters
    ----------
    cpi_long : pandas.DataFrame
        Long-format frame containing ``product_col`` and ``value_col``.
    product_col : str
        Column whose distinct values define the groups.
    value_col : str
        Numeric column to standardise.

    Returns
    -------
    pandas.DataFrame
        Copy of ``cpi_long`` with ``value_col`` replaced by the scaled values.
    """
    scaled = cpi_long.copy()
    # Cast first: the scaler returns floats, and writing floats into an
    # integer column through .loc is a deprecated silent upcast in pandas.
    scaled[value_col] = scaled[value_col].astype(float)
    for product in scaled[product_col].unique():
        rows = scaled[product_col] == product
        # StandardScaler needs a 2-D (n_samples, 1) array; flatten again for the write-back.
        column = scaled.loc[rows, [value_col]].to_numpy()
        scaled.loc[rows, value_col] = StandardScaler().fit_transform(column).ravel()
    return scaled


# The US frame has one column per product; melt it into the same long layout
# (REF_DATE, 'Products and product groups', VALUE) that the Canadian frame is used with below.
df_USA_CPI_melt = pd.melt(
    df_USA_CPI,
    id_vars='REF_DATE',
    var_name='Products and product groups',
    value_name='VALUE',
)

df_Canada_CPI_NonScaled = df_Canada_CPI.copy()
df_Canada_CPI_Scaled = _standardize_by_product(df_Canada_CPI)

df_USA_CPI_NonScaled = df_USA_CPI_melt.copy()
df_USA_CPI_Scaled = _standardize_by_product(df_USA_CPI_melt)

In [None]:
# Attach a 'Category' label to every scaled and non-scaled CPI frame.

# Product name -> category lookups. Only the first row per product is kept,
# so a product that appears more than once takes its first listed category.
dict_CAN = (
    df_model_data_CAN
    .drop_duplicates(subset=['Product_Service'])
    .set_index('Product_Service')['Category']
    .to_dict()
)
dict_USA = (
    df_model_data_USA
    .drop_duplicates(subset=['Product_Service'])
    .set_index('Product_Service')['Category']
    .to_dict()
)

# Each frame is labelled from its OWN product column. The Canadian frames were
# previously labelled from df_Canada_CPI's column, which only worked because the
# copies still shared its index; this form matches the US lines and does not
# depend on index alignment. Products missing from the lookup get NaN.
df_Canada_CPI_Scaled['Category'] = df_Canada_CPI_Scaled['Products and product groups'].map(dict_CAN)
df_Canada_CPI_NonScaled['Category'] = df_Canada_CPI_NonScaled['Products and product groups'].map(dict_CAN)
df_USA_CPI_Scaled['Category'] = df_USA_CPI_Scaled['Products and product groups'].map(dict_USA)
df_USA_CPI_NonScaled['Category'] = df_USA_CPI_NonScaled['Products and product groups'].map(dict_USA)


In [None]:
# Collapse each CPI frame to one median row per (REF_DATE, Category).

def _median_by_date_and_category(cpi_with_category):
    """Median of every remaining column per (REF_DATE, Category), with the keys as ordinary columns."""
    grouped = cpi_with_category.groupby(['REF_DATE', 'Category'])
    return grouped.median().reset_index()


# The product name is dropped first so that only REF_DATE, Category and the
# remaining value columns take part in the aggregation.
df_Canada_CPI_Cat = df_Canada_CPI_Scaled.drop(columns=['Products and product groups'])
df_Canada_CPI_Cat_NonScaled = df_Canada_CPI_NonScaled.drop(columns=['Products and product groups'])
df_USA_CPI_Cat = df_USA_CPI_Scaled.drop(columns=['Products and product groups'])
df_USA_CPI_Cat_NonScaled = df_USA_CPI_NonScaled.drop(columns=['Products and product groups'])

df_Canada_CPI_groupby = _median_by_date_and_category(df_Canada_CPI_Cat)
df_Canada_CPI_groupby_NonScaled = _median_by_date_and_category(df_Canada_CPI_Cat_NonScaled)
df_USA_CPI_groupby = _median_by_date_and_category(df_USA_CPI_Cat)
df_USA_CPI_groupby_NonScaled = _median_by_date_and_category(df_USA_CPI_Cat_NonScaled)

In [None]:
# Print every category label present in the aggregated Canadian frame, one per line.
canada_category_labels = df_Canada_CPI_groupby['Category'].unique()
for category_label in canada_category_labels:
    print(category_label)

In [None]:
# Parallel-trends plot on the aggregated Canadian data: 'Shelter' against 'Education & Reading'.
# NOTE(review): the earlier wording here described groceries vs housing/rent/tax/insurance,
# but these two categories are what the call actually passes.
# The two dates are presumably the start and end of the plotted window - confirm in src/models.
plot_for_parallel_trends(
    df_Canada_CPI_groupby,
    '2018-01-01',
    '2018-11-01',
    'Shelter',
    'Education & Reading',
)

In [None]:
# Difference-in-differences on the aggregated Canadian data, 'Shelter' vs 'Education & Reading',
# over a narrow window around the China tariffs (one month either side of 2018-08-01).
# NOTE(review): the third date is presumably the treatment date - confirm in src/models.
differences_differences(
    df_Canada_CPI_groupby,
    'Shelter',
    'Education & Reading',
    '2018-07-01',
    '2018-09-01',
    '2018-08-01',
)

In [None]:
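# The aggregated (grouped) series suggest some interaction.
# Let us drill down into the specific grocery sub-categories to see which items may have been affected by the tariffs.
# NOTE(review): the cells below keep the `grocery_*` names but filter on 'Shelter' and 'Education & Reading' - confirm which categories this commentary refers to.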

In [None]:

# Build the item-level subsets and the product -> sub-category lookups.
# NOTE(review): the `grocery_*` names are kept for compatibility, but every filter
# in this cell selects the 'Shelter' / 'Education & Reading' categories.

# Canadian product names that fall under 'Shelter'.
grocery_items = df_Canada_CPI_NonScaled.loc[
    df_Canada_CPI_NonScaled['Category'] == 'Shelter',
    'Products and product groups',
].unique()

# Explicit .copy(): this subset gets its 'Category' column rewritten in the next cell,
# and writing into a filtered slice would raise SettingWithCopyWarning.
grocery_dataframe = df_Canada_CPI_NonScaled[
    df_Canada_CPI_NonScaled['Category'].isin(['Shelter', 'Education & Reading'])
].copy()


# Canadian product name -> sub-category.
# `food_categories` is a second name bound to the same dict; kept in case other cells use it.
dict_grocery = food_categories = {
    "Homeowners' home and mortgage insurance": 'Owned accommodation',
    "Homeowners' maintenance and repairs": 'Owned accommodation',
    "Homeowners' replacement cost": 'Owned accommodation',
    "Mortgage interest cost": 'Owned accommodation',
    "Other owned accommodation expenses": 'Owned accommodation',
    "Property taxes and other special charges": 'Owned accommodation',
    "Rent": 'Rented accommodation',
    "Tenants' insurance premiums": 'Rented accommodation',
    "Tenants' maintenance, repairs and other expenses": 'Rented accommodation',
    "Electricity": 'Water, fuel and electricity',
    "Fuel oil and other fuels": 'Water, fuel and electricity',
    "Natural gas": 'Water, fuel and electricity',
    "Water": 'Water, fuel and electricity',
    'Books and reading material (excluding textbooks)': 'Books, Magazines & Newspaper',
    'Education': 'Education',
    'Education and reading': 'Education',
    'Magazines and periodicals': 'Books, Magazines & Newspaper',
    'Newspapers': 'Books, Magazines & Newspaper',
    'Reading material (excluding textbooks)': 'Books, Magazines & Newspaper',
    'Recreation, education and reading': 'Education',
    'School textbooks and supplies': 'Books, Magazines & Newspaper',
    'Tuition fees': 'Education'
}

# US lookup, stored the other way round: sub-category -> list of US product names.
dict_grocery_US = {
    'Owned accommodation': [
        'Fuels and utilities', 'Household energy', 'Housing', 'Housing at school, excluding board',
        "Owners' equivalent rent of primary residence", "Owners' equivalent rent of residences"
    ],
    'Rented accommodation': [
        'Rent of primary residence', 'Rent of shelter', 'Services less rent of shelter'
    ],
    'Water, fuel and electricity': [
        'Utility (piped) gas service', 'Water and sewer and trash collection services', 'Water and sewerage maintenance'
    ]
}


# Same category filter for the US data; .copy() so the column assignment below
# writes to an independent frame rather than a filtered slice.
US_grocery_dataset = df_USA_CPI_NonScaled[
    df_USA_CPI_NonScaled['Category'].isin(['Shelter', 'Education & Reading'])
].copy()

print(US_grocery_dataset['Products and product groups'].unique())

# dict_grocery_US is keyed by sub-category with lists of products, so it has to be
# inverted before it can label rows. Mapping the existing 'Category' column through
# it directly (values 'Shelter' / 'Education & Reading') matched no key and turned
# every row into NaN.
product_to_subcategory_US = {
    product: subcategory
    for subcategory, products in dict_grocery_US.items()
    for product in products
}
# Products that dict_grocery_US does not list (it has no education/reading entries) become NaN.
US_grocery_dataset['Category'] = US_grocery_dataset['Products and product groups'].map(product_to_subcategory_US)



In [None]:
# Replace the broad category of each Canadian row with its finer sub-category
# (looked up from the product name via dict_grocery), drop the product name, and
# take the mean of the remaining columns per (REF_DATE, sub-category).
#
# The frame is rebuilt with assign/drop instead of a column assignment plus
# inplace drop: grocery_dataframe comes from a boolean filter, so writing into it
# directly triggers SettingWithCopyWarning. The name is rebound so later cells
# still find the relabelled frame under `grocery_dataframe`.
grocery_dataframe = (
    grocery_dataframe
    .assign(Category=grocery_dataframe['Products and product groups'].map(dict_grocery))
    .drop(columns=['Products and product groups'])
)
print(grocery_dataframe['Category'].unique())
grocery_dataframe_groupby = (
    grocery_dataframe
    .groupby(['REF_DATE', 'Category'])
    .mean()
    .reset_index()
)

In [None]:
# Regression-discontinuity fit on the aggregated (scaled) Canadian 'Shelter' series.
# Only the first of the four returned values (the fitted model) is kept.
# NOTE(review): the variable is named "groceries" but the category passed is 'Shelter'.
# The four dates are presumably window start/end followed by the cutoff bounds - confirm in src/models.
(
    model_Canada_RC_groceries_first_tariff,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-06-01',
    '2018-03-01',
    '2018-04-01',
)

In [None]:

# Display the summary of the first-tariff model fitted in the previous cell.
model_Canada_RC_groceries_first_tariff.summary()

In [None]:
# Second regression-discontinuity fit on the aggregated Canadian 'Shelter' series,
# using a later window (2018-05-01 to 2018-12-01) and the dates 2018-07-01 / 2018-10-01.
# NOTE(review): variable name says "groceries"; the category passed is 'Shelter'.
(
    model_Canada_RC_groceries_second_tariff,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2018-05-01',
    '2018-12-01',
    '2018-07-01',
    '2018-10-01',
)

In [None]:
# Display the summary of the second-tariff model fitted in the previous cell.
model_Canada_RC_groceries_second_tariff.summary()

In [None]:
# One regression-discontinuity fit on Canadian 'Shelter' spanning both earlier windows
# (2017-10-01 to 2018-12-01). Only a single date after the window is passed here,
# whereas the two previous fits pass two.
(
    first_tariff_vs_second_tariff,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-12-01',
    '2018-05-01',
)

# The summary is the cell's displayed value.
first_tariff_vs_second_tariff.summary()

In [None]:
# US tariffs seem to have had little immediate impact on Canadian grocery retailers when looking at manufacturing.
# The only month that appears affected is August, with the China tariffs, and as mentioned supply was still high.
# It is hard to disentangle the impact of the tariffs themselves from the impact of the news about them,
# so a longer-term approach may give more insight into what happened with groceries.


# Longer-horizon parallel-trends plot (2017-01-01 to 2019-10-01) on the aggregated Canadian
# data, 'Shelter' against 'Education & Reading'. The return value is stored, not displayed.
Canada_parallel_trends_groceries = plot_for_parallel_trends(
    df_Canada_CPI_groupby,
    '2017-01-01',
    '2019-10-01',
    'Shelter',
    'Education & Reading',
)



In [None]:
# It is hard to quantify the impact on groceries of any specific tariff because the tariffs are serially correlated. Instead, we look at the period from the start of the breakdown in the China talks to the Chinese tariff hike, run an RD between these periods, and see whether we can draw conclusions.
# There does seem to be greater variance from the start of the Intellectual Property (IP) trials up to the Chinese tariff hike, propagating like a wave with each tariff, with minimal activity following the results of the IP investigation. Let us investigate further.
# Regression-discontinuity fit on aggregated Canadian 'Shelter' for 2017-01-01 to 2018-04-01
# with the single date 2017-08-01, requesting the 'nonrobust' heteroskedasticity option.
(
    model_groceries_longterm_IP,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-01-01',
    '2018-04-01',
    '2017-08-01',
    heteroskedasticity='nonrobust',
)

# The summary is the cell's displayed value.
model_groceries_longterm_IP.summary()

In [None]:
# Next, let's use RD to compare the tariff / IP-trials period with the period of the Chinese tariff hike.
# Regression-discontinuity fit on aggregated Canadian 'Shelter' for 2018-02-01 to 2019-05-01,
# with the dates 2018-11-01 / 2019-01-01; only the fitted model is kept.
(
    model_groceries_longterm_recovery,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2018-02-01',
    '2019-05-01',
    '2018-11-01',
    '2019-01-01',
)

# Printed (plain text) rather than left as the cell value.
recovery_summary = model_groceries_longterm_recovery.summary()
print(recovery_summary)



In [None]:
# Let us look at what happened to American groceries under the tariffs, and compare the effect of the US tariffs with that of the Canadian tariffs.
# They appear to have had a similar effect on American goods.

# Structure plot for the aggregated US 'Shelter' series between 2017-01-01 and 2020-02-01.
plot_structure(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-01-01',
    '2020-02-01',
)



In [None]:
# The American grocery series appears to follow the Canadian one in waves, except that the breakdown in the China talks led to a decrease in inflation for Canadian groceries but an increase for American groceries.
# We will look at the IP investigation later. For now, it appears that Trump's tariffs on Canadian goods had only a small impact on American groceries, whereas the combination of Canada's tariff announcement, the Canadian tariffs and the Chinese tariffs on American exports may have had a larger impact. Let us investigate further.
# Three regression-discontinuity fits on the aggregated US 'Shelter' series. Each one is
# fitted and its summary printed before the next fit starts.
# NOTE(review): the variable names say "grocery"; the category passed is 'Shelter'.

# 1) Same window and dates as the first Canadian fit.
(
    model_USA_RC_grocery_cutoff_first_tariff,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-06-01',
    '2018-03-01',
    '2018-04-01',
)
print(model_USA_RC_grocery_cutoff_first_tariff.summary())

# 2) Same window and dates as the second Canadian fit.
(
    model_USA_RC_grocery_cutoff_second_tariff,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2018-05-01',
    '2018-12-01',
    '2018-07-01',
    '2018-10-01',
)
print(model_USA_RC_grocery_cutoff_second_tariff.summary())

# 3) Long window, 2017-10-01 to 2019-12-01.
(
    model_longterm_impact_groceries,
    _,
    _,
    _,
) = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2019-12-01',
    '2018-02-01',
    '2018-04-01',
)
print(model_longterm_impact_groceries.summary())



In [None]:
# Show which category labels exist in the aggregated US frame.
usa_category_labels = df_USA_CPI_groupby['Category'].unique()
print(usa_category_labels)

# Parallel-trends plot for the US, 'Shelter' against 'Education & Reading',
# 2017-01-01 to 2019-12-01, with no third category.
USA_parallel_trends_groceries = plot_for_parallel_trends(
    df_USA_CPI_groupby,
    '2017-01-01',
    '2019-12-01',
    'Shelter',
    'Education & Reading',
    category_3=None,
)

# The returned object is the cell's displayed value.
USA_parallel_trends_groceries

In [None]:
# Difference-in-differences for the US, 'Shelter' vs 'Education & Reading', around 2018-04-01,
# run on the item-level scaled frame (df_USA_CPI_Scaled), i.e. before the per-category
# median aggregation.
# NOTE(review): every other model call in this notebook uses a *_groupby frame - confirm
# that the item-level frame is intended here.
differences_differences(
    df_USA_CPI_Scaled,
    'Shelter',
    'Education & Reading',
    '2018-03-01',
    '2018-05-01',
    '2018-04-01',
)

In [None]:
# Difference-in-differences for the US, 'Shelter' vs 'Education & Reading', around 2018-04-01,
# run on the per-category median frame (df_USA_CPI_groupby).
differences_differences(
    df_USA_CPI_groupby,
    'Shelter',
    'Education & Reading',
    '2018-03-01',
    '2018-05-01',
    '2018-04-01',
)