In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import sys
import altair as alt
sys.path.append('../src')
from models import plot_structure, plot_supply_and_demand, regression_discontinuity_model, differences_differences, plot_for_parallel_trends 

# Product/service -> category lookup tables (the categories were produced by LLMs)
df_model_data_CAN, df_model_data_USA = map(pd.read_csv, (
    '../data/processed/CAN_Categorized_Products_and_Services_NEW.csv',
    '../data/processed/US_Categorized_Products_and_Services_NEW.csv',
))

# Processed CPI tables for Canada and the USA, used for the inflation analysis below
df_Canada_CPI, df_USA_CPI = map(pd.read_csv, (
    '../data/processed/Canada_CPI_Processed_2018_2019.csv',
    '../data/processed/USA_CPI_Processed_2018_2019.csv',
))

# Preview both lookup tables: the Canadian one as printed text, the US one as the cell output
print(df_model_data_CAN.head())
df_model_data_USA.head()

                                   Product_Service Category
0                                    Durable goods      ???
1                                            Goods      ???
2                             Household operations      ???
3  Household operations, furnishings and equipment      ???
4                        Housing (1986 definition)      ???


Unnamed: 0,Product_Service,Category
0,Airline fares,Transportation
1,Alcoholic beverages,Alcohol & Tobacco
2,Alcoholic beverages at home,Alcohol & Tobacco
3,All items,Uncategorized
4,All items less energy,Energy


In [3]:

# Standardize the CPI data and keep both a scaled and a non-scaled version per country.
# The US table is first melted to long form with the same
# 'REF_DATE' / 'Products and product groups' / 'VALUE' columns the Canadian code uses.
df_USA_CPI_melt = pd.melt(df_USA_CPI, var_name = 'Products and product groups',value_name = 'VALUE', id_vars = 'REF_DATE')


def _standardize_per_product(cpi_long):
    """Return a copy of ``cpi_long`` with 'VALUE' standardized within each product.

    A separate StandardScaler is fitted for every distinct value of
    'Products and product groups', so each product series is scaled
    independently of the others. The input frame is not modified.
    """
    product_col = 'Products and product groups'
    scaled = cpi_long.copy()
    # dropna(): a missing product label would select no rows and make
    # fit_transform fail on an empty array.
    for product in scaled[product_col].dropna().unique():
        rows = scaled[product_col] == product
        values = scaled.loc[rows, 'VALUE'].to_numpy().reshape(-1, 1)
        scaled.loc[rows, 'VALUE'] = StandardScaler().fit_transform(values).ravel()
    return scaled


df_Canada_CPI_Scaled = _standardize_per_product(df_Canada_CPI)
df_Canada_CPI_NonScaled = df_Canada_CPI.copy()

df_USA_CPI_Scaled = _standardize_per_product(df_USA_CPI_melt)
df_USA_CPI_NonScaled = df_USA_CPI_melt.copy()

In [None]:
# Build product -> category lookups (the first row wins when a product is listed twice)
# and add a 'Category' column to the scaled and non-scaled frames of both countries.
dict_CAN = (
    df_model_data_CAN
    .drop_duplicates(subset=['Product_Service'])
    .set_index('Product_Service')['Category']
    .to_dict()
)
dict_USA = (
    df_model_data_USA
    .drop_duplicates(subset=['Product_Service'])
    .set_index('Product_Service')['Category']
    .to_dict()
)

# Both Canadian frames are copies of df_Canada_CPI, so one mapped Series serves both.
canada_categories = df_Canada_CPI['Products and product groups'].map(dict_CAN)
for canada_frame in (df_Canada_CPI_Scaled, df_Canada_CPI_NonScaled):
    canada_frame['Category'] = canada_categories
for usa_frame in (df_USA_CPI_Scaled, df_USA_CPI_NonScaled):
    usa_frame['Category'] = usa_frame['Products and product groups'].map(dict_USA)


In [None]:
# Collapse every CPI frame to the median per (REF_DATE, Category); the product label
# column is dropped first so only the remaining columns are aggregated.
df_Canada_CPI_Cat = df_Canada_CPI_Scaled.drop(columns=['Products and product groups'])
df_Canada_CPI_groupby = (
    df_Canada_CPI_Cat
    .groupby(['REF_DATE', 'Category'])
    .median()
    .reset_index()
)

df_Canada_CPI_Cat_NonScaled = df_Canada_CPI_NonScaled.drop(columns=['Products and product groups'])
df_Canada_CPI_groupby_NonScaled = (
    df_Canada_CPI_Cat_NonScaled
    .groupby(['REF_DATE', 'Category'])
    .median()
    .reset_index()
)

df_USA_CPI_Cat = df_USA_CPI_Scaled.drop(columns=['Products and product groups'])
df_USA_CPI_groupby = (
    df_USA_CPI_Cat
    .groupby(['REF_DATE', 'Category'])
    .median()
    .reset_index()
)

df_USA_CPI_Cat_NonScaled = df_USA_CPI_NonScaled.drop(columns=['Products and product groups'])
df_USA_CPI_groupby_NonScaled = (
    df_USA_CPI_Cat_NonScaled
    .groupby(['REF_DATE', 'Category'])
    .median()
    .reset_index()
)

In [None]:
# Print every category label present in the aggregated Canadian CPI frame, one per line
for category_label in df_Canada_CPI_groupby['Category'].unique():
    print(category_label)

In [None]:
# Parallel-trends plot comparing a category expected to be more affected by the tariffs
# with one expected to be less affected.
# NOTE(review): the original comment describes groceries vs housing/rent/tax/insurance, but
# the categories passed are 'Shelter' and 'Education & Reading' — confirm the intended pair.
plot_for_parallel_trends(
    df_Canada_CPI_groupby,
    '2018-01-01',
    '2018-11-01',
    'Shelter',
    'Education & Reading',
)

In [None]:
# Difference-in-differences around the China tariffs, to get a better idea of a single month
differences_differences(
    df_Canada_CPI_groupby,
    'Shelter',
    'Education & Reading',
    '2018-07-01',
    '2018-09-01',
    '2018-08-01',
)

In [None]:
#It seems like the groupby shows some interaction 
#let us investigate into specific grocery categories to see which groceries may have been impacted by the tariffs

In [None]:

# Item-level (non-scaled) rows for the two categories compared in this section, plus the
# dictionaries that regroup individual products into sub-categories.
# NOTE(review): the variable names say "grocery", but the rows selected are the
# 'Shelter' and 'Education & Reading' categories — confirm the naming is intentional.
grocery_items = df_Canada_CPI_NonScaled[(df_Canada_CPI_NonScaled['Category'] == 'Shelter')]['Products and product groups'].unique()

# .copy(): this frame gets its columns edited later, which should happen on an
# independent frame rather than on a filtered selection of df_Canada_CPI_NonScaled.
grocery_dataframe = df_Canada_CPI_NonScaled[
    df_Canada_CPI_NonScaled['Category'].isin(['Shelter', 'Education & Reading'])
].copy()


# Canadian product label -> sub-category
dict_grocery = food_categories = {
    "Homeowners' home and mortgage insurance": 'Owned accommodation',
    "Homeowners' maintenance and repairs": 'Owned accommodation',
    "Homeowners' replacement cost": 'Owned accommodation',
    "Mortgage interest cost": 'Owned accommodation',
    "Other owned accommodation expenses": 'Owned accommodation',
    "Property taxes and other special charges": 'Owned accommodation',
    "Rent": 'Rented accommodation',
    "Tenants' insurance premiums": 'Rented accommodation',
    "Tenants' maintenance, repairs and other expenses": 'Rented accommodation',
    "Electricity": 'Water, fuel and electricity',
    "Fuel oil and other fuels": 'Water, fuel and electricity',
    "Natural gas": 'Water, fuel and electricity',
    "Water": 'Water, fuel and electricity',
    'Books and reading material (excluding textbooks)':'Books, Magazines & Newspaper',
    'Education':'Education',
    'Education and reading':'Education',
    'Magazines and periodicals':'Books, Magazines & Newspaper',
    'Newspapers':'Books, Magazines & Newspaper',
    'Reading material (excluding textbooks)':'Books, Magazines & Newspaper',
    'Recreation, education and reading':'Education',
    'School textbooks and supplies':'Books, Magazines & Newspaper',
    'Tuition fees':'Education'
}

# US sub-category -> list of product labels (note: the opposite orientation of dict_grocery)
dict_grocery_US= {
    'Owned accommodation': [
        'Fuels and utilities', 'Household energy', 'Housing', 'Housing at school, excluding board',
        "Owners' equivalent rent of primary residence", "Owners' equivalent rent of residences"
    ],
    'Rented accommodation': [
        'Rent of primary residence', 'Rent of shelter', 'Services less rent of shelter'
    ],
    'Water, fuel and electricity': [
        'Utility (piped) gas service', 'Water and sewer and trash collection services','Water and sewerage maintenance'
    ]
}


US_grocery_dataset = df_USA_CPI_NonScaled[
    df_USA_CPI_NonScaled['Category'].isin(['Shelter', 'Education & Reading'])
].copy()

print(US_grocery_dataset['Products and product groups'].unique())

# dict_grocery_US is keyed by sub-category with lists of products as values, so mapping
# the old 'Category' column through it matched nothing and left every row NaN.
# Invert it to product -> sub-category and map the product column instead, mirroring
# how dict_grocery is applied to the Canadian frame. Products that are not listed
# in dict_grocery_US still end up as NaN.
product_to_subcategory_US = {
    product: subcategory
    for subcategory, products in dict_grocery_US.items()
    for product in products
}
US_grocery_dataset['Category'] = US_grocery_dataset['Products and product groups'].map(product_to_subcategory_US)



In [None]:
# Replace each product label with its sub-category from dict_grocery, then average the
# remaining columns per (REF_DATE, sub-category).
# assign/drop build a new frame instead of editing grocery_dataframe in place:
# it is a filtered selection of another frame, and writing into such a selection
# triggers pandas' SettingWithCopyWarning.
grocery_dataframe = (
    grocery_dataframe
    .assign(Category=grocery_dataframe['Products and product groups'].map(dict_grocery))
    .drop(columns=['Products and product groups'])
)
print(grocery_dataframe['Category'].unique())
grocery_dataframe_groupby = grocery_dataframe.groupby(['REF_DATE', 'Category']).mean().reset_index()

In [None]:
# NOTE(review): dict_grocery, which supplies this frame's 'Category' labels, has no
# 'Fish and Seafood' or 'Dairy' entries — confirm these categories exist in the data.
plot_for_parallel_trends(
    grocery_dataframe_groupby,
    '2018-06-01',
    '2018-09-01',
    'Fish and Seafood',
    'Dairy',
)

# Author's reading of the plot: dairy seems unimpacted, owing to Canada's protective tariffs,
# while for fish and seafood the counterfactual seems to go down after the treatment
# period, as a sort of recovery period.

In [None]:
# NOTE(review): 'Grains', 'Pasta' and 'Vegetables' are not labels produced by dict_grocery — confirm.
plot_for_parallel_trends(
    grocery_dataframe_groupby,
    '2018-06-01',
    '2018-09-01',
    'Grains',
    'Pasta',
    'Vegetables',
)

In [None]:
# NOTE(review): 'Sweets' and 'Fruit' are not labels produced by dict_grocery — confirm.
plot_for_parallel_trends(
    grocery_dataframe_groupby,
    '2018-06-01',
    '2018-09-01',
    'Sweets',
    'Fruit',
)

In [None]:
# NOTE(review): 'Meat' and 'Pet Supplies' are not labels produced by dict_grocery — confirm.
plot_for_parallel_trends(
    grocery_dataframe_groupby,
    '2018-06-01',
    '2018-09-01',
    'Meat',
    'Pet Supplies',
)

In [None]:
# NOTE(review): 'General Category' and 'Dining Out' are not labels produced by dict_grocery — confirm.
plot_for_parallel_trends(
    grocery_dataframe_groupby,
    '2018-06-01',
    '2018-09-01',
    'General Category',
    'Dining Out',
)

In [None]:
# Author's observations from the plots above:
#  - most items follow a sharp downward trend in August, when the China tariffs were
#    implemented, and were not as affected by the Canadian tariffs;
#  - only dairy and fish/seafood seem to satisfy the parallel-trends assumption;
#  - the items following this pattern are everyday food groups, except carbohydrates and vegetables.
# Run difference-in-differences on the pair that satisfied the assumption.
# NOTE(review): 'Fish and Seafood' and 'Dairy' are not labels produced by dict_grocery — confirm.
differences_differences(
    grocery_dataframe_groupby,
    'Fish and Seafood',
    'Dairy',
    '2018-07-01',
    '2018-09-01',
    '2018-08-01',
)

In [None]:
#It seems the groupby shows a significant difference but when looking at the individual items, the difference is not significant
#perhaps there was not enough data and the set was underpowered but it provides some evidence from the plots that the tariffs may have had a slight impact on goods - let us investigate further

In [None]:
# Sales data: load the processed Canadian sales table
df_sales = pd.read_csv(
    "../data/processed/Canada_Sales_Processed.csv"
)

In [None]:
# Give the four sales columns the names used in the rest of this notebook,
# then list the distinct statistics available.
sales_column_names = ['PrincipleStats', 'GoodType', 'REF_DATE', 'VALUE']
df_sales.columns = sales_column_names
df_sales['PrincipleStats'].unique()

In [None]:
# Print every distinct good type in the Canadian sales data, one per line
for good_type in df_sales['GoodType'].unique():
    print(good_type)

In [None]:
# Author's observation: in August unfilled orders dropped, meaning less demand for food
# manufacturing, less supply to retail stores and more inflation for that month.
plot_supply_and_demand(
    df_sales,
    "Food manufacturing [311]",
)

In [None]:
#it seems more inventory meant that less orders were filled - this could be due to the tariffs - and it also seems that there were fewer unfilled orders meaning less demand for food manufacturing which means supply went down and inflation should go up.
#but since demand and inventory are similar, it is hard to say if the tariffs had an impact on inflation and the original models without the groupby may be right 

In [None]:
# Same chart for an earlier window
plot_supply_and_demand(
    df_sales,
    "Food manufacturing [311]",
    '2017-10-01',
    '2018-06-01',
)


In [None]:
# Regression-discontinuity model on the aggregated Canadian CPI for 'Shelter';
# only the first returned element (the model) is kept.
model_Canada_RC_groceries_first_tariff, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-06-01',
    '2018-03-01',
    '2018-04-01',
)

In [None]:

# Show the summary of the model returned by regression_discontinuity_model in the previous cell
model_Canada_RC_groceries_first_tariff.summary()

In [None]:
# Same regression-discontinuity model for a later set of dates; only the model is kept.
model_Canada_RC_groceries_second_tariff, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2018-05-01',
    '2018-12-01',
    '2018-07-01',
    '2018-10-01',
)

In [None]:
# Show the summary of the model returned by regression_discontinuity_model in the previous cell
model_Canada_RC_groceries_second_tariff.summary()

In [None]:
# One model spanning both tariff periods, called with a single date after the window
# (the two models above pass two), then its summary as the cell output.
first_tariff_vs_second_tariff, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-12-01',
    '2018-05-01',
)

first_tariff_vs_second_tariff.summary()

In [None]:
# Author's observation: the American tariffs seem to have had little immediate impact on
# Canadian grocery retailers judging by manufacturing; even in August (the China tariffs),
# the only month that looked affected, supply was still high. The effect of the tariffs is
# hard to disentangle from the effect of the news, so take a longer-term view to gain
# more insight into what happened with groceries.
Canada_parallel_trends_groceries = plot_for_parallel_trends(
    df_Canada_CPI_groupby,
    '2017-01-01',
    '2019-10-01',
    'Shelter',
    'Education & Reading',
)



In [None]:
# The impact of specific tariffs is hard to quantify because the tariff events are serially
# correlated, so run an RD over the span from the breakdown in China talks to the Chinese
# tariff hike and see whether conclusions can be drawn.
# Author's observation: variance seems greater from the start of the intellectual-property
# (IP) trials up to the Chinese tariff hike, propagating like a wave with each tariff, with
# minimal activity following the results of the IP investigation.
model_groceries_longterm_IP, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-01-01',
    '2018-04-01',
    '2017-08-01',
    heteroskedasticity = 'nonrobust',
)

model_groceries_longterm_IP.summary()

In [None]:
# Next, use RD to compare the tariff / IP-trials period with the Chinese tariff hike
model_groceries_longterm_recovery, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2018-02-01',
    '2019-05-01',
    '2018-11-01',
    '2019-01-01',
)

print(model_groceries_longterm_recovery.summary())



In [None]:
#The IP trials overall impact including the US tariffs on Canada shows a reduction on inflation in groceries in Canada while the Chinese hike in tariffs to the end of the data does not show a significant impact on groceries with the groupby

#the smaller impact of each tariff appear to cause a wave that ripples through time causing greater variance in the data, each individual month of tariffs increased inflation at the treatment period but appeared to reduce inflation the following month before another wave occurs. 

#overall groceries were not as impacted when grouped together as there were some groceries influenced such as seafood while other groceries such as dairy remained unimpacted 


#In other words the tariffs had a short term impact on inflation but contributed to larger ripples in waves that caused greater variance in the data.

#The news of breakdown in China talks momentarily caused economic shock that influenced the agricultural industry to decrease food supply in Canada and increase inflation
# and the proceeding recovery after the break down in China talks and beginning of IP trade investigations, did appear to make grocery inflation lower for a short period of time.





In [None]:

# Plot the food-manufacturing sales chart again over this longer period for further investigation
Canada_groceries_supply_demand = plot_supply_and_demand(
    df_sales,
    "Food manufacturing [311]",
    "2017-02-01",
    "2019-12-01",
)

Canada_groceries_supply_demand

In [None]:
#It appears that demand for food manufacturing was lower than supply during the trade talk breakdown which means supply for food was limited during the trade talks which caused inflation but demand rose following the trade talk breakdown and remained unaffected throughout the tariffs which is why groceries were largely unaffected



In [None]:
#while inventory did increase meaning a decrease in supply, there was still enough demand and that is why groceries were not impacted despite the sharp decrease in August 2018 perhaps mainly due to the China tariffs on US goods.



In [None]:
# Look at the American series under the tariffs and compare the effect of the US tariffs
# with that of the Canadian tariffs.
# Author's observation: they appear to have a similar effect on American goods.
plot_structure(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-01-01',
    '2020-02-01',
)



In [None]:
# Author's observations: the American series follows the Canadian one in waves, except that
# the breakdown in China talks led to a decrease in inflation for Canadian groceries but an
# increase for American groceries. The IP investigation is looked at later. Trump's tariffs on
# Canadian goods seem to have had only a small impact on American groceries, but the
# combination of the Canadian tariff announcement, the Canadian tariffs and the Chinese
# tariffs on American exports may have had a larger one — investigated with the RD models below.
model_USA_RC_grocery_cutoff_first_tariff, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-06-01',
    '2018-03-01',
    '2018-04-01',
)
print(model_USA_RC_grocery_cutoff_first_tariff.summary())

model_USA_RC_grocery_cutoff_second_tariff, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2018-05-01',
    '2018-12-01',
    '2018-07-01',
    '2018-10-01',
)
print(model_USA_RC_grocery_cutoff_second_tariff.summary())

model_longterm_impact_groceries, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2019-12-01',
    '2018-02-01',
    '2018-04-01',
)
print(model_longterm_impact_groceries.summary())



In [None]:
#It appears American groceries were more impacted by the tariffs than Canadian groceries; perhaps this impact was more measurable since Canada had higher variance changes
#It is also important to note that Canada has a lower population and is more likely to have higher variance over time as a result
#but American tariffs on American goods did significantly increase inflation in the post period and treatment period while Canadian tariffs appeared to lower the tariffs post period but increase tariffs during the treatment period significantly. Significance was not found for the two tariff periods in the Canadian groceries.

#The tariffs only caused a small blip in the groceries while the long-term impact was harder to measure - let us investigate further



In [None]:
# List the US categories, then build (and show) the US parallel-trends chart for the two categories
print(df_USA_CPI_groupby['Category'].unique())
USA_parallel_trends_groceries = plot_for_parallel_trends(
    df_USA_CPI_groupby,
    '2017-01-01',
    '2019-12-01',
    'Shelter',
    'Education & Reading',
    category_3 = None,
)

USA_parallel_trends_groceries

In [None]:
# Difference-in-differences on the item-level scaled US frame (not the per-category medians)
differences_differences(
    df_USA_CPI_Scaled,
    'Shelter',
    'Education & Reading',
    '2018-03-01',
    '2018-05-01',
    '2018-04-01',
)

In [None]:
# Same difference-in-differences call on the per-category median frame
differences_differences(
    df_USA_CPI_groupby,
    'Shelter',
    'Education & Reading',
    '2018-03-01',
    '2018-05-01',
    '2018-04-01',
)

In [None]:


#differences in differences shows little impact on American groceries over the very short term (1 month), whereas in the Canadian dataset the American tariffs on Canadian goods had more of an impact - let us examine why with datasets of export and import





In [None]:
# Load the two processed US sales tables and print the last rows of each
USA_sales, USA_sales_groceries_vehicles = map(pd.read_csv, (
    "../data/processed/USA_Sales_Processed.csv",
    "../data/processed/USA_Sales_Groceries_Vehicles_Processed.csv",
))
for sales_frame in (USA_sales, USA_sales_groceries_vehicles):
    print(sales_frame.tail())

In [None]:
# Print every distinct 'cat_idx' value of the groceries/vehicles sales table, one per line
for category_index in USA_sales_groceries_vehicles['cat_idx'].unique():
    print(category_index)

In [None]:
# Duplicate three US sales columns under the column names used for the Canadian sales frame
for new_name, source_name in (
    ('PrincipleStats', 'dt_idx'),
    ('GoodType', 'cat_idx'),
    ('VALUE', "val"),
):
    USA_sales[new_name] = USA_sales[source_name]



In [None]:
# Print the distinct statistic names, the distinct good types and the column names,
# with a blank line between the three listings.
for stat_name in USA_sales['PrincipleStats'].unique():
    print(stat_name)

print()
for good_type in USA_sales['GoodType'].unique():
    print(good_type)
print()
for column_name in USA_sales.columns:
    print(column_name)

# Store both label columns as strings, then export the US and Canadian sales frames
for label_column in ('PrincipleStats', 'GoodType'):
    USA_sales[label_column] = USA_sales[label_column].astype(str)

USA_sales.to_csv("../data/processed/USA_Sales_Processed_Final.csv", index=False)
df_sales.to_csv("../data/processed/Canada_Sales_Processed_Final.csv", index=False)

In [None]:
# For every good type, print its 'Finished Goods Inventories Percent Change Monthly'
# rows dated 2017-01-01 through 2020-02-01. The statistic/date filter does not
# depend on the good type, so it is computed once.
inventory_rows = (
    (USA_sales['PrincipleStats'] == 'Finished Goods Inventories Percent Change Monthly')
    & (USA_sales['REF_DATE'] >= '2017-01-01')
    & (USA_sales['REF_DATE'] <= '2020-02-01')
)
for good_type in USA_sales['GoodType'].unique():
    print(good_type)
    print(USA_sales[(USA_sales['GoodType'] == good_type) & inventory_rows])


USA_groceries_supply_demand = plot_supply_and_demand(
    USA_sales,
    "Food Products",
    '2017-01-01',
    '2019-10-01',
    "Finished Goods Inventories",
    "Inventories to Shipments Ratios",
)
USA_groceries_supply_demand

In [None]:
#It seems that the tariffs increased finished inventory percentage change per month which decreased the following month meaning demand was changing based on the tariff changes in the news notably on 2018-04 and 2018-08 which indicates an instability in the market demand coinciding with momentary inflation for that month for American groceries.

In [None]:
# Compare how the spread of scaled CPI values across products changes over time in the
# USA and in Canada, to compare the effect of the tariffs in both countries.
# NOTE(review): the narrative and chart labels say "Groceries" and "Variance", but the rows
# selected are Category == 'Shelter' and the statistic computed is .std() — confirm both.

mask_USA_groceries = df_USA_CPI_Scaled['Category'] == 'Shelter'
mask_Canada_groceries = df_Canada_CPI_Scaled['Category'] == 'Shelter'

# assign/drop return new frames, so nothing is written into a filtered selection of
# the source frames (adding a column to such a selection in place triggers
# pandas' SettingWithCopyWarning).
df_USA_CPI_Scaled_groceries = (
    df_USA_CPI_Scaled[mask_USA_groceries]
    .assign(Country='USA')
    .drop(columns=['Products and product groups'])
)
df_Canada_CPI_Scaled_groceries = (
    df_Canada_CPI_Scaled[mask_Canada_groceries]
    .assign(Country='Canada')
    .drop(columns=['Products and product groups'])
)

df_USA_CPI_Scaled_groc_var = df_USA_CPI_Scaled_groceries.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()
df_Canada_CPI_Scaled_groc_var = df_Canada_CPI_Scaled_groceries.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()

df_USA_Canada_groceries = pd.concat([df_USA_CPI_Scaled_groc_var, df_Canada_CPI_Scaled_groc_var])

variance_groceries_CPI = plot_structure(
    df_USA_Canada_groceries,
    'Shelter',
    '2017-01-01',
    '2020-02-01',
    color = 'Country',
    x_label = "Date",
    y_label = "Variance of CPI index for Groceries",
    lines_to_plot = ['2017-08-01', '2017-11-01', '2019-05-01'],
    title = 'CPI Variance for Groceries in Canada and USA',
)

# 2x2 dashboard: parallel-trends charts on top, supply/demand charts below; saved as HTML.
groceries_dashboard = (
    (USA_parallel_trends_groceries | Canada_parallel_trends_groceries)
    & (USA_groceries_supply_demand | Canada_groceries_supply_demand)
)
groceries_dashboard.configure_axis(grid=False).properties(
    title = alt.TitleParams(
        text = 'US and Canada Groceries Inflation and Supply and Demand',
        subtitle = 'The tariffs caused a waves of inflation in groceries in both countries with food manufacturing being more affected in Canada than US',
    )
).save('US_Canada_Groceries_Inflation_Supply_Demand.html')


In [None]:
#It seems like the breakdown of the China talks decreased variance and the subsequent IP investigation increased variance in America first, while Canada had a lag in its decrease.
#The IP investigation and the tariffs appeared to increase variance following the breakdown in talks, meaning there was recuperation, but America did not quite recover following the end of the tariffs in May 2019.
# the decrease in the variance of the CPI may mean that the consumer market relied on only essential grocery goods during the tariffs - we should investigate further

In [None]:
# Load the processed Canadian table keyed by NAICS class
df_Canada_Number_Groceries = pd.read_csv(
    "../data/processed/Canada_Number_Groceries_Processed.csv"
)

In [None]:
# Preview the table, then print every distinct NAICS label, one per line
print(df_Canada_Number_Groceries.head())
naics_column = 'North American Industry Classification System (NAICS)'
for naics_label in df_Canada_Number_Groceries[naics_column].unique():
    print(naics_label)

In [None]:
# 0/1 flag per NAICS label, in order of first appearance: labels 9-14 get 1, the other 24 get 0.
# NOTE(review): the list is hard-coded to 30 entries and zip() stops at the shorter input,
# so this assumes exactly 30 distinct labels in a stable order — confirm.
lst_groceries_category = [0]*8 + [1]*6 + [0]*16
dict_groceries = dict(zip(
    df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'].unique(),
    lst_groceries_category,
))

In [None]:
# Attach the 0/1 flag from dict_groceries to every row as 'Category'
naics_labels = df_Canada_Number_Groceries['North American Industry Classification System (NAICS)']
df_Canada_Number_Groceries['Category'] = naics_labels.map(dict_groceries)

In [None]:
# Standard deviation per (REF_DATE, Category) after dropping the NAICS label and 'Sales'.
# reset_index is applied once: a second call would only add a spurious 'index' column.
df_Canada_Number_Groceries_groupby = (
    df_Canada_Number_Groceries
    .drop(columns = ['North American Industry Classification System (NAICS)', 'Sales'])
    .groupby(['REF_DATE', 'Category'])
    .std()
    .reset_index()
)

# Standardize the Category == 1 series; .ravel() writes a 1-D array into the column,
# matching how the CPI frames are scaled earlier in this notebook.
mask = (df_Canada_Number_Groceries_groupby['Category']==1)
scaler = StandardScaler()
df_Canada_Number_Groceries_groupby.loc[mask, "VALUE"] = scaler.fit_transform(
    df_Canada_Number_Groceries_groupby.loc[mask, 'VALUE'].values.reshape(-1, 1)
).ravel()
# display(): this call is not the last expression of the cell, so without it the
# returned chart would be discarded instead of shown.
display(plot_structure(df_Canada_Number_Groceries_groupby,1,'2017-01-01', '2020-02-01'))

# Item-level rows with Category == 1, standardized separately for each NAICS label.
# .copy() so the writes below do not target a filtered selection of the source frame.
df_Canada_Number_Groceries_plot_all = df_Canada_Number_Groceries[df_Canada_Number_Groceries['Category']==1].copy()
for unique_class in df_Canada_Number_Groceries_plot_all['North American Industry Classification System (NAICS)'].unique():
    scale = StandardScaler()
    mask = df_Canada_Number_Groceries_plot_all['North American Industry Classification System (NAICS)'] == unique_class
    df_Canada_Number_Groceries_plot_all.loc[mask, "VALUE"] = scale.fit_transform(
        df_Canada_Number_Groceries_plot_all.loc[mask, 'VALUE'].values.reshape(-1, 1)
    ).ravel()
alt.Chart(df_Canada_Number_Groceries_plot_all).mark_line().encode(
    x='REF_DATE',
    y='VALUE',
    color='North American Industry Classification System (NAICS)'
)

In [None]:
#It seems there was higher variance in terms of where the groceries were bought in Canada, coinciding with results from the breakdown in talks with China, the IP announcement and the tariffs - there was a reliance on convenience retailers and vending machines during the tariff period, which were then under-utilized during the recovery period following May 2019
# 
# This seems to indicate that the Canadian groceries for consumers did indeed recover following the tariffs and the recovery may have been stronger than American groceries.

#let us now turn our attention to clothing.

In [None]:
# Structure plot of the aggregated Canadian CPI for 'Clothing and footwear'
plot_structure(
    df_Canada_CPI_groupby,
    'Clothing and footwear',
    '2017-01-01',
    '2019-10-01',
)

In [None]:
#It doesnt appear Canadian clothing and footwear was that impacted by tariffs but let us look closer

In [None]:
# Supply/demand chart for Canadian leather and allied product manufacturing (stored, not displayed)
Canada_Supply_Demand_Clothing = plot_supply_and_demand(
    df_sales,
    "Leather and allied product manufacturing [316]",
    "2017-01-01",
    "2019-10-01",
)

In [None]:

# Supply/demand chart for Canadian apparel manufacturing, stored and shown as the cell output
Apparel_Canada = plot_supply_and_demand(
    df_sales,
    "Apparel manufacturing [315]",
    "2017-01-01",
    "2019-10-01",
)

Apparel_Canada

In [None]:
#It seems like manufacturing demand for clothing took a hit during July, which means there were fewer orders from clothing stores and the supply of clothing decreased

#Inflation should have increased a bit for clothing and footwear, but it was the impact of Trump's tariffs that caused the reduction in supply - this increase in inflation is difficult to measure in July due to the seasonal nature of clothing and footwear, but perhaps we can look at regression discontinuity

In [None]:
# Regression-discontinuity model for 'Clothing and footwear' on the item-level scaled Canadian frame
model_clothing_US_tariff_on_Canada, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_Scaled,
    'Clothing and footwear',
    '2018-03-01',
    '2018-09-01',
    '2018-07-01',
    '2018-08-01',
)

model_clothing_US_tariff_on_Canada.summary()

In [None]:
# Seasonality-adjusted model on the per-category medians; with seasonality=True the call
# is unpacked into five values (the model plus four charts), all of which are kept.
(
    model_clothing_US_tariff_groupby,
    clothing_chart_clothing_US_original,
    clothing_chart_clothing_US_diff,
    clothing_chart_clothing_US_vtrend,
    clothing_chart_clothing_US_trend,
) = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Clothing and footwear',
    '2017-08-01',
    '2019-10-01',
    '2019-03-01',
    '2019-08-01',
    seasonality=True,
)

model_clothing_US_tariff_groupby.summary()

In [None]:
#this visual trend is not picked up by statistics - which means there was lower impact on inflation perhaps because of the already abundance of clothing in stores and the already lack of demand

#In fact the effect of the tariffs was not seen until during March 2019 - August 2019 meaning there was a lack of supply for clothing which is why there was inflation, this impact was significant. 

In [None]:
#It appears to have caused a decrease in orders or manufacturing demand which means less supply for clothing and footwear, however, demand was still above supply in this time frame - the impact may not have been felt until in March 2019 because of the already high supply of clothing and footwear in retail stores



In [None]:
# For every good type, print its 'Finished Goods Inventories Percent Change Monthly'
# rows dated 2017-01-01 through 2020-02-01. The statistic/date filter does not
# depend on the good type, so it is computed once.
inventory_rows = (
    (USA_sales['PrincipleStats'] == 'Finished Goods Inventories Percent Change Monthly')
    & (USA_sales['REF_DATE'] >= '2017-01-01')
    & (USA_sales['REF_DATE'] <= '2020-02-01')
)
for good_type in USA_sales['GoodType'].unique():
    print(good_type)
    print(USA_sales[(USA_sales['GoodType'] == good_type) & inventory_rows])


USA_leather = plot_supply_and_demand(
    USA_sales,
    "Leather and Allied Products",
    '2017-01-01',
    '2019-10-01',
    "Finished Goods Inventories Percent Change Monthly",
    "Value of Shipments",
)


In [None]:
# Check whether the American data shows the same trend for 'Clothing and footwear'
plot_structure(
    df_USA_CPI_groupby,
    'Clothing and footwear',
    '2017-01-01',
    '2019-10-01',
)

In [None]:
# Seasonality-adjusted US clothing model, fitted twice with identical dates: first with the
# defaults (summary printed), then with fuzzy_sharp_omit=True. The second call overwrites
# the model and chart variables of the first.
(
    model_clothing_US_tariff,
    USA_chart_clothing_original,
    US_chart_clothing_diff,
    US_chart_clothing_vtrend,
    US_chart_clothing_trend,
) = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Clothing and footwear',
    '2017-01-01',
    '2019-08-01',
    '2017-10-01',
    '2019-02-01',
    seasonality=True,
)
print(model_clothing_US_tariff.summary())

(
    model_clothing_US_tariff,
    USA_chart_clothing_original,
    US_chart_clothing_diff,
    US_chart_clothing_vtrend,
    US_chart_clothing_trend,
) = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Clothing and footwear',
    '2017-01-01',
    '2019-08-01',
    '2017-10-01',
    '2019-02-01',
    seasonality=True,
    fuzzy_sharp_omit = True,
)

model_clothing_US_tariff.summary()


In [None]:
# Author's observation: Trump's tariffs seem to have increased inflation for American goods,
# while the Canadian/Chinese tariffs seem to have decreased inflation for American clothing
# and footwear. Investigate further with regression discontinuity.
model_clothing_US_tariff, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Clothing and footwear',
    '2017-10-01',
    '2018-06-01',
    '2018-02-01',
    '2018-04-01',
)

model_clothing_US_tariff.summary()

In [None]:
# Same dates as the previous model, fitted on the item-level scaled US frame
model_clothing_US_tariff, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_Scaled,
    'Clothing and footwear',
    '2017-10-01',
    '2018-06-01',
    '2018-02-01',
    '2018-04-01',
)

model_clothing_US_tariff.summary()

In [None]:
# US clothing model for the later set of dates, on the per-category medians
model_clothing_US_tariff, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Clothing and footwear',
    '2018-05-01',
    '2018-12-01',
    '2018-07-01',
    '2018-10-01',
)

model_clothing_US_tariff.summary()

In [None]:


# Spread of scaled CPI values across 'Clothing and footwear' products per date, USA vs Canada.
# NOTE(review): the chart labels say "Variance" but the statistic computed is .std() — confirm.
mask_USA_clothing = df_USA_CPI_Scaled['Category'] == 'Clothing and footwear'
mask_Canada_clothing = df_Canada_CPI_Scaled['Category'] == 'Clothing and footwear'

df_USA_CPI_Scaled_clothing = df_USA_CPI_Scaled[mask_USA_clothing]
df_Canada_CPI_Scaled_clothing = df_Canada_CPI_Scaled[mask_Canada_clothing]
print(df_USA_CPI_Scaled_clothing.head())

# assign/drop return new frames, so nothing is written into a filtered selection of
# the source frames (adding a column to such a selection in place triggers
# pandas' SettingWithCopyWarning).
df_USA_CPI_Scaled_clothing = (
    df_USA_CPI_Scaled_clothing
    .assign(Country='USA')
    .drop(columns=['Products and product groups'])
)
df_Canada_CPI_Scaled_clothing = (
    df_Canada_CPI_Scaled_clothing
    .assign(Country='Canada')
    .drop(columns=['Products and product groups'])
)

df_USA_CPI_Scaled_clothing_var = df_USA_CPI_Scaled_clothing.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()
df_Canada_CPI_Scaled_clothing_var = df_Canada_CPI_Scaled_clothing.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()

df_USA_Canada_clothing = pd.concat([df_USA_CPI_Scaled_clothing_var, df_Canada_CPI_Scaled_clothing_var])
print(df_USA_Canada_clothing.head())
CPI_variance_clothing = plot_structure(
    df_USA_Canada_clothing,
    'Clothing and footwear',
    '2017-01-01',
    '2020-02-01',
    color = 'Country',
    lines_to_plot = ['2019-05-01', '2019-11-01'],
    x_label = "Date",
    y_label = "Variance of CPI index for Clothing & Footwear",
    title = 'CPI Variance for Clothing & Footwear in Canada and USA',
)
# Stack the chart from the earlier comparison on top of the clothing chart as the cell output
(variance_groceries_CPI & CPI_variance_clothing).configure_axis(grid=False)

In [None]:
# 2x2 dashboard: trend charts on top, supply/demand charts below. The titled chart is
# saved to HTML as-is and displayed with axis grid lines switched off.
clothing_dashboard = (
    (US_chart_clothing_trend | clothing_chart_clothing_US_trend)
    & (USA_leather | Apparel_Canada)
).properties(
    title = alt.TitleParams(
        text = 'Clothing & Footwear Inflation and Supply and Demand',
        subtitle = 'The tariffs impact caused a rise in inflation which, in the post period seemed to affect Canada more than America, particularly in regards to manufacturing',
    )
)
clothing_dashboard.save('US_Canada_Clothing_Inflation_Supply_Demand.html')
display(clothing_dashboard.configure_axis(grid=False))

In [None]:
# Print the product labels filed under 'Clothing and footwear' for each country
print("USA Clothing & Footwear")
usa_clothing_rows = df_USA_CPI_Scaled['Category'] == 'Clothing and footwear'
for product_name in df_USA_CPI_Scaled.loc[usa_clothing_rows, 'Products and product groups'].unique():
    print(product_name)

print("Canada Clothing & Footwear")
canada_clothing_rows = df_Canada_CPI_Scaled['Category'] == 'Clothing and footwear'
for product_name in df_Canada_CPI_Scaled.loc[canada_clothing_rows, 'Products and product groups'].unique():
    print(product_name)


In [None]:
# Product labels per group and country, written without apostrophes so they match the
# cleaned label columns built below.
US_women = [
    "Women apparel",
    "Women dresses",
    "Women footwear",
    "Women outerwear",
    "Women suits and separates",
    "Women underwear, nightwear, swimwear, and accessories",
]
US_children = ["Infants and toddlers apparel"]
US_Jewelry = ['Jewelry']

Canada_women = [
    "Women clothing",
    "Women footwear (excluding athletic)",
]
Canada_children = [
    "Children clothing",
    "Children footwear (excluding athletic)",
]
Canada_Jewellery = ["Jewellery"]

# Copies of the scaled frames whose product labels have every "'s", and then any
# remaining "'", removed.
df_Canada_CPI_Scaled_clean = df_Canada_CPI_Scaled.copy()
df_Canada_CPI_Scaled_clean['Products and product groups'] = (
    df_Canada_CPI_Scaled_clean['Products and product groups']
    .str.replace("'s", '')
    .str.replace("'", '')
)
df_USA_CPI_Scaled_clean = df_USA_CPI_Scaled.copy()
df_USA_CPI_Scaled_clean['Products and product groups'] = (
    df_USA_CPI_Scaled_clean['Products and product groups']
    .str.replace("'s", '')
    .str.replace("'", '')
)

def mask_category(df, category_list):
    """Return a boolean mask of the rows whose product name is in ``category_list``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Products and product groups' column.
    category_list : iterable of str
        Product names to select. An empty iterable yields an all-False mask
        (the previous loop-based version raised UnboundLocalError here).

    Returns
    -------
    pandas.Series
        Boolean Series aligned with ``df.index``; True where the product name
        equals one of the entries in ``category_list``.
    """
    # list(...) so that any iterable (tuple, generator, ...) is accepted by isin.
    return df['Products and product groups'].isin(list(category_list))
def _clothing_group_mean(cpi_frame, group_mask, country, label):
    """Average the selected products per (REF_DATE, Category) and tag the result.

    Builds a new frame instead of calling ``drop(..., inplace=True)`` on a
    filtered slice, which is what triggered SettingWithCopyWarning before.

    Parameters
    ----------
    cpi_frame : pandas.DataFrame
        CPI frame containing 'Products and product groups', 'REF_DATE' and
        'Category' columns.
    group_mask : pandas.Series of bool
        Rows of ``cpi_frame`` that belong to the group.
    country : str
        Value written to the 'Country' column of the result.
    label : str
        Value that replaces the 'Category' column of the result.

    Returns
    -------
    pandas.DataFrame
        One row per (REF_DATE, original Category) with the mean of the
        remaining columns, plus 'Country' and the relabelled 'Category'.
    """
    return (
        cpi_frame.loc[group_mask]
        .drop(columns=['Products and product groups'])
        .groupby(['REF_DATE', 'Category'])
        .mean()
        .reset_index()
        .assign(Country=country, Category=label)
    )


# Row masks per group, computed on the apostrophe-free frames.
US_women_category = mask_category(df_USA_CPI_Scaled_clean, US_women)
Canada_women_category = mask_category(df_Canada_CPI_Scaled_clean, Canada_women)
Canada_children_category = mask_category(df_Canada_CPI_Scaled_clean, Canada_children)
US_children_category = mask_category(df_USA_CPI_Scaled_clean, US_children)
US_Jewelry_category = mask_category(df_USA_CPI_Scaled_clean, US_Jewelry)
Canada_Jewellery_category = mask_category(df_Canada_CPI_Scaled_clean, Canada_Jewellery)

# Per-group averages; both countries share the labels 'Women', 'Jewelry' and
# 'Child' so they can be compared on a single chart.
US_woman_data = _clothing_group_mean(df_USA_CPI_Scaled_clean, US_women_category, 'USA', 'Women')
US_Jewelry_data = _clothing_group_mean(df_USA_CPI_Scaled_clean, US_Jewelry_category, 'USA', 'Jewelry')
US_children_data = _clothing_group_mean(df_USA_CPI_Scaled_clean, US_children_category, 'USA', 'Child')

Canada_woman_data = _clothing_group_mean(df_Canada_CPI_Scaled_clean, Canada_women_category, 'Canada', 'Women')
Canada_Jewelry_data = _clothing_group_mean(df_Canada_CPI_Scaled_clean, Canada_Jewellery_category, 'Canada', 'Jewelry')
Canada_children_data = _clothing_group_mean(df_Canada_CPI_Scaled_clean, Canada_children_category, 'Canada', 'Child')

df_clothing_US = pd.concat([US_woman_data, US_Jewelry_data, US_children_data])
df_clothing_Canada = pd.concat([Canada_woman_data, Canada_Jewelry_data, Canada_children_data], axis = 0)

# Stacked long frame: USA rows first, then Canada (row indexes are not reset).
df_clothing = pd.concat([df_clothing_US, df_clothing_Canada], axis = 0)

print(df_clothing.tail())
def plot_clothing(df, product):
    """Line chart of VALUE over REF_DATE for one category, one line per Country.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with 'Category', 'REF_DATE', 'VALUE' and 'Country' columns.
    product : str
        Category label to plot; also used as the chart title.

    Returns
    -------
    alt.Chart
        Chart restricted to rows with REF_DATE between '2017-01-01' and
        '2020-02-01', both bounds inclusive.
    """
    in_category = df['Category'] == product
    in_window = df['REF_DATE'].between('2017-01-01', '2020-02-01')
    window_rows = df.copy()[in_category & in_window]
    return (
        alt.Chart(window_rows)
        .mark_line()
        .encode(x='REF_DATE', y='VALUE', color='Country')
        .properties(title=product)
    )






In [None]:
# Canada vs USA lines for the 'Jewelry' group (Canada's 'Jewellery' product and
# the US 'Jewelry' product are both labelled 'Jewelry' in df_clothing).
plot_clothing(df_clothing, 'Jewelry')

In [None]:
# Canada vs USA lines for the 'Child' group (Canada: children clothing and
# footwear; USA: infants and toddlers apparel).
plot_clothing(df_clothing, 'Child')

In [None]:
# Canada vs USA lines for the 'Women' group (mean over each country's
# women's clothing and footwear products).
plot_clothing(df_clothing, 'Women')

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler

# 'Women' rows for Canada and the USA, reduced to the three columns used below.
women_rows = (df_clothing['Category'] == 'Women') & df_clothing['Country'].isin(['Canada', 'USA'])
df_clothing_women = df_clothing.loc[women_rows, ['REF_DATE', 'Country', 'VALUE']].copy()

scale = StandardScaler()

def std_group(group):
    """Standardize ``group['VALUE']`` within the group and return its first entry.

    A fresh StandardScaler is fitted on the group's VALUE column only, and the
    standardized value of the group's first row is returned as a scalar.

    NOTE(review): if each group holds a single row, the result is presumably
    always 0.0 - confirm this is the intended statistic.
    """
    value_column = group['VALUE'].values.reshape(-1, 1)
    standardized = StandardScaler().fit_transform(value_column)
    return standardized[0, 0]
df_clothing_women = df_clothing_women.groupby(['REF_DATE', 'Country']).apply(lambda x: std_group(x)).reset_index()

# Per-date standard deviation of VALUE across each country's women's products.
# The masks were computed on the apostrophe-free copies; they are applied here
# to the original scaled frames, which share the same row index.
# .loc[...] + .assign(...) builds new frames, so no column is written onto a
# filtered slice (the earlier source of SettingWithCopyWarning).
Canada_woman_CPI = (
    df_Canada_CPI_Scaled.loc[Canada_women_category, ['REF_DATE', 'VALUE']]
    .assign(Country='Canada')
    .groupby(['REF_DATE', 'Country'])
    .std()
    .reset_index()
)
US_woman_CPI = (
    df_USA_CPI_Scaled.loc[US_women_category, ['REF_DATE', 'VALUE']]
    .assign(Country='USA')
    .groupby(['REF_DATE', 'Country'])
    .std()
    .reset_index()
)
df_clothing_wom = pd.concat([US_woman_CPI, Canada_woman_CPI], axis = 0)
# df_clothing_wom = df_clothing_wom[(df_clothing_wom['REF_DATE']>='2017-01-01')&(df_clothing_wom['REF_DATE']<='2020-02-01')]
alt.Chart(df_clothing_wom).mark_line().encode(
    x = 'REF_DATE',
    y = 'VALUE',
    color = 'Country'
)



In [None]:
# Print every distinct NAICS label in the Canadian retail frame, one per line.
naics_labels = df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'].unique()
for naics_label in naics_labels:
    print(naics_label)

In [None]:
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
# Total retail sales rows for the clothing/accessories retailer group [458].
is_total_clothing_sales = (
    (df_Canada_Number_Groceries['Sales'] == 'Total retail sales')
    & (df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'] == 'Clothing, clothing accessories, shoes, jewelry, luggage and leather goods retailers [458]')
)
# Explicit .copy(): VALUE is overwritten below, and writing to a filtered slice
# raises SettingWithCopyWarning.
df_clothing_canada_sales = df_Canada_Number_Groceries.loc[is_total_clothing_sales].copy()
# fit_transform returns an (n, 1) array; flatten it to one value per row.
df_clothing_canada_sales['VALUE'] = scale.fit_transform(
    df_clothing_canada_sales['VALUE'].values.reshape(-1, 1)
).flatten()
alt.Chart(df_clothing_canada_sales).mark_line().encode(
    x = 'REF_DATE',
    y = 'VALUE',
)

In [None]:
# Load the raw MRTSSM44812USS series; later cells read its 'observation_date'
# and 'MRTSSM44812USS' columns.
woman_clothing_US_sales = pd.read_csv('../data/raw/MRTSSM44812USS.csv')


In [None]:
# Preview the first rows of the loaded series.
woman_clothing_US_sales.head()

In [None]:
# Standardize the series in place, then plot it against the observation date.
scale = StandardScaler()
us_sales_column = woman_clothing_US_sales['MRTSSM44812USS'].values.reshape(-1, 1)
woman_clothing_US_sales['MRTSSM44812USS'] = scale.fit_transform(us_sales_column)
alt.Chart(woman_clothing_US_sales).mark_line().encode(
    alt.X('observation_date'),
    alt.Y('MRTSSM44812USS'),
)

In [None]:
# Load the US sales table; later cells use its 'Date', 'Clothing Sales',
# 'Grocery Sales' and 'Clothing Month Difference' columns.
US_sales_groceries_clothing = pd.read_csv("../data/raw/ClothingGroceriesUSSalesData.csv")

In [None]:


# Convert the comma-formatted sales figures to floats.
# The dtype guard keeps this cell re-runnable: once a column is numeric (after a
# first run, or because the CSV column had no commas), `.str` would raise, so
# only non-numeric columns get the comma strip. regex=False makes the literal
# replacement explicit regardless of the pandas default.
for sales_column in ('Clothing Sales', 'Grocery Sales'):
    sales_values = US_sales_groceries_clothing[sales_column]
    if not pd.api.types.is_numeric_dtype(sales_values):
        sales_values = sales_values.str.replace(',', '', regex=False)
    US_sales_groceries_clothing[sales_column] = sales_values.astype(float)
print(US_sales_groceries_clothing.head())

In [None]:
# Parse 'Date' into a datetime 'REF_DATE' column, the x-axis field used by the charts below.
US_sales_groceries_clothing['REF_DATE'] = pd.to_datetime(US_sales_groceries_clothing['Date'])

In [None]:
scale = StandardScaler()
# Standardize each sales column independently (the scaler is refitted per column).
for sales_column in ('Clothing Sales', 'Grocery Sales'):
    US_sales_groceries_clothing[sales_column] = scale.fit_transform(
        US_sales_groceries_clothing[sales_column].values.reshape(-1, 1)
    )

# Grocery-only frame in the Category/VALUE layout. .assign builds a new frame
# rather than writing columns onto a column-subset slice (SettingWithCopyWarning).
# NOTE(review): the grocery series is labelled 'Shelter'; the regression
# discontinuity cell looks it up under that exact name, so it is kept as-is.
US_sales_groceries = US_sales_groceries_clothing[['REF_DATE', 'Grocery Sales']].assign(
    Category='Shelter',
    VALUE=lambda frame: frame['Grocery Sales'],
)
alt.Chart(US_sales_groceries_clothing).mark_line().encode(
    x = 'REF_DATE',
    y = 'Clothing Sales',
)

In [None]:
# Line chart of the 'Clothing Month Difference' column over time.
alt.Chart(US_sales_groceries_clothing).mark_line().encode(
    alt.X('REF_DATE'),
    alt.Y('Clothing Month Difference', title='Clothing 12 month difference for Inventory to Sales ratio'),
)

In [None]:
# Interactive (pan/zoom) line chart of the standardized US grocery sales.
alt.Chart(US_sales_groceries_clothing).mark_line().encode(
    alt.X('REF_DATE'),
    alt.Y('Grocery Sales'),
).interactive()

In [None]:
# Load the egg dataset and turn its comma-formatted VALUE strings into floats.
df_num_sales_Canada = pd.read_csv("../data/raw/egg_dataset.csv")
df_num_sales_Canada['VALUE'] = (
    df_num_sales_Canada['VALUE']
    .str.replace(',', '')
    .astype(float)
)

In [None]:
# 'Date' is split on '-' into a month abbreviation and a two-digit year; build a
# first-of-month REF_DATE from them (the year is always prefixed with '20').
dict_month = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
egg_date_parts = df_num_sales_Canada['Date'].str.split('-')

df_num_sales_Canada['Month'] = egg_date_parts.str[0].map(dict_month)
df_num_sales_Canada['Year'] = '20' + egg_date_parts.str[1]
df_num_sales_Canada['Day'] = '01'
df_num_sales_Canada['REF_DATE'] = pd.to_datetime(df_num_sales_Canada[['Year', 'Month', 'Day']])


In [None]:

# Drop the 'Unnamed: 2' ... 'Unnamed: 57' columns in one call. errors='ignore'
# keeps the cell re-runnable: on a second run the columns are already gone and
# a plain drop would raise KeyError.
unnamed_columns = ['Unnamed: ' + str(num) for num in range(2, 58)]
df_num_sales_Canada = df_num_sales_Canada.drop(columns=unnamed_columns, errors='ignore')

scaler = StandardScaler()
df_num_sales_Canada['VALUE'] = scaler.fit_transform(df_num_sales_Canada['VALUE'].values.reshape(-1, 1))
df_num_sales_Canada['Category'] = ['Eggs']*len(df_num_sales_Canada)

# The chart is not the cell's last expression, so it must be displayed
# explicitly; as a bare expression followed by print() it was never rendered.
display(
    alt.Chart(df_num_sales_Canada).mark_line().encode(
        x = 'REF_DATE',
        y = 'VALUE',
    ).interactive()
)
print(df_num_sales_Canada.head(50))

In [None]:
# Load the Canadian retail table and rebuild REF_DATE as a first-of-month
# datetime: the raw value is split on '-' into a month abbreviation (mapped
# through dict_month, defined in an earlier cell) and a two-digit year.
Sales_Canada_Groceries = pd.read_csv('../data/raw/20100082.csv')
ref_date_parts = Sales_Canada_Groceries['REF_DATE'].str.split('-')

Sales_Canada_Groceries['Month'] = ref_date_parts.str[0].map(dict_month)
Sales_Canada_Groceries['Year'] = '20' + ref_date_parts.str[1]
Sales_Canada_Groceries['Day'] = '01'
Sales_Canada_Groceries['REF_DATE'] = pd.to_datetime(Sales_Canada_Groceries[['Year', 'Month', 'Day']])


In [None]:
# Expose the NAICS label under the 'Category' column name and list the labels.
Sales_Canada_Groceries['Category'] = Sales_Canada_Groceries['North American Industry Classification System (NAICS)']

print(Sales_Canada_Groceries['Category'].unique())

# Scatter of VALUE over time for the clothing retailers group [4581].
is_clothing_retailer = Sales_Canada_Groceries['Category'] == 'Clothing and clothing accessories retailers [4581]'
Sales_Canada_Clothing = Sales_Canada_Groceries[is_clothing_retailer]

alt.Chart(Sales_Canada_Clothing).mark_point().encode(
    alt.X('REF_DATE'),
    alt.Y('VALUE'),
).interactive()

In [None]:
# Regression-discontinuity fits on the sales series. Only the first of the four
# values returned by regression_discontinuity_model is kept.
# NOTE(review): the positional timestamps are presumably (window start,
# window end, cutoff[, second cutoff]) - confirm against models.py.
regression_discontinuity_sales_canada_groceries, _, _, _ = regression_discontinuity_model(
    Sales_Canada_Groceries,
    'Supermarkets and other grocery retailers (except convenience retailers) [44511]',
    pd.Timestamp('2017-10-01'),
    pd.Timestamp('2019-12-01'),
    pd.Timestamp('2018-07-01'),
    pd.Timestamp('2018-10-01'),
    point_line='point',
)

regression_discontinuity_sales_canada_eggs, _, _, _ = regression_discontinuity_model(
    df_num_sales_Canada,
    'Eggs',
    pd.Timestamp('2018-03-01'),
    pd.Timestamp('2019-12-01'),
    pd.Timestamp('2019-05-01'),
)

regression_discontinuity_sales_canada_eggs_IP, _, _, _ = regression_discontinuity_model(
    df_num_sales_Canada,
    'Eggs',
    pd.Timestamp('2017-10-01'),
    pd.Timestamp('2019-05-01'),
    pd.Timestamp('2018-03-01'),
)

# 'Shelter' is the Category label given to the US grocery sales frame.
regression_discontinuity_sales_usa_groceries, _, _, _ = regression_discontinuity_model(
    US_sales_groceries,
    'Shelter',
    pd.Timestamp('2017-10-01'),
    pd.Timestamp('2019-12-01'),
    pd.Timestamp('2018-02-01'),
    pd.Timestamp('2018-04-01'),
)

# Print the fit summaries in the order the models were estimated.
for fitted_model in (
    regression_discontinuity_sales_canada_groceries,
    regression_discontinuity_sales_canada_eggs,
    regression_discontinuity_sales_canada_eggs_IP,
    regression_discontinuity_sales_usa_groceries,
):
    print(fitted_model.summary())


In [None]:
#Clothing and footwear prices appear to be seasonal in both countries.
#The American market seemed less affected by the tariffs: although clothing supply did appear to fall, the abundance of ordinary clothing may have compensated for it. In fact, manufacturing does not appear to have been affected beyond month-to-month changes during the tariff period, until close to the COVID period.
#American clothing inflation rose during the treatment period because of the American tariffs, which may have benefited the clothing industry, but this was reversed by the Canadian tariffs.
#The Canadian clothing and footwear market was affected more by the tariffs, and its recovery was weaker than the American market's. Because retail stores already held a large supply of clothing, the estimated effect of the Trump tariffs did not appear until March 2019.
#In other words, clothing manufacturing was affected by August 2018, but retail stores were not affected until March 2019, and they were heavily disrupted near the COVID period.




In [None]:
#This ends the groceries and clothing/footwear section of our analysis.