In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import sys
import altair as alt
sys.path.append('../src')
from models import plot_structure, plot_supply_and_demand, regression_discontinuity_model, differences_differences, plot_for_parallel_trends 

#categories for products and services gotten by LLMs
df_model_data_CAN = pd.read_csv('../data/processed/CAN_Categorized_Products_and_Services_NEW.csv')
df_model_data_USA = pd.read_csv('../data/processed/US_Categorized_Products_and_Services_NEW.csv')

#CPI data for Canada and USA to analyze grocery and clothing and footwear inflation
df_Canada_CPI = pd.read_csv('../data/processed/Canada_CPI_Processed_2018_2019.csv')
df_USA_CPI = pd.read_csv('../data/processed/USA_CPI_Processed_2018_2019.csv')

print(df_model_data_CAN.head())
df_model_data_USA.head()

                                   Product_Service Category
0                                    Durable goods      ???
1                                            Goods      ???
2                             Household operations      ???
3  Household operations, furnishings and equipment      ???
4                        Housing (1986 definition)      ???


Unnamed: 0,Product_Service,Category
0,Airline fares,Transportation
1,Alcoholic beverages,Alcohol & Tobacco
2,Alcoholic beverages at home,Alcohol & Tobacco
3,All items,Uncategorized
4,All items less energy,Energy


In [2]:

#standardize and get scaled and non-scaled data for CPI data
df_USA_CPI_melt = pd.melt(df_USA_CPI, var_name = 'Products and product groups',value_name = 'VALUE', id_vars = 'REF_DATE')

df_Canada_CPI_Scaled = df_Canada_CPI.copy()
df_Canada_CPI_NonScaled = df_Canada_CPI.copy()

df_USA_CPI_Scaled = df_USA_CPI_melt.copy()
df_USA_CPI_NonScaled = df_USA_CPI_melt.copy()
for unique_product in df_Canada_CPI_Scaled['Products and product groups'].unique():
    stan_scale = StandardScaler()
    mask = df_Canada_CPI_Scaled['Products and product groups']==unique_product
    df_Canada_CPI_Scaled.loc[mask, "VALUE"] = stan_scale.fit_transform(df_Canada_CPI_Scaled.loc[mask, "VALUE"].values[:, None]).flatten()
for unique_product in df_USA_CPI_Scaled['Products and product groups'].unique():
    stan_scale = StandardScaler()
    mask = df_USA_CPI_Scaled['Products and product groups']==unique_product
    df_USA_CPI_Scaled.loc[mask, "VALUE"] = stan_scale.fit_transform(df_USA_CPI_Scaled.loc[mask, "VALUE"].values[:, None]).flatten()

In [3]:
#Canada and US datasets scaled and non-scaled and make category columns
dict_CAN = df_model_data_CAN.drop_duplicates(subset=['Product_Service']).set_index('Product_Service')['Category'].to_dict()
dict_USA = df_model_data_USA.drop_duplicates(subset=['Product_Service']).set_index('Product_Service')['Category'].to_dict()

df_Canada_CPI_Scaled['Category'] = df_Canada_CPI['Products and product groups'].map(dict_CAN)
df_Canada_CPI_NonScaled['Category'] = df_Canada_CPI['Products and product groups'].map(dict_CAN)
df_USA_CPI_Scaled['Category'] = df_USA_CPI_Scaled['Products and product groups'].map(dict_USA)
df_USA_CPI_NonScaled['Category'] = df_USA_CPI_NonScaled['Products and product groups'].map(dict_USA)


In [4]:
#make groupby dataframe get median of each category
df_Canada_CPI_Cat = df_Canada_CPI_Scaled.copy().drop(columns = ['Products and product groups'])
df_Canada_CPI_groupby = df_Canada_CPI_Cat.groupby(['REF_DATE', 'Category']).median().reset_index()

df_Canada_CPI_Cat_NonScaled = df_Canada_CPI_NonScaled.copy().drop(columns = ['Products and product groups'])
df_Canada_CPI_groupby_NonScaled = df_Canada_CPI_Cat_NonScaled.groupby(['REF_DATE', 'Category']).median().reset_index()

df_USA_CPI_Cat = df_USA_CPI_Scaled.copy().drop(columns = ['Products and product groups'])
df_USA_CPI_groupby = df_USA_CPI_Cat.groupby(['REF_DATE', 'Category']).median().reset_index()

df_USA_CPI_Cat_NonScaled = df_USA_CPI_NonScaled.copy().drop(columns = ['Products and product groups'])
df_USA_CPI_groupby_NonScaled = df_USA_CPI_Cat_NonScaled.groupby(['REF_DATE', 'Category']).median().reset_index()

In [5]:
#look at all potential categories
for column in df_Canada_CPI_groupby['Category'].unique():
    print(column)

???
Alcohol & Tobacco
Appliances & Equipment
Clothing & Footwear Groups
Clothing and footwear
Communication
Education & Reading
Energy
Furniture & Home Decor
Gardening
Groceries
Health & Personal Care
Household Supplies
Housing & Utilities
Housing, Rent, Tax  & Insurance
Recreation & Entertainment
Services
Shelter
Transportation
Vehicles


In [6]:
#look at plot groceries which was more impacted and compare to housing, rent, tax and insurance which was less impacted by tariffs
plot_for_parallel_trends(df_Canada_CPI_groupby, '2018-01-01', '2018-11-01', 'Groceries', 'Housing, Rent, Tax  & Insurance')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [7]:
#look at china tariffs for differences in differences to get a better idea for one month
differences_differences(df_Canada_CPI_groupby, 'Groceries', 'Housing, Rent, Tax  & Insurance', '2018-07-01', '2018-09-01', '2018-08-01')

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,VALUE,R-squared:,0.995
Model:,OLS,Adj. R-squared:,0.989
Method:,Least Squares,F-statistic:,38.83
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.0252
Time:,21:51:12,Log-Likelihood:,7.6389
No. Observations:,6,AIC:,-7.278
Df Residuals:,2,BIC:,-8.111
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.1294,0.053,21.284,0.000,1.025,1.233
tariff_non_tariffed,-1.7689,0.166,-10.661,0.000,-2.094,-1.444
post,-0.5890,1.085,-0.543,0.587,-2.715,1.537
tariff_non_tariffed:post,-0.2919,1.858,-0.157,0.875,-3.933,3.349

0,1,2,3
Omnibus:,,Durbin-Watson:,1.854
Prob(Omnibus):,,Jarque-Bera (JB):,0.076
Skew:,0.0,Prob(JB):,0.963
Kurtosis:,2.449,Cond. No.,6.32


In [8]:
#It seems like the groupby shows some interaction 
#let us investigate into specific grocery categories to see which groceries may have been impacted by the tariffs

In [9]:

grocery_items = df_Canada_CPI_NonScaled[(df_Canada_CPI_NonScaled['Category'] == 'Groceries')]['Products and product groups'].unique()

grocery_dataframe = df_Canada_CPI_NonScaled[(df_Canada_CPI_NonScaled['Category'] == 'Groceries') | (df_Canada_CPI_NonScaled['Category'] == 'Housing, Rent, Tax  & Insurance')]


dict_grocery = food_categories = {
    'Apples': 'Fruit',
    'Baby foods': 'Prepared Foods',
    'Bakery and cereal products (excluding baby food)': 'Bakery and Cereal',
    'Bakery products': 'Bakery and Cereal',
    'Bananas': 'Fruit',
    'Bread, rolls and buns': 'Bakery and Cereal',
    'Breakfast cereal and other cereal products (excluding baby food)': 'Bakery and Cereal',
    'Butter': 'Dairy',
    'Canned and other preserved fish': 'Fish and Seafood',
    'Canned vegetables and other vegetable preparations': 'Vegetables',
    'Cereal products (excluding baby food)': 'Bakery and Cereal',
    'Cheese': 'Dairy',
    'Coffee': 'Beverages',
    'Coffee and tea': 'Beverages',
    'Condiments, spices and vinegars': 'Condiments and Spices',
    'Confectionery': 'Sweets',
    'Cookies and crackers': 'Bakery and Cereal',
    'Dairy products': 'Dairy',
    'Dairy products and eggs': 'Dairy',
    'Edible fats and oils': 'Fats and Oils',
    'Eggs': 'Dairy',
    'Fish': 'Fish and Seafood',
    'Fish, seafood and other marine products': 'Fish and Seafood',
    'Flour and flour-based mixes': 'Bakery and Cereal',
    'Food': 'General Category',
    'Food and energy': 'General Category',
    'Food purchased from cafeterias and other restaurants': 'Dining Out',
    'Food purchased from fast food and take-out restaurants': 'Dining Out',
    'Food purchased from restaurants': 'Dining Out',
    'Food purchased from stores': 'General Category',
    'Food purchased from table-service restaurants': 'Dining Out',
    'Fresh fruit': 'Fruit',
    'Fresh fruit and vegetables': 'Produce',
    'Fresh milk': 'Dairy',
    'Fresh or frozen beef': 'Meat',
    'Fresh or frozen chicken': 'Meat',
    'Fresh or frozen fish (including portions and fish sticks)': 'Fish and Seafood',
    'Fresh or frozen meat (excluding poultry)': 'Meat',
    'Fresh or frozen pork': 'Meat',
    'Fresh or frozen poultry': 'Meat',
    'Fresh vegetables': 'Vegetables',
    'Frozen and dried vegetables': 'Vegetables',
    'Frozen food preparations': 'Prepared Foods',
    'Fruit juices': 'Beverages',
    'Fruit, fruit preparations and nuts': 'Fruit',
    'Goods excluding food purchased from stores': 'General Category',
    'Goods excluding food purchased from stores and energy': 'General Category',
    'Ham and bacon': 'Meat',
    'Ice cream and related products': 'Dairy',
    'Lettuce': 'Vegetables',
    'Margarine': 'Fats and Oils',
    'Meat': 'Meat',
    'Non-alcoholic beverages': 'Beverages',
    'Non-durable goods excluding food purchased from stores': 'General Category',
    'Non-durable goods excluding food purchased from stores and energy': 'General Category',
    'Nuts and seeds': 'Nuts and Seeds',
    'Oranges': 'Fruit',
    'Pasta products': 'Pasta',
    'Pet food and supplies': 'Pet Supplies',
    'Potatoes': 'Vegetables',
    'Preserved fruit and fruit preparations': 'Fruit',
    'Preserved vegetables and vegetable preparations': 'Vegetables',
    'Processed meat': 'Meat',
    'Rice and rice-based mixes': 'Grains',
    'Seafood and other marine products': 'Fish and Seafood',
    'Soup': 'Prepared Foods',
    'Sugar and confectionery': 'Sweets',
    'Sugar and syrup': 'Sweets',
    'Tea': 'Beverages',
    'Tomatoes': 'Vegetables',
    'Vegetables and vegetable preparations': 'Vegetables',
    'Homeowners': 'Housing, Rent, Tax  & Insurance',
    'Homeowners\' replacement cost': 'Housing, Rent, Tax  & Insurance',
    'Homeowners\' maintenance and repairs': 'Housing, Rent, Tax  & Insurance',
    'Mortgage interest cost': 'Housing, Rent, Tax  & Insurance',
    'Property taxes': 'Housing, Rent, Tax  & Insurance',
    'Rent': 'Housing, Rent, Tax  & Insurance',
    'Rented accommodation': 'Housing, Rent, Tax  & Insurance',
    'Shelter': 'Housing, Rent, Tax  & Insurance',
    'Shelter (1986)': 'Housing, Rent, Tax  & Insurance',
    'Tenants\' maintenance, repairs and other expenses': 'Housing, Rent, Tax  & Insurance'

}

dict_grocery_US = {
    'Canned fruits': 'Fruit',
    'Citrus fruits': 'Fruit',
    'Fresh fruits': 'Fruit',
    'Frozen fruits and vegetables': 'Fruit',
    'Other fresh fruits': 'Fruit',
    'Processed fruits and vegetables': 'Fruit',
    'Fruits and vegetables': 'Fruit',
    'Fresh fruits and vegetables': 'Fruit',
    
    'Canned vegetables': 'Vegetables',
    'Fresh vegetables': 'Vegetables',
    'Frozen vegetables': 'Vegetables',
    'Other fresh vegetables': 'Vegetables',
    
    'Cereals and bakery products': 'Bakery and Cereal',
    'Other bakery products': 'Bakery and Cereal',
    
    'Dairy and related products': 'Dairy',
    
    'Fish and seafood': 'Fish and Seafood',
    'Frozen fish and seafood': 'Fish and Seafood',
    'Processed fish and seafood': 'Fish and Seafood',
    'Shelf stable fish and seafood': 'Fish and Seafood',
    
    'Meats': 'Meat',
    'Meats, poultry, and fish': 'Meat',
    'Meats, poultry, fish, and eggs': 'Meat',
    'Other meats': 'Meat',
    
    'Beverage materials including coffee and tea': 'Beverages',
    
    'Frozen and freeze dried prepared foods': 'Prepared Foods',
    
    'All items less food': 'General Category',
    'All items less food and shelter': 'General Category',
    'Commodities less food': 'General Category',
    'Commodities less food and beverages': 'General Category',
    'Food': 'General Category',
    'Food and beverages': 'General Category',
    'Food at home': 'General Category',
    'Nondurables less food': 'General Category',
    'Nondurables less food and beverages': 'General Category',

    'Other food at home': 'Other Foods',
    'Other foods': 'Other Foods',
    'Other miscellaneous foods': 'Other Foods'
}



US_grocery_dataset = df_USA_CPI_NonScaled[(df_USA_CPI_NonScaled['Category'] == 'Groceries')]



US_grocery_dataset['Category'] = US_grocery_dataset['Products and product groups'].map(dict_grocery_US)

for cat in US_grocery_dataset['Category'].unique():
    try: 
        scale = StandardScaler()
        mask = US_grocery_dataset['Category']==cat
        US_grocery_dataset.loc[mask, 'VALUE'] = scale.fit_transform(US_grocery_dataset.loc[mask, 'VALUE'].values[:, None]).flatten()
    except:
        continue

US_fish_dairy = US_grocery_dataset[(US_grocery_dataset['REF_DATE']>='2018-06-01') & (US_grocery_dataset['REF_DATE']<='2018-09-01') & ((US_grocery_dataset['Category'] == 'Dairy')|(US_grocery_dataset['Category'] == 'Fish and Seafood'))]
print(US_fish_dairy.head(50))
US_fish_dairy_groupby = US_fish_dairy.copy()
US_fish_dairy_groupby = US_fish_dairy_groupby[['REF_DATE', 'Category', 'VALUE']]
US_fish_dairy_groupby = US_fish_dairy_groupby.groupby(['REF_DATE', 'Category']).median().reset_index()



        REF_DATE    Products and product groups     VALUE          Category
2129  2018-06-01     Dairy and related products -0.548498             Dairy
2130  2018-07-01     Dairy and related products -0.677357             Dairy
2131  2018-08-01     Dairy and related products -0.653838             Dairy
2132  2018-09-01     Dairy and related products -0.741490             Dairy
2849  2018-06-01               Fish and seafood  0.780318  Fish and Seafood
2850  2018-07-01               Fish and seafood  0.790353  Fish and Seafood
2851  2018-08-01               Fish and seafood  0.853603  Fish and Seafood
2852  2018-09-01               Fish and seafood  0.823345  Fish and Seafood
3521  2018-06-01        Frozen fish and seafood  0.913757  Fish and Seafood
3522  2018-07-01        Frozen fish and seafood  0.880561  Fish and Seafood
3523  2018-08-01        Frozen fish and seafood  0.949278  Fish and Seafood
3524  2018-09-01        Frozen fish and seafood  1.000974  Fish and Seafood
8225  2018-0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_grocery_dataset['Category'] = US_grocery_dataset['Products and product groups'].map(dict_grocery_US)


In [10]:
#make grocery categories within the grocery category dataframe
grocery_dataframe['Category'] = grocery_dataframe['Products and product groups'].map(dict_grocery)

grocery_dataframe.drop(columns = ['Products and product groups'], inplace=True)
print(grocery_dataframe['Category'].unique())
grocery_dataframe_groupby = grocery_dataframe.groupby(['REF_DATE', 'Category']).mean().reset_index()

['Fruit' 'Prepared Foods' 'Bakery and Cereal' 'Dairy' 'Fish and Seafood'
 'Vegetables' 'Beverages' 'Condiments and Spices' 'Sweets' 'Fats and Oils'
 'General Category' 'Dining Out' 'Produce' 'Meat' 'Nuts and Seeds' 'Pasta'
 'Pet Supplies' 'Housing, Rent, Tax  & Insurance' 'Grains' nan]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grocery_dataframe['Category'] = grocery_dataframe['Products and product groups'].map(dict_grocery)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grocery_dataframe.drop(columns = ['Products and product groups'], inplace=True)


In [11]:
fish_vs_dairy = plot_for_parallel_trends(grocery_dataframe_groupby, '2018-06-01', '2018-09-01', 'Fish and Seafood', 'Dairy', title = 'Fish vs Diary')

fish_vs_dairy

fish_dairy_US = alt.Chart(US_fish_dairy_groupby).mark_line().encode(
    x = 'REF_DATE',
    y = 'VALUE',
    color = 'Category:N'
).properties(
    title = 'Dairy and Fish and Seafood CPI in USA')

fish_dairy_US

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [12]:
plot_for_parallel_trends(grocery_dataframe_groupby, '2018-06-01', '2018-09-01', 'Grains', 'Pasta', 'Vegetables')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [13]:
plot_for_parallel_trends(grocery_dataframe_groupby, '2018-06-01', '2018-09-01', 'Sweets', 'Fruit')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [14]:
plot_for_parallel_trends(grocery_dataframe_groupby, '2018-06-01', '2018-09-01', 'Meat', 'Pet Supplies')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [15]:
plot_for_parallel_trends(grocery_dataframe_groupby, '2018-06-01', '2018-09-01', 'General Category', 'Dining Out')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [16]:
#It seems most items follow a sharp downward trend in august when china tariffs were implemented and were not as affected by Canadian tariffs
#Only dairy and seafood and fish seem to follow the parallel trends assumption 
#let us investigate differences in differences to get an idea of the impact of the tariffs on groceries that followed the aforementioned pattern
#these groceries seem to have to do with everyday food group except carbohydrates and vegetables
differences_differences(grocery_dataframe_groupby, 'Fish and Seafood', 'Dairy', '2018-07-01', '2018-09-01', '2018-08-01')

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,VALUE,R-squared:,0.855
Model:,OLS,Adj. R-squared:,0.637
Method:,Least Squares,F-statistic:,4.082
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.203
Time:,21:51:13,Log-Likelihood:,-2.7231
No. Observations:,6,AIC:,13.45
Df Residuals:,2,BIC:,12.61
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.3060,0.558,-0.548,0.584,-1.401,0.789
tariff_non_tariffed,0.7692,0.933,0.824,0.410,-1.060,2.598
post,1.6425,0.558,2.941,0.003,0.548,2.737
tariff_non_tariffed:post,-3.7570,2.207,-1.702,0.089,-8.083,0.569

0,1,2,3
Omnibus:,,Durbin-Watson:,1.341
Prob(Omnibus):,,Jarque-Bera (JB):,0.476
Skew:,-0.0,Prob(JB):,0.788
Kurtosis:,1.621,Cond. No.,6.32


In [17]:
#It seems the groupby shows a significant difference but when looking at the individual items, the difference is not significant
#perhaps there was not enough data and the set was underpowered but it provides some evidence from the plots that the tariffs may have had a slight impact on goods - let us investigate further

In [18]:
#df_sales for Canada and USA
df_sales = pd.read_csv("../data/processed/Canada_Sales_Processed_Final.csv")

In [19]:
df_sales.columns = ['PrincipleStats', 'GoodType', 'REF_DATE', 'VALUE']
df_sales['PrincipleStats'].unique()

array(['Finished goods manufactured, estimated values at end of  month',
       'Goods or work in process, estimated values at end of month',
       'New orders, estimated values of orders received during month',
       'Raw materials, fuel, supplies, components, estimated values at end of month',
       'Sales of goods manufactured (shipments)',
       'Total inventory, estimated values of total inventory at end of the month',
       'Unfilled orders, estimated values of orders at end of month'],
      dtype=object)

In [20]:
for column in df_sales['GoodType'].unique():
    print(column)

Aerospace product and parts manufacturing [3364]
Apparel manufacturing [315]
Beverage and tobacco product manufacturing [312]
Chemical manufacturing [325]
Chocolate and chocolate confectionery manufacturing [31135]
Computer and electronic product manufacturing [334]
Durable goods industries
Electrical equipment, appliance and component manufacturing [335]
Fabricated metal product manufacturing [332]
Flour mixes, dough, and pasta manufacturing from purchased flour [311824]
Food manufacturing [311]
Furniture and related product manufacturing [337]
Industrial machinery manufacturing [33324]
Infants' cut and sew clothing manufacturing [315241]
Leather and allied product manufacturing [316]
Machinery manufacturing [333]
Manufacturing [31-33]
Miscellaneous manufacturing [339]
Motor vehicle body and trailer manufacturing [3362]
Motor vehicle manufacturing [3361]
Motor vehicle parts manufacturing [3363]
Non-durable goods industries
Non-metallic mineral product manufacturing [327]
Other cut and

In [21]:
#On august the unfilled orders was lowered meaning less food manufacturing demand and less supply to retail store and more inflation for that month
plot_supply_and_demand(df_sales, "Food manufacturing [311]", x_label = 'Date', y_label = 'Food Manufacturing Supply and Demand')

6524    7613953.0
6525    7619427.0
6526    7640012.0
6527    7686838.0
6528    7672824.0
Name: VALUE, dtype: float64
7748    617683.0
7749    644924.0
7750    650985.0
7751    640994.0
7752    594251.0
Name: VALUE, dtype: float64


In [22]:
#it seems more inventory meant that less orders were filled - this could be due to the tariffs - and it also seems that there were fewer unfilled orders meaning less demand for food manufacturing which means supply went down and inflation should go up.
#but since demand and inventory are similar, it is hard to say if the tariffs had an impact on inflation and the original models without the groupby may be right 

In [23]:
#lets look earlier
plot_supply_and_demand(df_sales, "Food manufacturing [311]", '2017-10', '2018-06')


6524    7613953.0
6525    7619427.0
6526    7640012.0
6527    7686838.0
6528    7672824.0
Name: VALUE, dtype: float64
7748    617683.0
7749    644924.0
7750    650985.0
7751    640994.0
7752    594251.0
Name: VALUE, dtype: float64


In [24]:
model_Canada_RC_groceries_first_tariff, _, _, _ = regression_discontinuity_model(df_Canada_CPI_groupby, 'Groceries', '2017-10', '2018-06', '2018-03', '2018-04')

model_Canada_RC_groceries_US_tariff, _, _, _ = regression_discontinuity_model(df_Canada_CPI_groupby, 'Groceries', '2017-10', '2018-12', '2018-07', '2018-10')



Product:  Groceries


  return hypotest_fun_in(*args, **kwds)


5
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.646
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     3.444
Date:                Fri, 11 Apr 2025   Prob (F-statistic):              0.101
Time:                        21:51:13   Log-Likelihood:               -0.20393
No. Observations:                   9   AIC:                             6.408
Df Residuals:                       6   BIC:                             7.000
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6366      0.231      

9
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.661
Model:                            OLS   Adj. R-squared:                  0.605
Method:                 Least Squares   F-statistic:                     12.69
Date:                Fri, 11 Apr 2025   Prob (F-statistic):            0.00109
Time:                        21:51:13   Log-Likelihood:               -0.92772
No. Observations:                  15   AIC:                             7.855
Df Residuals:                      12   BIC:                             9.980
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6036      0.121      

  return hypotest_fun_in(*args, **kwds)


In [25]:

print(model_Canada_RC_groceries_first_tariff.summary())
print(model_Canada_RC_groceries_US_tariff.summary()) 

                            OLS Regression Results                            
Dep. Variable:          VALUE_DETREND   R-squared:                       0.260
Model:                            OLS   Adj. R-squared:                 -0.184
Method:                 Least Squares   F-statistic:                    0.3956
Date:                Fri, 11 Apr 2025   Prob (F-statistic):              0.762
Time:                        21:51:13   Log-Likelihood:                 1.0298
No. Observations:                   9   AIC:                             5.940
Df Residuals:                       5   BIC:                             6.729
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept                 -0

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


In [26]:
model_Canada_RC_groceries_second_tariff, _, _, _ = regression_discontinuity_model(df_Canada_CPI_groupby, 'Groceries', '2018-05', '2018-12', '2018-07', '2018-10')

Product:  Groceries


2
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.778
Model:                            OLS   Adj. R-squared:                  0.689
Method:                 Least Squares   F-statistic:                     8.231
Date:                Fri, 11 Apr 2025   Prob (F-statistic):             0.0262
Time:                        21:51:13   Log-Likelihood:                0.20998
No. Observations:                   8   AIC:                             5.580
Df Residuals:                       5   BIC:                             5.818
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5556      0.161      

  return hypotest_fun_in(*args, **kwds)


In [27]:
model_Canada_RC_groceries_second_tariff.summary()

  return hypotest_fun_in(*args, **kwds)


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.529
Model:,OLS,Adj. R-squared:,0.175
Method:,Least Squares,F-statistic:,0.6311
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.633
Time:,21:51:13,Log-Likelihood:,2.1432
No. Observations:,8,AIC:,3.714
Df Residuals:,4,BIC:,4.031
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.2901,0.824,-0.352,0.725,-1.906,1.326
PredTreatment,0.6103,1.546,0.395,0.693,-2.420,3.641
Num_Date,0.1049,0.189,0.554,0.580,-0.266,0.476
PredTreatment:Num_Date,-0.3065,0.565,-0.543,0.587,-1.413,0.800

0,1,2,3
Omnibus:,0.207,Durbin-Watson:,2.599
Prob(Omnibus):,0.902,Jarque-Bera (JB):,0.198
Skew:,0.229,Prob(JB):,0.906
Kurtosis:,2.38,Cond. No.,19.4


In [28]:
first_tariff_vs_second_tariff, _, _, _ = regression_discontinuity_model(df_Canada_CPI_groupby, 'Groceries', '2017-10', '2018-12', '2018-05')

first_tariff_vs_second_tariff.summary()

Product:  Groceries


  return hypotest_fun_in(*args, **kwds)


7


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.064
Model:,OLS,Adj. R-squared:,-0.192
Method:,Least Squares,F-statistic:,0.09946
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.959
Time:,21:51:13,Log-Likelihood:,1.2596
No. Observations:,15,AIC:,5.481
Df Residuals:,11,BIC:,8.313
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.1487,0.365,-0.408,0.684,-0.864,0.566
above_or_below,0.1979,0.444,0.446,0.656,-0.672,1.067
Num_Date,-0.0218,0.074,-0.296,0.767,-0.166,0.122
above_or_below:Num_Date,0.0132,0.094,0.141,0.888,-0.170,0.197

0,1,2,3
Omnibus:,0.18,Durbin-Watson:,2.57
Prob(Omnibus):,0.914,Jarque-Bera (JB):,0.106
Skew:,-0.138,Prob(JB):,0.948
Kurtosis:,2.693,Cond. No.,22.2


In [29]:
#it seems like american tariffs had little immediate impact on Canadian grocery retailers when looking at manufacturing even the only month affected was the China tariffs in August as mentioned supply was still high - it is hard to disentangle the impact of the tariffs and the news so perhaps we should look for a more long term approach to gain more insight to what happened with groceries


Canada_parallel_trends_groceries = plot_for_parallel_trends(df_Canada_CPI_groupby, '2017-01-01', '2019-10-01', 'Groceries', 'Education & Reading')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [30]:
#it is hard to quantify the impact of groceries based on specific tariffs due to the serial correlation of tariffs but looking from the start of the breakdown in China talks to the Chinese hike tax we will look at RD between these periods and see if we can draw conclusions
#there does seem to be greater variance from the start of the Intellectual Property Trials to the Chinese hike in tariffs that started propagating like a wave with each tariff with minimal activity following the results of the IP investigation - let us investigate further
model_groceries_longterm_IP, _, _, _ = regression_discontinuity_model(df_Canada_CPI_groupby, 'Groceries', '2017-01', '2018-02', '2017-08', heteroskedasticity = 'nonrobust')

model_groceries_longterm_IP.summary()

Product:  Groceries


6


  return hypotest_fun_in(*args, **kwds)


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.547
Model:,OLS,Adj. R-squared:,0.397
Method:,Least Squares,F-statistic:,3.628
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.0579
Time:,21:51:13,Log-Likelihood:,7.9986
No. Observations:,13,AIC:,-7.997
Df Residuals:,9,BIC:,-5.737
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0225,0.107,0.210,0.838,-0.220,0.265
above_or_below,-0.4347,0.181,-2.397,0.040,-0.845,-0.024
Num_Date,-0.0218,0.030,-0.733,0.482,-0.089,0.045
above_or_below:Num_Date,0.1090,0.048,2.276,0.049,0.001,0.217

0,1,2,3
Omnibus:,5.65,Durbin-Watson:,2.685
Prob(Omnibus):,0.059,Jarque-Bera (JB):,1.42
Skew:,-0.142,Prob(JB):,0.492
Kurtosis:,1.406,Cond. No.,19.4


In [31]:
#Next lets compare with RD the tariff / IP trails period to the Chinese hike in tariffs
model_groceries_longterm_recovery, _, _, _ = regression_discontinuity_model(df_Canada_CPI_groupby, 'Groceries', '2018-02', '2019-05', '2018-11', '2019-01')

print(model_groceries_longterm_recovery.summary())



Product:  Groceries


9
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.582
Model:                            OLS   Adj. R-squared:                  0.518
Method:                 Least Squares   F-statistic:                     5.387
Date:                Fri, 11 Apr 2025   Prob (F-statistic):             0.0198
Time:                        21:51:13   Log-Likelihood:               -0.66959
No. Observations:                  16   AIC:                             7.339
Df Residuals:                      13   BIC:                             9.657
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5693      0.173      

  return hypotest_fun_in(*args, **kwds)


                            OLS Regression Results                            
Dep. Variable:          VALUE_DETREND   R-squared:                       0.157
Model:                            OLS   Adj. R-squared:                 -0.053
Method:                 Least Squares   F-statistic:                    0.7197
Date:                Fri, 11 Apr 2025   Prob (F-statistic):              0.559
Time:                        21:51:13   Log-Likelihood:               -0.39384
No. Observations:                  16   AIC:                             8.788
Df Residuals:                      12   BIC:                             11.88
Df Model:                           3                                         
Covariance Type:                  HC3                                         
                             coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------------
Intercept                 -0

  return hypotest_fun_in(*args, **kwds)


In [32]:
#The IP trials overall impact including the US tariffs on Canada shows a reduction on inflation in groceries in Canada while the Chinese hike in tariffs to the end of the data does not show a significant impact on groceries with the groupby

#the smaller impact of each tariff appear to cause a wave that ripples through time causing greater variance in the data, each individual month of tariffs increased inflation at the treatment period but appeared to reduce inflation the following month before another wave occurs. 

#overall groceries were not as impacted when grouped together as there were some groceries influenced such as seafood while other groceries such as dairy remained unimpacted 


#In other words the tariffs had a short term impact on inflation but contributed to larger ripples in waves that caused greater variance in the data.

#The news of breakdown in China talks momentarily caused economic shock that influenced the agricultural industry to decrease food supply in Canada and increase inflation
# and the proceeding recovery after the break down in China talks and beginning of IP trade investigations, did appear to make grocery inflation lower for a short period of time.





In [33]:

#let us plot the char for sales for food manufucturing during this time period again for further investigate invesigation

Canada_groceries_supply_demand = plot_supply_and_demand(df_sales, "Food manufacturing [311]", "2017-02-01", "2019-12-01", point_line='line', x_label='Date', y_label = 'Canadian Groceries Supply and Demand', title = 'Canadian Groceries Supply and Demand 2017-2019', text_height = 0.8)

display(Canada_groceries_supply_demand)

Canada_groceries_supply_demand_05_12 = plot_supply_and_demand(df_sales, "Food manufacturing [311]", "2018-05-01", "2018-12-01", point_line='line', x_label='Date', y_label = 'Canadian Groceries Supply and Demand', title = 'Canadian Groceries Supply and Demand 2017-2019')

display(Canada_groceries_supply_demand_05_12)

Canada_groceries_supply_demand_10_05 = plot_supply_and_demand(df_sales, "Food manufacturing [311]", "2017-10-01", "2018-05-01", point_line='line', x_label='Date', y_label = 'Canadian Groceries Supply and Demand', title = 'Canadian Groceries Supply and Demand 2017-2019')

Canada_groceries_supply_demand_10_05

6524    7613953.0
6525    7619427.0
6526    7640012.0
6527    7686838.0
6528    7672824.0
Name: VALUE, dtype: float64
7748    617683.0
7749    644924.0
7750    650985.0
7751    640994.0
7752    594251.0
Name: VALUE, dtype: float64


6524    7613953.0
6525    7619427.0
6526    7640012.0
6527    7686838.0
6528    7672824.0
Name: VALUE, dtype: float64
7748    617683.0
7749    644924.0
7750    650985.0
7751    640994.0
7752    594251.0
Name: VALUE, dtype: float64


6524    7613953.0
6525    7619427.0
6526    7640012.0
6527    7686838.0
6528    7672824.0
Name: VALUE, dtype: float64
7748    617683.0
7749    644924.0
7750    650985.0
7751    640994.0
7752    594251.0
Name: VALUE, dtype: float64


In [34]:
#It appears that demand for food manufacturing was lower than supply during the trade talk breakdown which means supply for food was limited during the trade talks which caused inflation but demand rose following the trade talk breakdown and remained unaffected throughout the tariffs which is why groceries were largely unaffected



In [35]:
#while inventory did increase meaning a decrease in supply, there was still enough demand and that is why groceries were not impacted despite the sharp decrease in August 2018 perhaps mainly due to the China tariffs on US goods.



In [36]:
#let us look at what happened in American groceries due to the tariffs and compare the effect of the tariffs on groceries for US tariffs vs Canadian tariffs
#they appear to have a similar effect on American goods

plot_structure(df_USA_CPI_groupby, 'Groceries', '2017-01-01', '2020-02-01')



Product:  Groceries


In [37]:
#It appears that the American grocery dataset follows the Canadian with waves except the breakdown in china talks led to a decrease in inflation for Canadian groceries but an increase for American groceries
# We will look at the IP investigation later but it appears trumps tariffs on Canadian goods had only a small impact on American groceries but the combination of Canadian announcement of tariffs, canadian tariffs and chinese tariffs on American exported goods may have had a larger impact, let us investigate further
model_USA_RC_grocery_cutoff_first_tariff, _, _, _ = regression_discontinuity_model(df_USA_CPI_groupby, 'Groceries', '2017-10', '2018-06', '2018-03', '2018-04')

print(model_USA_RC_grocery_cutoff_first_tariff.summary())
model_USA_RC_grocery_cutoff_second_tariff, _, _, _= regression_discontinuity_model(df_USA_CPI_groupby, 'Groceries', '2018-05', '2018-12', '2018-07', '2018-10')

print(model_USA_RC_grocery_cutoff_second_tariff.summary())

model_longterm_impact_groceries, _, _, _ = regression_discontinuity_model(df_USA_CPI_groupby, 'Groceries', '2017-10-01', '2019-10', '2018-03', '2019-05')

print(model_longterm_impact_groceries.summary())



Product:  Groceries


  return hypotest_fun_in(*args, **kwds)


5
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.646
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     3.444
Date:                Fri, 11 Apr 2025   Prob (F-statistic):              0.101
Time:                        21:51:13   Log-Likelihood:               -0.20393
No. Observations:                   9   AIC:                             6.408
Df Residuals:                       6   BIC:                             7.000
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6366      0.231      

  return hypotest_fun_in(*args, **kwds)


2
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.778
Model:                            OLS   Adj. R-squared:                  0.689
Method:                 Least Squares   F-statistic:                     8.231
Date:                Fri, 11 Apr 2025   Prob (F-statistic):             0.0262
Time:                        21:51:13   Log-Likelihood:                0.20998
No. Observations:                   8   AIC:                             5.580
Df Residuals:                       5   BIC:                             5.818
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5556      0.161      

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


4
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.714
Model:                            OLS   Adj. R-squared:                  0.687
Method:                 Least Squares   F-statistic:                     95.66
Date:                Fri, 11 Apr 2025   Prob (F-statistic):           2.82e-11
Time:                        21:51:13   Log-Likelihood:                -1.6041
No. Observations:                  24   AIC:                             9.208
Df Residuals:                      21   BIC:                             12.74
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5309      0.135      

In [38]:
#It appear american groceries were more impacted than canadian groceries for tariffs, perhaps this impact was more measureable since Canada had higher variance changes
#It is also important to note that Canada has a lower population and are more likely to have higher variance as a result over time
#but American tariffs on American goods did significantly increase inflation in the post period and treatment period while Canadian tariffs appeared to lower the tariffs post period but increase tariffs during the treatment period signficant. Significance was not found for the two tariff periods in the Canadian groceries.

#The tariffs only caused a small blimp in the groceries while long term impact was harder to measure - let us investigate further



In [39]:
print(df_USA_CPI_groupby['Category'].unique())
USA_parallel_trends_groceries = plot_for_parallel_trends(df_USA_CPI_groupby, '2017-01-01', '2019-12-01', 'Groceries', 'Education & Reading', category_3 = None)

USA_parallel_trends_groceries
print(df_USA_CPI_Scaled.columns)
df_groceries_USA_grouped = df_USA_CPI_Scaled[df_USA_CPI_Scaled['Category'] == 'Groceries']

df_groceries_USA_grouped['Category'] = df_groceries_USA_grouped['Products and product groups'].map(dict_grocery_US)

print(df_groceries_USA_grouped['Category'].unique())

['Alcohol & Tobacco' 'Appliances & Equipment' 'Clothing and footwear'
 'Communication' 'Education & Reading' 'Energy' 'Furniture & Home Decor'
 'Groceries' 'Health & Personal Care' 'Household Supplies'
 'Housing & Utilities' 'Housing, Rent, Tax & Insurance'
 'Recreation & Entertainment' 'Services' 'Shelter' 'Shelter group'
 'Transportation' 'Uncategorized' 'Vehicles']
Index(['REF_DATE', 'Products and product groups', 'VALUE', 'Category'], dtype='object')
['General Category' 'Beverages' 'Fruit' nan 'Vegetables'
 'Bakery and Cereal' 'Dairy' 'Fish and Seafood' 'Prepared Foods' 'Meat'
 'Other Foods']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_groceries_USA_grouped['Category'] = df_groceries_USA_grouped['Products and product groups'].map(dict_grocery_US)


In [40]:
differences_differences(df_USA_CPI_Scaled, 'Groceries', 'Education & Reading', '2018-03-01', '2018-05-01', '2018-04-01')

0,1,2,3
Dep. Variable:,VALUE,R-squared:,0.022
Model:,OLS,Adj. R-squared:,-0.001
Method:,Least Squares,F-statistic:,4.298
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.00629
Time:,21:51:13,Log-Likelihood:,-190.07
No. Observations:,135,AIC:,388.1
Df Residuals:,131,BIC:,399.8
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.4305,0.093,-4.627,0.000,-0.613,-0.248
tariff_non_tariffed,0.4894,0.150,3.257,0.001,0.195,0.784
post,0.1941,0.198,0.979,0.328,-0.195,0.583
tariff_non_tariffed:post,-0.2021,0.294,-0.688,0.492,-0.778,0.374

0,1,2,3
Omnibus:,21.313,Durbin-Watson:,0.794
Prob(Omnibus):,0.0,Jarque-Bera (JB):,27.505
Skew:,0.898,Prob(JB):,1.06e-06
Kurtosis:,4.29,Cond. No.,12.8


In [41]:
differences_differences(df_USA_CPI_groupby, 'Groceries', 'Education & Reading', '2018-03-01', '2018-05-01', '2018-04-01')

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,VALUE,R-squared:,0.889
Model:,OLS,Adj. R-squared:,0.722
Method:,Least Squares,F-statistic:,3.709
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.22
Time:,21:51:14,Log-Likelihood:,-1.9228
No. Observations:,6,AIC:,11.85
Df Residuals:,2,BIC:,11.01
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.1819,0.420,-2.814,0.005,-2.005,-0.359
tariff_non_tariffed,2.3071,0.817,2.825,0.005,0.707,3.908
post,1.2186,0.753,1.618,0.106,-0.257,2.694
tariff_non_tariffed:post,-2.2674,1.030,-2.201,0.028,-4.287,-0.248

0,1,2,3
Omnibus:,,Durbin-Watson:,1.427
Prob(Omnibus):,,Jarque-Bera (JB):,0.341
Skew:,-0.0,Prob(JB):,0.843
Kurtosis:,1.833,Cond. No.,6.32


In [42]:


#differences in differences shows little impact for American groceries however on very short term impact (1 month) whereas in the Canadian dataset there was more of an impact for American tariffs on Canadian goods - let us examine why with datasets of export and import





In [43]:
USA_sales = pd.read_csv("../data/processed/USA_Sales_Processed_Final.csv")
print(USA_sales.tail())


                                       cat_idx  \
559309  Aluminum and Nonferrous Metal Products   
559310  Aluminum and Nonferrous Metal Products   
559311  Aluminum and Nonferrous Metal Products   
559312  Aluminum and Nonferrous Metal Products   
559313  Aluminum and Nonferrous Metal Products   

                                   dt_idx  geo_idx  is_adj  val    REF_DATE  \
559309  New Orders Percent Change Monthly        1       0 -3.1  2022-12-01   
559310  New Orders Percent Change Monthly        1       1  2.3  2022-01-01   
559311  New Orders Percent Change Monthly        1       1 -1.6  2022-02-01   
559312  New Orders Percent Change Monthly        1       1  2.9  2022-03-01   
559313  New Orders Percent Change Monthly        1       1  5.0  2022-04-01   

                           PrincipleStats  \
559309  New Orders Percent Change Monthly   
559310  New Orders Percent Change Monthly   
559311  New Orders Percent Change Monthly   
559312  New Orders Percent Change Monthly   

  USA_sales = pd.read_csv("../data/processed/USA_Sales_Processed_Final.csv")


In [44]:
USA_sales['PrincipleStats'] = USA_sales['dt_idx']
USA_sales['GoodType'] = USA_sales['cat_idx']
USA_sales['VALUE'] = USA_sales["val"]



In [45]:
for column in USA_sales['PrincipleStats'].unique():
    print(column)

print(sep = '\n')
for column in USA_sales['GoodType'].unique():
    print(column)
print(sep = '\n')
for column in USA_sales.columns:
    print(column)

USA_sales['PrincipleStats'] = USA_sales["PrincipleStats"].astype(str)
USA_sales['GoodType'] = USA_sales['GoodType'].astype(str)

USA_sales.to_csv("../data/processed/USA_Sales_Processed_Final.csv", index=False)
df_sales.to_csv("../data/processed/Canada_Sales_Processed_Final.csv", index=False)

Value of Shipments
New Orders
Unfilled Orders
Total Inventories
Materials and Supplies Inventories
Work in Process Inventories
Finished Goods Inventories
Inventories to Shipments Ratios
Unfilled Orders to Shipments Ratios
Value of Shipments Percent Change Monthly
New Orders Percent Change Monthly
Unfilled Orders Percent Change Monthly
Total Inventories Percent Change Monthly
Materials & Supplies Inventories Percent Change Monthly
Work in Progress Inventories Percent Change Monthly
Finished Goods Inventories Percent Change Monthly

Total Manufacturing
Manufacturing Excluding Transportation
Manufacturing Excluding Defense
Manufacturing with Unfilled Orders
Durable Goods
Wood Products
Nonmetallic Mineral Products
Primary Metals
Iron and Steel Mills and Ferroalloy and Steel Product Manufacturing
Aluminum and Nonferrous Metal Products
Ferrous Metal Foundries
Fabricated Metal Products
Machinery
Farm Machinery and Equipment Manufacturing
Construction Machinery Manufacturing
Mining, Oil, and G

In [46]:
for column in USA_sales['GoodType'].unique():
    print(column)
    print(USA_sales[(USA_sales['GoodType']==column) & (USA_sales['PrincipleStats']=='Finished Goods Inventories Percent Change Monthly') & (USA_sales['REF_DATE']>='2017-01-01')&(USA_sales['REF_DATE']<='2020-02-01')])


USA_groceries_supply_demand = plot_supply_and_demand(USA_sales, "Food Products", '2017-01-01', '2019-12-01', "Finished Goods Inventories", "Inventories to Shipments Ratios", point_line='line', x_label = 'Date', y_label = 'Groceries Manufacturing for USA Finished Goods and Inventories to Shipment Ratio', title = 'Supply and Demand for US Grocery Manufacturing', text_height = 1.6)
display(USA_groceries_supply_demand)

USA_groceries_supply_demand_05_12 = plot_supply_and_demand(USA_sales, "Food Products", '2018-05-01', '2018-12-01', "Finished Goods Inventories", "Inventories to Shipments Ratios", point_line='line', x_label = 'Date', y_label = 'Groceries Manufacturing for USA Finished Goods and Inventories to Shipment Ratio', title = 'Supply and Demand for US Grocery Manufacturing', text_height=1.6)
USA_groceries_supply_demand_05_12

USA_groceries_supply_demand_10_05 = plot_supply_and_demand(USA_sales, "Food Products", '2017-10-01', '2018-05-01', "Finished Goods Inventories", "Inventories to Shipments Ratios", point_line='line', x_label = 'Date', y_label = 'Groceries Manufacturing for USA Finished Goods and Inventories to Shipment Ratio', title = 'Supply and Demand for US Grocery Manufacturing', text_height=1.6)
USA_groceries_supply_demand_10_05
mask = USA_sales['PrincipleStats'] == 'Inventories to Shipments Ratios'
US_sale = USA_sales[mask]
US_sale['Category'] = US_sale['GoodType']
US_sale['VALUE'] = US_sale['VALUE'].rolling(window=3).mean()
model, _, _, _ = regression_discontinuity_model(US_sale, "Food Products", '2018-01', '2018-10', '2018-07', heteroskedasticity = 'nonrobust')
model.summary()

Total Manufacturing
                    cat_idx  \
470138  Total Manufacturing   
470139  Total Manufacturing   
470140  Total Manufacturing   
470141  Total Manufacturing   
470142  Total Manufacturing   
...                     ...   
504961  Total Manufacturing   
522338  Total Manufacturing   
522339  Total Manufacturing   
522350  Total Manufacturing   
522351  Total Manufacturing   

                                                   dt_idx  geo_idx  is_adj  \
470138  Finished Goods Inventories Percent Change Monthly        1       0   
470139  Finished Goods Inventories Percent Change Monthly        1       0   
470140  Finished Goods Inventories Percent Change Monthly        1       0   
470141  Finished Goods Inventories Percent Change Monthly        1       0   
470142  Finished Goods Inventories Percent Change Monthly        1       0   
...                                                   ...      ...     ...   
504961  Finished Goods Inventories Percent Change Monthly    

10228    14680.0
10229    14534.0
10230    14092.0
10231    13614.0
10232    13834.0
Name: VALUE, dtype: float64
10252    0.97
10253    0.89
10254    0.84
10255    0.85
10256    0.82
Name: VALUE, dtype: float64
10228    14680.0
10229    14534.0
10230    14092.0
10231    13614.0
10232    13834.0
Name: VALUE, dtype: float64
10252    0.97
10253    0.89
10254    0.84
10255    0.85
10256    0.82
Name: VALUE, dtype: float64
Product:  Food Products


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sale['Category'] = US_sale['GoodType']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sale['VALUE'] = US_sale['VALUE'].rolling(window=3).mean()


6


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.014
Model:,OLS,Adj. R-squared:,-0.171
Method:,Least Squares,F-statistic:,0.07464
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.973
Time:,21:51:19,Log-Likelihood:,26.009
No. Observations:,20,AIC:,-44.02
Df Residuals:,16,BIC:,-40.03
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0143,0.036,-0.403,0.692,-0.090,0.061
above_or_below,0.0146,0.087,0.168,0.869,-0.170,0.199
Num_Date,-0.0046,0.010,-0.472,0.643,-0.026,0.016
above_or_below:Num_Date,0.0038,0.038,0.100,0.922,-0.077,0.085

0,1,2,3
Omnibus:,19.527,Durbin-Watson:,2.367
Prob(Omnibus):,0.0,Jarque-Bera (JB):,34.828
Skew:,1.412,Prob(JB):,2.74e-08
Kurtosis:,8.815,Cond. No.,18.6


In [47]:
#It seems that the tariffs increased finished inventory percentage change per month which decreased the following month meaning demand was changing based on the tariff changes in the news notably on 2018-04 and 2018-08 which indicates an instability in the market demand coinciding with momentary inflation for that month for American groceries.

In [48]:
#lets compare the change in variance of groceries in America to Canada to compare the effect of tariffs on groceries in both countries

mask_USA_groceries = df_USA_CPI_Scaled['Category'] == 'Groceries'
mask_Canada_groceries = df_Canada_CPI_Scaled['Category'] == 'Groceries'

df_USA_CPI_Scaled_groceries = df_USA_CPI_Scaled[mask_USA_groceries]
df_Canada_CPI_Scaled_groceries = df_Canada_CPI_Scaled[mask_Canada_groceries]

df_USA_CPI_Scaled_groceries['Country'] = ['USA']*len(df_USA_CPI_Scaled_groceries)
df_Canada_CPI_Scaled_groceries['Country'] = ['Canada']*len(df_Canada_CPI_Scaled_groceries)

df_USA_CPI_Scaled_groceries.drop(columns = ['Products and product groups'], inplace=True)
df_Canada_CPI_Scaled_groceries.drop(columns = ['Products and product groups'], inplace=True)

df_USA_CPI_Scaled_groc_var = df_USA_CPI_Scaled_groceries.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()
df_Canada_CPI_Scaled_groc_var = df_Canada_CPI_Scaled_groceries.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()

df_USA_Canada_groceries = pd.concat([df_USA_CPI_Scaled_groc_var, df_Canada_CPI_Scaled_groc_var])

variance_groceries_CPI = plot_structure(df_USA_Canada_groceries, 'Groceries', '2017-01-01', '2020-02-01', color = 'Country', x_label = "Date", y_label = "Variance of CPI index for Groceries", text_to_plot = ['Breakdown in China talks impact on US Groceries', 'IP Investigation effect on Canada Groceries', 'End of Canada/US Tariff War', 'Beginning of effect of COVID-19'], lines_to_plot = ['2017-08-01', '2017-11-01', '2019-05-01', '2019-11-01'], title = 'CPI Variance for Groceries in Canada and USA')



Product:  Groceries


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_USA_CPI_Scaled_groceries['Country'] = ['USA']*len(df_USA_CPI_Scaled_groceries)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_groceries['Country'] = ['Canada']*len(df_Canada_CPI_Scaled_groceries)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_USA_CPI_Scaled_groceries.drop(columns = ['Products and product groups'], inplace=True)

In [49]:
#It seems like the break down of the China talks decreased variance and the proceeding IP investigation increased variance in America first while Canada had a lag in their decrease.
#The IP investigation and the tariffs appeared to increase variance following the breakdown in talks meaning there was recooperation but America did not quite recover following the end of the tariffs in May 2019.
# the variance of the CPI decrease may mean that the consumer market relied on only essential grocery goods during the tariffs - we should further investigate 

In [50]:
df_Canada_Number_Groceries = pd.read_csv("../data/processed/Canada_Number_Groceries_Processed.csv")

In [51]:
print(df_Canada_Number_Groceries.head())
for uniq_product in df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'].unique():
    print(uniq_product)

                     Sales  \
0       Total retail sales   
1  Retail e-commerce sales   
2       Total retail sales   
3       Total retail sales   
4       Total retail sales   

  North American Industry Classification System (NAICS)    REF_DATE  \
0                               Retail trade [44-45]     2017-01-01   
1                               Retail trade [44-45]     2017-01-01   
2              Motor vehicle and parts dealers [441]     2017-01-01   
3                          Automobile dealers [4411]     2017-01-01   
4                            New car dealers [44111]     2017-01-01   

        VALUE  
0  50417235.0  
1   1236885.0  
2  13561527.0  
3  11980440.0  
4  11020613.0  
Retail trade [44-45]
Motor vehicle and parts dealers [441]
Automobile dealers [4411]
New car dealers [44111]
Used car dealers [44112]
Other motor vehicle dealers [4412]
Automotive parts, accessories and tire retailers [4413]
Building material and garden equipment and supplies dealers [444]
Food 

In [52]:
lst_groceries_category = [0]*8 + [1]*6 + [0]*16
dict_groceries = {uniq_product: category for uniq_product, category in zip(df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'].unique(), lst_groceries_category)}

In [53]:
df_Canada_Number_Groceries['Category'] = df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'].map(dict_groceries)

In [54]:
df_Canada_Number_Groceries_groupby = df_Canada_Number_Groceries.copy()
df_Canada_Number_Groceries_groupby.drop(columns = ['North American Industry Classification System (NAICS)', 'Sales'], inplace=True)
df_Canada_Number_Groceries_groupby = df_Canada_Number_Groceries_groupby.groupby(['REF_DATE', 'Category']).std().reset_index()
df_Canada_Number_Groceries_groupby.reset_index(inplace=True)
mask = (df_Canada_Number_Groceries_groupby['Category']==1)
scaler = StandardScaler()
df_Canada_Number_Groceries_plot_all = df_Canada_Number_Groceries[df_Canada_Number_Groceries['Category']==1]
df_Canada_Number_Groceries_groupby.loc[mask, "VALUE"] = scaler.fit_transform(df_Canada_Number_Groceries_groupby.loc[mask, 'VALUE'].values.reshape(-1, 1))
plot_structure(df_Canada_Number_Groceries_groupby,1,'2017-01-01', '2020-02-01')
for unique_class in df_Canada_Number_Groceries_plot_all['North American Industry Classification System (NAICS)'].unique():
    scale = StandardScaler()
    mask = df_Canada_Number_Groceries_plot_all['North American Industry Classification System (NAICS)'] == unique_class
    df_Canada_Number_Groceries_plot_all.loc[mask, "VALUE"] = scale.fit_transform(df_Canada_Number_Groceries_plot_all[mask]['VALUE'].values.reshape(-1, 1))
alt.Chart(df_Canada_Number_Groceries_plot_all).mark_line().encode(
    x='REF_DATE',
    y='VALUE',
    color='North American Industry Classification System (NAICS)'
)

Product:  1


In [55]:
#It seems there was higher variance in terms of where the groceries were bought in Canada coinciding with results from the breakdown in talks with China, the IP announcment and the tariffs - there was a reliance on convenience retailers and vending machines during the tariff period which was then under-utilized durign the recovery period follow May 2019
# 
# This seems to indicate that the Canadian groceries for consumers did indeed recover following the tariffs and the recovery may have been stronger than American groceries.

#let us now turn out attention to clothing. 

In [56]:
plot_structure(df_Canada_CPI_groupby, 'Clothing and footwear', '2017-01-01', '2019-10-01')

Product:  Clothing and footwear


In [57]:
#It doesnt appear Canadian clothing and footwear was that impacted by tariffs but let us look closer

In [58]:
Canada_Supply_Demand_Clothing = plot_supply_and_demand(df_sales, "Leather and allied product manufacturing [316]", "2017-01-01", "2019-10-01")

Canada_Supply_Demand_Clothing_10_05 = plot_supply_and_demand(df_sales, "Leather and allied product manufacturing [316]", "2017-10-01", "2018-05-01")

Canada_Supply_Demand_Clothing_05_12 = plot_supply_and_demand(df_sales, "Leather and allied product manufacturing [316]", "2018-05-01", "2018-12-01")

6624    112779.0
6625    118602.0
6626    116474.0
6627    113234.0
6628    111506.0
Name: VALUE, dtype: float64
7848    126528.0
7849    140313.0
7850    124819.0
7851    112269.0
7852    103204.0
Name: VALUE, dtype: float64
6624    112779.0
6625    118602.0
6626    116474.0
6627    113234.0
6628    111506.0
Name: VALUE, dtype: float64
7848    126528.0
7849    140313.0
7850    124819.0
7851    112269.0
7852    103204.0
Name: VALUE, dtype: float64
6624    112779.0
6625    118602.0
6626    116474.0
6627    113234.0
6628    111506.0
Name: VALUE, dtype: float64
7848    126528.0
7849    140313.0
7850    124819.0
7851    112269.0
7852    103204.0
Name: VALUE, dtype: float64


In [59]:

Apparel_Canada = plot_supply_and_demand(df_sales, "Apparel manufacturing [315]", "2017-01-01", "2019-10-01", point_line = 'line', x_label='Date', y_label='Canadian Apparel Manufacturing Supply and Demand', title = 'Supply and Demand for Canadian Apparel Manufacturing')

Apparel_Canada

6234    577572.0
6235    583848.0
6236    602944.0
6237    587686.0
6238    608707.0
Name: VALUE, dtype: float64
7496    330936.0
7497    358944.0
7498    371177.0
7499    359320.0
7500    382262.0
Name: VALUE, dtype: float64


In [60]:
#IT seems like manufacturing demand for clothing took a hit during July which means there were less orders from clothing stores and supply for clothing was decreased

#Inflation should have increased a bit for clothing and footwear but it was the impact of trumps tariffs that caused the reduction in supply - this increase in inflation is difficult to measure in July due to the seasonal nature of clothing and footwear but perhaps we can look at regression discontinuity 

In [61]:
model_clothing_US_tariff_groupby, clothing_chart_clothing_US_original, clothing_chart_clothing_US_diff, clothing_chart_clothing_US_vtrend, clothing_chart_clothing_US_trend = regression_discontinuity_model(df_Canada_CPI_groupby, 'Clothing and footwear', '2017-08', '2019-10', '2019-03', '2019-08', seasonality=True, x_label = 'Date', y_label = 'Consumer Price Index for Clothing and Footwear')

print(model_clothing_US_tariff_groupby.summary())

clothing_chart_clothing_US_trend.configure_axis(grid=False).properties(title = 'Canadian Inflation Trend for Clothing and Footwear')

Product:  Clothing and footwear


19
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.558
Model:                            OLS   Adj. R-squared:                  0.521
Method:                 Least Squares   F-statistic:                     13.19
Date:                Fri, 11 Apr 2025   Prob (F-statistic):           0.000137
Time:                        21:51:19   Log-Likelihood:                -3.5854
No. Observations:                  27   AIC:                             13.17
Df Residuals:                      24   BIC:                             17.06
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5468      0.108     

In [62]:
model_clothing_first_tariff_US_tariff_groupby, clothing_chart_clothing_US_original, clothing_chart_clothing_US_diff, clothing_chart_clothing_US_vtrend, clothing_chart_clothing_US_trend = regression_discontinuity_model(df_Canada_CPI_groupby, 'Clothing and footwear', '2017-11', '2018-06', '2018-03', '2018-04', seasonality=True)

model_clothing_first_tariff_US_tariff_groupby.summary()

Product:  Clothing and footwear


4
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.704
Model:                            OLS   Adj. R-squared:                  0.585
Method:                 Least Squares   F-statistic:                     4.576
Date:                Fri, 11 Apr 2025   Prob (F-statistic):             0.0742
Time:                        21:51:19   Log-Likelihood:                0.20998
No. Observations:                   8   AIC:                             5.580
Df Residuals:                       5   BIC:                             5.818
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6667      0.202      

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


0,1,2,3
Dep. Variable:,VALUE_TREND,R-squared:,0.754
Model:,OLS,Adj. R-squared:,0.57
Method:,Least Squares,F-statistic:,1.026
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.47
Time:,21:51:19,Log-Likelihood:,28.313
No. Observations:,8,AIC:,-48.63
Df Residuals:,4,BIC:,-48.31
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0393,0.011,3.720,0.000,0.019,0.060
PredTreatment,-0.0295,0.018,-1.644,0.100,-0.065,0.006
Num_Date,0.0044,0.004,1.202,0.229,-0.003,0.012
PredTreatment:Num_Date,0.0009,0.011,0.082,0.935,-0.021,0.023

0,1,2,3
Omnibus:,0.16,Durbin-Watson:,2.204
Prob(Omnibus):,0.923,Jarque-Bera (JB):,0.345
Skew:,0.049,Prob(JB):,0.842
Kurtosis:,1.987,Cond. No.,7.12


In [63]:
model_clothing_second_tariff_US_tariff_groupby, clothing_chart_clothing_US_original, clothing_chart_clothing_US_diff, clothing_chart_clothing_US_vtrend, clothing_chart_clothing_US_trend = regression_discontinuity_model(df_Canada_CPI_groupby, 'Clothing and footwear', '2017-05', '2018-12', '2018-07', '2018-10', seasonality=True)

model_clothing_second_tariff_US_tariff_groupby.summary()

Product:  Clothing and footwear


14
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.533
Model:                            OLS   Adj. R-squared:                  0.478
Method:                 Least Squares   F-statistic:                     6.881
Date:                Fri, 11 Apr 2025   Prob (F-statistic):            0.00647
Time:                        21:51:19   Log-Likelihood:                -2.4401
No. Observations:                  20   AIC:                             10.88
Df Residuals:                      17   BIC:                             13.87
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5397      0.146     

0,1,2,3
Dep. Variable:,VALUE_TREND,R-squared:,0.756
Model:,OLS,Adj. R-squared:,0.71
Method:,Least Squares,F-statistic:,19.12
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,1.54e-05
Time:,21:51:19,Log-Likelihood:,51.598
No. Observations:,20,AIC:,-95.2
Df Residuals:,16,BIC:,-91.21
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0516,0.007,7.348,0.000,0.038,0.065
PredTreatment,-0.0331,0.023,-1.425,0.154,-0.079,0.012
Num_Date,-0.0005,0.001,-0.527,0.598,-0.002,0.001
PredTreatment:Num_Date,0.0189,0.004,4.886,0.000,0.011,0.027

0,1,2,3
Omnibus:,0.887,Durbin-Watson:,0.895
Prob(Omnibus):,0.642,Jarque-Bera (JB):,0.836
Skew:,0.298,Prob(JB):,0.658
Kurtosis:,2.195,Cond. No.,38.7


In [64]:
#this visual trend is not picked up by statistics - which means there was lower impact on inflation perhaps because of the already abundance of clothing in stores and the already lack of demand

#In fact the effect of the tariffs was not seen until during March 2019 - August 2019 meaning there was a lack of supply for clothing which is why there was inflation, this impact was significant. 

In [65]:
#It appears to have caused a decrease in orders or manufacturing demand which means less supply for clothing and footwear, however, demand was still above supply in this time frame - the impact may not have been felt until in March 2019 because of the already high supply of clothing and footwear in retail stores



In [66]:
for column in USA_sales['GoodType'].unique():
    print(column)
    print(USA_sales[(USA_sales['GoodType']==column) & (USA_sales['PrincipleStats']=='Finished Goods Inventories Percent Change Monthly') & (USA_sales['REF_DATE']>='2017-01-01')&(USA_sales['REF_DATE']<='2020-02-01')])


USA_leather = plot_supply_and_demand(USA_sales, "Leather and Allied Products", '2017-01-01', '2019-10-01', "Finished Goods Inventories Percent Change Monthly", "Value of Shipments", point_line = 'line', x_label = 'Date', y_label='Supply and Demand for US Leather and Allied Products', title = 'Supply and Demand for US Clothing Manufacturing')


Total Manufacturing
                    cat_idx  \
470138  Total Manufacturing   
470139  Total Manufacturing   
470140  Total Manufacturing   
470141  Total Manufacturing   
470142  Total Manufacturing   
...                     ...   
504961  Total Manufacturing   
522338  Total Manufacturing   
522339  Total Manufacturing   
522350  Total Manufacturing   
522351  Total Manufacturing   

                                                   dt_idx  geo_idx  is_adj  \
470138  Finished Goods Inventories Percent Change Monthly        1       0   
470139  Finished Goods Inventories Percent Change Monthly        1       0   
470140  Finished Goods Inventories Percent Change Monthly        1       0   
470141  Finished Goods Inventories Percent Change Monthly        1       0   
470142  Finished Goods Inventories Percent Change Monthly        1       0   
...                                                   ...      ...     ...   
504961  Finished Goods Inventories Percent Change Monthly    

In [67]:
#let us look at the American dataset to see if the same trend is seen

plot_structure(df_USA_CPI_groupby, 'Clothing and footwear', '2017-01-01', '2019-10-01')

Product:  Clothing and footwear


In [68]:
model_clothing_US_tariff, USA_chart_clothing_original, US_chart_clothing_diff, US_chart_clothing_vtrend, US_chart_clothing_trend2 = regression_discontinuity_model(df_USA_CPI_groupby, 'Clothing and footwear', '2017-01', '2019-12', '2017-10', '2019-02', seasonality=True, y_label='Clothing and Footwear Trended Consumer Price Index', x_label='Date')

print(model_clothing_US_tariff.summary())
model_clothing_US_tariff, USA_chart_clothing_original, US_chart_clothing_diff, US_chart_clothing_vtrend, US_chart_clothing_trend = regression_discontinuity_model(df_USA_CPI_groupby, 'Clothing and footwear', '2017-01', '2019-12', '2017-10', '2019-02', seasonality=True, fuzzy_sharp_omit = True)

print(model_clothing_US_tariff.summary())



model_clothing_US_China_Tariffs_on_America, _, _, _ = regression_discontinuity_model(df_USA_CPI_groupby, 'Clothing and footwear', '2017-01', '2019-10', '2018-08')

display((US_chart_clothing_trend2).configure_axis(grid=False).properties(title = 'US Inflation for Clothing and Footwear'))

Product:  Clothing and footwear


8
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.760
Model:                            OLS   Adj. R-squared:                  0.745
Method:                 Least Squares   F-statistic:                     93.86
Date:                Fri, 11 Apr 2025   Prob (F-statistic):           4.09e-14
Time:                        21:51:23   Log-Likelihood:               -0.38924
No. Observations:                  35   AIC:                             6.778
Df Residuals:                      32   BIC:                             11.44
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.4833      0.072      

8
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.760
Model:                            OLS   Adj. R-squared:                  0.745
Method:                 Least Squares   F-statistic:                     93.86
Date:                Fri, 11 Apr 2025   Prob (F-statistic):           4.09e-14
Time:                        21:51:23   Log-Likelihood:               -0.38924
No. Observations:                  35   AIC:                             6.778
Df Residuals:                      32   BIC:                             11.44
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.4833      0.072      

18


In [69]:
#It seems that both trumps tariffs caused American goods to increase in inflation while Canadian/Chinese tariffs caused a decrease in inflation for American clothings and footwear

#let us investigate further with regression discontinuity

model_clothing_US_tariff, _, _, _ = regression_discontinuity_model(df_USA_CPI_groupby, 'Clothing and footwear', '2017-10', '2018-06', '2018-03', '2018-04')

model_clothing_US_tariff.summary()

Product:  Clothing and footwear


5
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.646
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     3.444
Date:                Fri, 11 Apr 2025   Prob (F-statistic):              0.101
Time:                        21:51:23   Log-Likelihood:               -0.20393
No. Observations:                   9   AIC:                             6.408
Df Residuals:                       6   BIC:                             7.000
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6366      0.231      

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.408
Model:,OLS,Adj. R-squared:,0.053
Method:,Least Squares,F-statistic:,0.514
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.69
Time:,21:51:23,Log-Likelihood:,3.6663
No. Observations:,9,AIC:,0.6674
Df Residuals:,5,BIC:,1.456
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.1178,0.171,-0.688,0.491,-0.453,0.218
PredTreatment,0.4233,0.372,1.137,0.256,-0.306,1.153
Num_Date,-0.0305,0.054,-0.566,0.571,-0.136,0.075
PredTreatment:Num_Date,0.0456,0.273,0.167,0.868,-0.490,0.581

0,1,2,3
Omnibus:,2.11,Durbin-Watson:,1.928
Prob(Omnibus):,0.348,Jarque-Bera (JB):,0.806
Skew:,-0.027,Prob(JB):,0.668
Kurtosis:,1.535,Cond. No.,9.53


In [70]:
model_clothing_US_tariff_first_tariff, USA_chart_clothing_original, US_chart_clothing_diff, US_chart_clothing_vtrend, US_chart_clothing_trend = regression_discontinuity_model(df_USA_CPI_groupby, 'Clothing and footwear', '2017-11', '2018-06', '2018-03', '2018-04', seasonality=True)

model_clothing_US_tariff_first_tariff.summary()

Product:  Clothing and footwear


4
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.704
Model:                            OLS   Adj. R-squared:                  0.585
Method:                 Least Squares   F-statistic:                     4.576
Date:                Fri, 11 Apr 2025   Prob (F-statistic):             0.0742
Time:                        21:51:24   Log-Likelihood:                0.20998
No. Observations:                   8   AIC:                             5.580
Df Residuals:                       5   BIC:                             5.818
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6667      0.202      

  return hypotest_fun_in(*args, **kwds)
  return hypotest_fun_in(*args, **kwds)


0,1,2,3
Dep. Variable:,VALUE_TREND,R-squared:,0.996
Model:,OLS,Adj. R-squared:,0.993
Method:,Least Squares,F-statistic:,273.7
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,4.4e-05
Time:,21:51:24,Log-Likelihood:,39.686
No. Observations:,8,AIC:,-71.37
Df Residuals:,4,BIC:,-71.06
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0068,0.002,-4.035,0.000,-0.010,-0.004
PredTreatment,0.0263,0.006,4.672,0.000,0.015,0.037
Num_Date,-0.0115,0.001,-16.475,0.000,-0.013,-0.010
PredTreatment:Num_Date,-0.0184,0.005,-3.938,0.000,-0.028,-0.009

0,1,2,3
Omnibus:,2.816,Durbin-Watson:,2.043
Prob(Omnibus):,0.245,Jarque-Bera (JB):,0.859
Skew:,0.054,Prob(JB):,0.651
Kurtosis:,1.398,Cond. No.,7.12


In [71]:
model_clothing_US_tariff_second_tariff, USA_chart_clothing_original, US_chart_clothing_diff, US_chart_clothing_vtrend, US_chart_clothing_trend = regression_discontinuity_model(df_USA_CPI_groupby, 'Clothing and footwear', '2017-05', '2018-12', '2018-07', '2018-10', seasonality=True)

model_clothing_US_tariff_second_tariff.summary()

Product:  Clothing and footwear


14
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.533
Model:                            OLS   Adj. R-squared:                  0.478
Method:                 Least Squares   F-statistic:                     6.881
Date:                Fri, 11 Apr 2025   Prob (F-statistic):            0.00647
Time:                        21:51:24   Log-Likelihood:                -2.4401
No. Observations:                  20   AIC:                             10.88
Df Residuals:                      17   BIC:                             13.87
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5397      0.146     

0,1,2,3
Dep. Variable:,VALUE_TREND,R-squared:,0.749
Model:,OLS,Adj. R-squared:,0.702
Method:,Least Squares,F-statistic:,5.27
Date:,"Fri, 11 Apr 2025",Prob (F-statistic):,0.0102
Time:,21:51:24,Log-Likelihood:,52.53
No. Observations:,20,AIC:,-97.06
Df Residuals:,16,BIC:,-93.08
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0194,0.011,-1.762,0.078,-0.041,0.002
PredTreatment,-0.0505,0.030,-1.701,0.089,-0.109,0.008
Num_Date,-0.0030,0.001,-2.396,0.017,-0.006,-0.001
PredTreatment:Num_Date,-0.0154,0.005,-2.933,0.003,-0.026,-0.005

0,1,2,3
Omnibus:,2.4,Durbin-Watson:,1.001
Prob(Omnibus):,0.301,Jarque-Bera (JB):,1.142
Skew:,0.113,Prob(JB):,0.565
Kurtosis:,1.852,Cond. No.,38.7


In [72]:


mask_USA_clothing = df_USA_CPI_Scaled['Category'] == 'Clothing and footwear'
mask_Canada_clothing = df_Canada_CPI_Scaled['Category'] == 'Clothing and footwear'

df_USA_CPI_Scaled_clothing = df_USA_CPI_Scaled[mask_USA_clothing]
df_Canada_CPI_Scaled_clothing = df_Canada_CPI_Scaled[mask_Canada_clothing]
print(df_USA_CPI_Scaled_clothing.head())
df_USA_CPI_Scaled_clothing['Country'] = ['USA']*len(df_USA_CPI_Scaled_clothing)
df_Canada_CPI_Scaled_clothing['Country'] = ['Canada']*len(df_Canada_CPI_Scaled_clothing)

df_USA_CPI_Scaled_clothing.drop(columns = ['Products and product groups'], inplace=True)
df_Canada_CPI_Scaled_clothing.drop(columns = ['Products and product groups'], inplace=True)

df_USA_CPI_Scaled_clothing_var = df_USA_CPI_Scaled_clothing.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()
df_Canada_CPI_Scaled_clothing_var = df_Canada_CPI_Scaled_clothing.groupby(['REF_DATE', 'Category', 'Country']).std().reset_index()

df_USA_Canada_clothing = pd.concat([df_USA_CPI_Scaled_clothing_var, df_Canada_CPI_Scaled_clothing_var])
print(df_USA_Canada_clothing.head())
CPI_variance_clothing = plot_structure(df_USA_Canada_clothing, 'Clothing and footwear', '2017-01-01', '2020-02-01', color = 'Country', text_to_plot = ['Breakdown in trade talks with China impact on Canadian Clothing and Footwear', 'Breakdown in trade talks with China impact on US Clothing and Footwear', 'End of Canada/US Tariff War', 'Beginning of effect of COVID-19'], lines_to_plot = ['2017-07-01', '2017-09-01', '2019-05-01', '2019-11-01'], x_label = "Date", y_label = "Variance of CPI index for Clothing & Footwear", title = 'CPI Variance for Clothing & Footwear in Canada and USA')
(variance_groceries_CPI & CPI_variance_clothing).configure_axis(grid=False).properties(title = 'CPI Variance for Groceries and Clothing & Footwear in Canada and USA')

       REF_DATE Products and product groups     VALUE               Category
576  2017-01-01                     Apparel  0.663325  Clothing and footwear
577  2017-02-01                     Apparel  0.686137  Clothing and footwear
578  2017-03-01                     Apparel  0.877558  Clothing and footwear
579  2017-04-01                     Apparel  0.802832  Clothing and footwear
580  2017-05-01                     Apparel  0.573098  Clothing and footwear
     REF_DATE               Category Country     VALUE
0  2017-01-01  Clothing and footwear     USA  0.604674
1  2017-02-01  Clothing and footwear     USA  0.704206
2  2017-03-01  Clothing and footwear     USA  0.826387
3  2017-04-01  Clothing and footwear     USA  0.750805
4  2017-05-01  Clothing and footwear     USA  0.639166
Product:  Clothing and footwear


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_USA_CPI_Scaled_clothing['Country'] = ['USA']*len(df_USA_CPI_Scaled_clothing)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_clothing['Country'] = ['Canada']*len(df_Canada_CPI_Scaled_clothing)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_USA_CPI_Scaled_clothing.drop(columns = ['Products and product groups'], inplace=True)
A va

In [73]:
display(((US_chart_clothing_trend | clothing_chart_clothing_US_trend) & (USA_leather | Apparel_Canada)).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'Clothing & Footwear Inflation and Supply and Demand', subtitle = 'The tariffs impact caused a rise in inflation in Canada which, in the post period seemed to affect Canada more than America, particularly in regards to manufacturing')))

display(USA_leather)
display(Apparel_Canada)

In [74]:
print("USA Clothing & Footwear")
for column in df_USA_CPI_Scaled[df_USA_CPI_Scaled['Category'] == 'Clothing and footwear']['Products and product groups'].unique():
    print(column)

print("Canada Clothing & Footwear")
for column in df_Canada_CPI_Scaled[df_Canada_CPI_Scaled['Category'] == 'Clothing and footwear']['Products and product groups'].unique():
    print(column)


USA Clothing & Footwear
Apparel
Apparel less footwear
Boys' and girls' footwear
Boys' apparel
Footwear
Girls' apparel
Infants' and toddlers' apparel
Jewelry
Jewelry and watches
Men's and boys' apparel
Men's apparel
Men's pants and shorts
Men's shirts and sweaters
Men's suits, sport coats, and outerwear
Women's and girls' apparel
Women's apparel
Women's dresses
Women's footwear
Women's outerwear
Women's suits and separates
Women's underwear, nightwear, swimwear, and accessories
Canada Clothing & Footwear
Athletic footwear
Children's clothing
Children's footwear (excluding athletic)
Clothing material and notions
Dry cleaning services
Jewellery
Laundry services
Leather clothing accessories
Men's clothing
Men's footwear (excluding athletic)
Watches
Women's clothing
Women's footwear (excluding athletic)


In [75]:
US_women = ["Women apparel",
"Women dresses",
"Women footwear",
"Women outerwear",
"Women suits and separates",
"Women underwear, nightwear, swimwear, and accessories"]
Canada_Jewellery = ["Jewellery"]

US_Jewelry = ['Jewelry']
Canada_women = [
    "Women clothing",
    "Women footwear (excluding athletic)"
]

Canada_children = [
    "Children clothing",
    "Children footwear (excluding athletic)"
]

df_Canada_CPI_Scaled_clean = df_Canada_CPI_Scaled.copy()
df_Canada_CPI_Scaled_clean['Products and product groups'] = df_Canada_CPI_Scaled_clean['Products and product groups'].str.replace('\'s', '')
df_Canada_CPI_Scaled_clean['Products and product groups'] = df_Canada_CPI_Scaled_clean['Products and product groups'].str.replace('\'', '')
df_USA_CPI_Scaled_clean = df_USA_CPI_Scaled.copy()
df_USA_CPI_Scaled_clean['Products and product groups'] = df_USA_CPI_Scaled_clean['Products and product groups'].str.replace('\'s', '')
df_USA_CPI_Scaled_clean['Products and product groups'] = df_USA_CPI_Scaled_clean['Products and product groups'].str.replace('\'', '')

US_children = ["Infants and toddlers apparel"]

def mask_category(df, category_list):
    for count, cat in enumerate(category_list):
        if count ==0:
            mask =(df['Products and product groups'] == cat)
        else:
            mask = mask | (df['Products and product groups'] == cat)
    return mask
US_women_category = mask_category(df_USA_CPI_Scaled_clean, US_women)
Canada_women_category = mask_category(df_Canada_CPI_Scaled_clean, Canada_women)
Canada_children_category = mask_category(df_Canada_CPI_Scaled_clean, Canada_children)
US_children_category = mask_category(df_USA_CPI_Scaled_clean, US_children)
US_Jewelry_category = mask_category(df_USA_CPI_Scaled_clean, US_Jewelry)
Canada_Jewellery_category = mask_category(df_Canada_CPI_Scaled_clean, Canada_Jewellery)

US_woman_data = df_USA_CPI_Scaled_clean[US_women_category]
US_woman_data.drop(columns = ['Products and product groups'], inplace
=True)
US_woman_data = US_woman_data.groupby(['REF_DATE', 'Category']).mean().reset_index()
US_woman_data['Country'] = ['USA']*len(US_woman_data)
US_woman_data['Category'] = ['Women']*len(US_woman_data)

US_Jewelry_data = df_USA_CPI_Scaled_clean[US_Jewelry_category]
US_Jewelry_data.drop(columns = ['Products and product groups'], inplace=True)
US_Jewelry_data = US_Jewelry_data.groupby(['REF_DATE', 'Category']).mean().reset_index()
US_Jewelry_data['Country'] = ['USA']*len(US_Jewelry_data)
US_Jewelry_data['Category'] = ['Jewelry']*len(US_Jewelry_data)

US_children_data = df_USA_CPI_Scaled_clean[US_children_category]
US_children_data.drop(columns = ['Products and product groups'], inplace=True)
US_children_data = US_children_data.groupby(['REF_DATE', 'Category']).mean().reset_index()
US_children_data['Country'] = ['USA']*len(US_children_data)
US_children_data['Category'] = ['Child']*len(US_children_data)

Canada_woman_data = df_Canada_CPI_Scaled_clean[Canada_women_category]
Canada_woman_data.drop(columns = ['Products and product groups'], inplace
=True)


Canada_woman_data = Canada_woman_data.groupby(['REF_DATE', 'Category']).mean().reset_index()
Canada_woman_data['Country'] = ['Canada']*len(Canada_woman_data)
Canada_woman_data['Category'] = ['Women']*len(Canada_woman_data)

Canada_Jewelry_data = df_Canada_CPI_Scaled_clean[Canada_Jewellery_category]
Canada_Jewelry_data.drop(columns = ['Products and product groups'], inplace=True)
Canada_Jewelry_data = Canada_Jewelry_data.groupby(['REF_DATE', 'Category']).mean().reset_index()
Canada_Jewelry_data['Country'] = ['Canada']*len(Canada_Jewelry_data)
Canada_Jewelry_data['Category'] = ['Jewelry']*len(Canada_Jewelry_data)


Canada_children_data = df_Canada_CPI_Scaled_clean[Canada_children_category]
Canada_children_data.drop(columns = ['Products and product groups'], inplace=True)
Canada_children_data = Canada_children_data.groupby(['REF_DATE', 'Category']).mean().reset_index()
Canada_children_data['Country'] = ['Canada']*len(Canada_children_data)
Canada_children_data['Category'] = ['Child']*len(Canada_children_data)

df_clothing_US = pd.concat([US_woman_data, US_Jewelry_data, US_children_data])
df_clothing_Canada = pd.concat([Canada_woman_data, Canada_Jewelry_data, Canada_children_data], axis = 0)

df_clothing = pd.concat([df_clothing_US, df_clothing_Canada], axis = 0)

print(df_clothing.tail())
def plot_clothing(df, product):
    df_cat = df.copy()[df['Category']==product]
    df_cat = df_cat[(df_cat['REF_DATE']>='2017-01-01') & (df_cat['REF_DATE']<='2020-02-01')]
    chart1 = alt.Chart(df_cat).mark_line().encode(
        x=alt.X('REF_DATE', axis = alt.Axis(titleFontSize=15, title = 'Date')),
        y=alt.Y('VALUE', axis = alt.Axis(titleFontSize=15, title = 'Consumer Price index')),
        color='Country'
    ).properties(title = product)
   
    return chart1

def plot_groceries(df, product, date1, date2, x_label = '', y_label = '', title = ''):
    df_cat = df.copy()[df['Category']==product]
    df_cat = df_cat[(df_cat['REF_DATE']>=date1) & (df_cat['REF_DATE']<=date2)]
    chart1 = alt.Chart(df_cat).mark_line().encode(
        x=alt.X('REF_DATE', axis = alt.Axis(titleFontSize=15, title = x_label)),
        y=alt.Y('VALUE', axis = alt.Axis(titleFontSize=15, title = y_label)),
        color='Category'
    ).properties(title = title)
    df_cat['y'] = [0] * len(df_cat)
    df_cat['text_1'] = ['First Tariffs'] * len(df_cat)
    df_cat['text_2'] = ['Second Tariffs'] * len(df_cat)
    mark_text = alt.Chart(df_cat).mark_text().encode(x = alt.X('REF_DATE'), y = 'y', text = 'text_1').transform_filter(alt.datum.REF_DATE == '2018-03')
    mark_text_2 = alt.Chart(df_cat).mark_text().encode(x = alt.X('REF_DATE'), y = 'y', text = 'text_2').transform_filter(alt.datum.REF_DATE == '2018-09')
    mark_rule = alt.Chart(df_cat).mark_rule(color='orange', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-02')
    mark_rule2 = alt.Chart(df_cat).mark_rule(color='orange', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-04')
    mark_rule3 = alt.Chart(df_cat).mark_rule(color='red', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-07')
    mark_rule4 = alt.Chart(df_cat).mark_rule(color='red', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-10')

    return chart1 + mark_text + mark_text_2 + mark_rule + mark_rule2 + mark_rule3 + mark_rule4
df_Canada_CPI_groupby['REF_DATE'] = pd.to_datetime(df_Canada_CPI_groupby['REF_DATE'])
df_USA_CPI_groupby['REF_DATE'] = pd.to_datetime(df_USA_CPI_groupby['REF_DATE'])
df_Canada_CPI_groupby['REF_DATE'] = df_Canada_CPI_groupby['REF_DATE'].dt.strftime('%Y-%m')
df_USA_CPI_groupby['REF_DATE'] = df_USA_CPI_groupby['REF_DATE'].dt.strftime('%Y-%m')
Canada_Groceries_CPI = plot_groceries(df_Canada_CPI_groupby, 'Groceries', '2017-01', '2020-02', 'Date', 'Consumer Price Index for Canadian Groceries', 'Canadian Groceries Inflation')
US_Groceries_CPI = plot_groceries(df_USA_CPI_groupby, 'Groceries', '2017-01', '2020-02', 'Date', 'Consumer Price Index for US Groceries', 'American Groceries Inflation')
Canada_Groceries_CPI_05_12 = plot_groceries(df_Canada_CPI_groupby, 'Groceries', '2018-05', '2018-12', 'Date', 'Consumer Price Index for Canadian Groceries', 'Canadian Groceries Inflation')
US_Groceries_CPI_05_12 = plot_groceries(df_USA_CPI_groupby, 'Groceries', '2018-05', '2018-12', 'Date', 'Consumer Price Index for US Groceries', 'American Groceries Inflation')
Canada_Groceries_CPI_10_05 = plot_groceries(df_Canada_CPI_groupby, 'Groceries', '2017-10', '2018-05', 'Date', 'Consumer Price Index for Canadian Groceries', 'Canadian Groceries Inflation')
US_Groceries_CPI_10_05 = plot_groceries(df_USA_CPI_groupby, 'Groceries', '2017-10', '2018-05', 'Date', 'Consumer Price Index for US Groceries', 'American Groceries Inflation')
display(Canada_Groceries_CPI.configure_axis(grid=False))
US_Groceries_CPI.configure_axis(grid=False)



      REF_DATE Category     VALUE Country
33  2019-10-01    Child  1.547500  Canada
34  2019-11-01    Child  0.690514  Canada
35  2019-12-01    Child  0.002356  Canada
36  2020-01-01    Child -0.559307  Canada
37  2020-02-01    Child  0.127592  Canada


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_woman_data.drop(columns = ['Products and product groups'], inplace
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_Jewelry_data.drop(columns = ['Products and product groups'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_children_data.drop(columns = ['Products and product groups'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable

In [76]:
plot_clothing(df_clothing, 'Jewelry')

In [77]:
plot_clothing(df_clothing, 'Child')

In [78]:
plot_clothing(df_clothing, 'Women')

In [79]:
import numpy as np
df_clothing_women = df_clothing.copy()[(df_clothing['Category']=='Women') & ((df_clothing['Country']=='Canada') | (df_clothing['Country']=='USA'))]
df_clothing_women = df_clothing_women.copy()[['REF_DATE', 'Country', 'VALUE']]
from sklearn.preprocessing import StandardScaler

scale = StandardScaler()

def std_group(group):
    scale = StandardScaler()
    value = scale.fit_transform(group['VALUE'].values.reshape(-1, 1))
    return value[0][0]
df_clothing_women = df_clothing_women.groupby(['REF_DATE', 'Country']).apply(lambda x: std_group(x)).reset_index()

Canada_woman_CPI = df_Canada_CPI_Scaled[Canada_women_category]
Canada_woman_CPI = Canada_woman_CPI[['REF_DATE', 'VALUE']]
Canada_woman_CPI['Country'] = len(Canada_woman_CPI)*['Canada']
US_woman_CPI = df_USA_CPI_Scaled[US_women_category]
US_woman_CPI['Country'] = len(US_woman_CPI)*['USA']
US_woman_CPI = US_woman_CPI[['REF_DATE', 'VALUE', 'Country']]
US_woman_CPI = US_woman_CPI.groupby(['REF_DATE', 'Country']).std().reset_index()
Canada_woman_CPI = Canada_woman_CPI[['REF_DATE', 'VALUE', 'Country']]
Canada_woman_CPI = Canada_woman_CPI.groupby(['REF_DATE', 'Country']).std().reset_index()
df_clothing_wom = pd.concat([US_woman_CPI, Canada_woman_CPI], axis = 0)
# df_clothing_wom = df_clothing_wom[(df_clothing_wom['REF_DATE']>='2017-01-01')&(df_clothing_wom['REF_DATE']<='2020-02-01')]
alt.Chart(df_clothing_wom).mark_line().encode(
    x = 'REF_DATE',
    y = 'VALUE',
    color = 'Country'
)



  df_clothing_women = df_clothing_women.groupby(['REF_DATE', 'Country']).apply(lambda x: std_group(x)).reset_index()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_woman_CPI['Country'] = len(US_woman_CPI)*['USA']


In [80]:
for uniq_product in df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'].unique():
    print(uniq_product) 

Retail trade [44-45]
Motor vehicle and parts dealers [441]
Automobile dealers [4411]
New car dealers [44111]
Used car dealers [44112]
Other motor vehicle dealers [4412]
Automotive parts, accessories and tire retailers [4413]
Building material and garden equipment and supplies dealers [444]
Food and beverage retailers [445]
Grocery and convenience retailers [4451]
Supermarkets and other grocery retailers (except convenience retailers) [44511]
Convenience retailers and vending machine operators [44513]
Specialty food retailers [4452]
Beer, wine and liquor retailers [4453]
Furniture, home furnishings, electronics and appliances retailers [449]
Furniture, floor covering, window treatment and other home furnishings retailers [4491]
Furniture retailers [44911]
Floor covering, window treatment and other home furnishing retailers [44912]
Electronics and appliances retailers [4492]
General merchandise retailers [455]
Health and personal care retailers [456]
Gasoline stations and fuel vendors [4

In [81]:
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
df_clothing_canada_sales = df_Canada_Number_Groceries[(df_Canada_Number_Groceries['Sales'] == 'Total retail sales') & (df_Canada_Number_Groceries['North American Industry Classification System (NAICS)'] == 'Clothing, clothing accessories, shoes, jewelry, luggage and leather goods retailers [458]')]
df_clothing_canada_sales['VALUE'] = scale.fit_transform(df_clothing_canada_sales['VALUE'].values.reshape(-1, 1))
df_clothing_canada_sales['REF_DATE'] = pd.to_datetime(df_clothing_canada_sales['REF_DATE'])
df_clothing_canada_sales['REF_DATE'] = df_clothing_canada_sales['REF_DATE'].dt.strftime('%Y-%m')
df_clothing_canada_sales['text_1'] = ['First Tariff'] * len(df_clothing_canada_sales)
df_clothing_canada_sales['text_2'] = ['Second Tariff'] * len(df_clothing_canada_sales)
df_clothing_canada_sales['y'] = [1.0] * len(df_clothing_canada_sales)

chart_canada_sales_clothing = alt.Chart(df_clothing_canada_sales).mark_line().encode(
    x = alt.X('REF_DATE', axis =alt.Axis(labelFontSize = 15, title = 'Date')),
    y = alt.Y('VALUE', axis =alt.Axis(labelFontSize = 15, title = 'Total Retail Sales for Clothing in Canada')),
    
    
)
mark_text = alt.Chart(df_clothing_canada_sales).mark_text().encode(x = alt.X('REF_DATE'), y = 'y', text = 'text_1').transform_filter(alt.datum.REF_DATE == '2018-03')
mark_text2 = alt.Chart(df_clothing_canada_sales).mark_text().encode(x = alt.X('REF_DATE'), y = 'y', text = 'text_2').transform_filter(alt.datum.REF_DATE == '2018-09')
mark_rule = alt.Chart(df_clothing_canada_sales).mark_rule(color='orange', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-02')
mark_rule2 = alt.Chart(df_clothing_canada_sales).mark_rule(color='orange', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-04')
mark_rule3 = alt.Chart(df_clothing_canada_sales).mark_rule(color='red', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-07')
mark_rule4 = alt.Chart(df_clothing_canada_sales).mark_rule(color='red', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-10')

(chart_canada_sales_clothing+mark_text+mark_text2+mark_rule+mark_rule2+mark_rule3+mark_rule4).configure_axis(grid=False).properties(title = 'Total Retail Sales for Clothing in Canada')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clothing_canada_sales['VALUE'] = scale.fit_transform(df_clothing_canada_sales['VALUE'].values.reshape(-1, 1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clothing_canada_sales['REF_DATE'] = pd.to_datetime(df_clothing_canada_sales['REF_DATE'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  

In [82]:
woman_clothing_US_sales = pd.read_csv('../data/raw/MRTSSM44812USS.csv')


In [83]:
woman_clothing_US_sales.head()

Unnamed: 0,observation_date,MRTSSM44812USS
0,2017-01-01,3329
1,2017-02-01,3294
2,2017-03-01,3250
3,2017-04-01,3341
4,2017-05-01,3231


In [84]:
scale = StandardScaler()
woman_clothing_US_sales['MRTSSM44812USS'] = scale.fit_transform(woman_clothing_US_sales['MRTSSM44812USS'].values.reshape(-1, 1))
alt.Chart(woman_clothing_US_sales).mark_line().encode(
    x = 'observation_date',
    y = 'MRTSSM44812USS',
)

In [85]:
US_sales_groceries_clothing = pd.read_csv("../data/raw/ClothingGroceriesUSSalesData.csv")
print(US_sales_groceries_clothing.columns)

Index(['Date', 'Clothing Sales', 'Clothing InventorySales',
       'Clothing Month Difference', 'Grocery Sales',
       'Clothing InventorySales.1', 'Gas Sales'],
      dtype='object')


In [86]:


US_sales_groceries_clothing['Clothing Sales'] = US_sales_groceries_clothing['Clothing Sales'].str.replace(',', '')  
US_sales_groceries_clothing['Grocery Sales'] = US_sales_groceries_clothing['Grocery Sales'].str.replace(',', '')
US_sales_groceries_clothing['Clothing Sales'] = US_sales_groceries_clothing['Clothing Sales'].astype(float)
US_sales_groceries_clothing['Grocery Sales'] = US_sales_groceries_clothing['Grocery Sales'].astype(float)
print(US_sales_groceries_clothing.head())

         Date  Clothing Sales  Clothing InventorySales  \
0  2016-01-01         53288.0                     3.23   
1  2016-02-01         53344.0                     2.76   
2  2016-03-01         53261.0                     2.46   
3  2016-04-01         53391.0                     2.56   
4  2016-05-01         53585.0                     2.40   

   Clothing Month Difference  Grocery Sales  Clothing InventorySales.1  \
0                       0.04        45669.0                       0.80   
1                      -0.15        46031.0                       0.83   
2                      -0.04        45863.0                       0.78   
3                       0.09        46080.0                       0.81   
4                       0.04        46165.0                       0.77   

  Gas Sales  
0   447,376  
1   451,898  
2   450,462  
3   452,238  
4   453,898  


In [87]:
US_sales_groceries_clothing['REF_DATE'] = pd.to_datetime(US_sales_groceries_clothing['Date'])

In [88]:
scale = StandardScaler()
US_sales_groceries_clothing['Clothing Sales'] = scale.fit_transform(US_sales_groceries_clothing['Clothing Sales'].values.reshape(-1, 1))
US_sales_groceries_clothing['Grocery Sales'] = scale.fit_transform(US_sales_groceries_clothing['Grocery Sales'].values.reshape(-1, 1))
US_sales_groceries = US_sales_groceries_clothing[['REF_DATE', 'Grocery Sales']]
US_sales_groceries['Category'] = ['Groceries']*len(US_sales_groceries)
US_sales_groceries['VALUE'] = US_sales_groceries['Grocery Sales']

alt.Chart(US_sales_groceries_clothing).mark_line().encode(
    x = 'REF_DATE',
    y = 'Clothing Sales',
    
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sales_groceries['Category'] = ['Groceries']*len(US_sales_groceries)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sales_groceries['VALUE'] = US_sales_groceries['Grocery Sales']


In [89]:

US_sales_groceries_clothing['REF_DATE'] = US_sales_groceries_clothing['REF_DATE'].dt.strftime('%Y-%m')
US_sales_groceries_clothing['text_1'] = ['End of Tariff War'] * len(US_sales_groceries_clothing)
US_sales_groceries_clothing['text_2'] = ['Beginning of Tariff War'] * len(US_sales_groceries_clothing)
US_sales_groceries_clothing['y'] = [0.15] * len(US_sales_groceries_clothing)

chart_inventory_sales_US = alt.Chart(US_sales_groceries_clothing).mark_line().encode(
    x = alt.X('REF_DATE:N', axis =alt.Axis(labelFontSize = 15, title = 'Date')),
    y = alt.Y('Clothing Month Difference', axis =alt.Axis(labelFontSize = 15, title = 'US Clothing 12 month differenced Inventory to Sales ratio'))
    
)
mark_rule = alt.Chart(US_sales_groceries_clothing).mark_rule(color='orange', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2018-02')
mark_rule2 = alt.Chart(US_sales_groceries_clothing).mark_rule(color='red', strokeDash = [10, 5], strokeWidth = 2).encode(x = alt.X('REF_DATE')).transform_filter(alt.datum.REF_DATE == '2019-05')
text1 = alt.Chart(US_sales_groceries_clothing).mark_text().encode(x = alt.X('REF_DATE'), y = 'y', text = 'text_1').transform_filter(alt.datum.REF_DATE == '2019-05')
text2 = alt.Chart(US_sales_groceries_clothing).mark_text().encode(x = alt.X('REF_DATE'), y = 'y', text = 'text_2').transform_filter(alt.datum.REF_DATE == '2018-02')
(chart_inventory_sales_US + mark_rule + mark_rule2 + text1 + text2).configure_axis(grid=False).properties(title = 'US Inventory to Sales Ratio of Clothing Differenced by Month from Previous Year', width = 800, height = 400)

In [90]:
alt.Chart(US_sales_groceries_clothing).mark_line().encode(
    x = 'Date:O',
    y = 'Grocery Sales',
    
).interactive()

In [91]:
df_num_sales_Canada = pd.read_csv("../data/raw/egg_dataset.csv")

df_num_sales_Canada['VALUE'] = df_num_sales_Canada['VALUE'].str.replace(',', '')
df_num_sales_Canada['VALUE'] = df_num_sales_Canada['VALUE'].astype(float)

In [92]:
df_num_sales_Canada['Month'] = df_num_sales_Canada['Date'].str.split('-').str[0]
dict_month = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06', 'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
df_num_sales_Canada['Month'] = df_num_sales_Canada['Month'].map(dict_month)

df_num_sales_Canada['Year'] = ['20']*len(df_num_sales_Canada) + df_num_sales_Canada['Date'].str.split('-').str[1]
df_num_sales_Canada['Day'] = ['01']*len(df_num_sales_Canada)
df_num_sales_Canada['REF_DATE'] = pd.to_datetime(df_num_sales_Canada[['Year', 'Month', 'Day']])

df_num_sales_Canada['REF_DATE'] = df_num_sales_Canada['REF_DATE'].dt.strftime('%Y-%m')


In [93]:
display(((US_chart_clothing_trend.properties(width = 750, height = 450) | clothing_chart_clothing_US_trend.properties(width = 750, height = 450)) & (USA_leather.properties(width = 750, height = 450) | Apparel_Canada.properties(width = 750, height = 450)) & (chart_inventory_sales_US.properties(width = 750, height = 450) | chart_canada_sales_clothing.properties(width = 750, height = 450))).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'Clothing & Footwear Inflation and Supply Manufacturing and Demand Sales', subtitle = 'The tariffs impact caused a rise in inflation in Canada which, in the post period seemed to affect Canada more than America, particularly in regards to manufacturing')))

In [94]:

for num in range(2, 58):
    df_num_sales_Canada = df_num_sales_Canada.drop(columns = ['Unnamed: '+str(num)])
scaler = StandardScaler()
df_num_sales_Canada['VALUE'] = scaler.fit_transform(df_num_sales_Canada['VALUE'].values.reshape(-1, 1))
df_num_sales_Canada['Category'] = ['Eggs']*len(df_num_sales_Canada)
alt.Chart(df_num_sales_Canada).mark_line().encode(
    x = 'REF_DATE',
    y = 'VALUE',
    
).interactive()
df_num_sales_Canada['REF_DATE'] = pd.to_datetime(df_num_sales_Canada['REF_DATE'])
df_num_sales_Canada['REF_DATE'] = df_num_sales_Canada['REF_DATE'].dt.strftime('%Y-%m')

In [95]:
Sales_Canada_Groceries = pd.read_csv('../data/raw/20100082.csv')
Sales_Canada_Groceries['Month'] = Sales_Canada_Groceries['REF_DATE'].str.split('-').str[0]
Sales_Canada_Groceries['Month'] = Sales_Canada_Groceries['Month'].map(dict_month)

Sales_Canada_Groceries['Year'] = ['20']*len(Sales_Canada_Groceries) + Sales_Canada_Groceries['REF_DATE'].str.split('-').str[1]
Sales_Canada_Groceries['Day'] = ['01']*len(Sales_Canada_Groceries)
Sales_Canada_Groceries['REF_DATE'] = pd.to_datetime(Sales_Canada_Groceries[['Year', 'Month', 'Day']])
Sales_Canada_Groceries['REF_DATE'] = Sales_Canada_Groceries['REF_DATE'].dt.strftime('%Y-%m')


In [96]:
alt.data_transformers.disable_max_rows()
Sales_Canada_Groceries['Category'] = Sales_Canada_Groceries['North American Industry Classification System (NAICS)']

print(Sales_Canada_Groceries['Category'].unique())

Sales_Canada_Clothing = Sales_Canada_Groceries[Sales_Canada_Groceries['Category'] == 'Clothing and clothing accessories retailers [4581]']

alt.Chart(Sales_Canada_Clothing).mark_point().encode(
    x = 'REF_DATE',
    y = 'VALUE',
    
).interactive()

US_sales_groceries['REF_DATE'] = pd.to_datetime(US_sales_groceries['REF_DATE'])
US_sales_groceries['REF_DATE'] = US_sales_groceries['REF_DATE'].dt.strftime('%Y-%m')
regression_discontinuity_sales_canada_groceries, _, canada_groceries_retail_chart, _ = regression_discontinuity_model(Sales_Canada_Groceries, 'Supermarkets and other grocery retailers (except convenience retailers) [44511]', '2017-10', '2019-12', '2018-07', '2018-10', point_line='point')

regression_discontinuity_sales_canada_eggs, _, _, _ = regression_discontinuity_model(df_num_sales_Canada, 'Eggs', '2018-03', '2019-12', '2019-05')

regression_discontinuity_sales_canada_eggs_IP, _, _, _ = regression_discontinuity_model(df_num_sales_Canada, 'Eggs', '2017-10', '2019-05', '2018-03')

regression_discontinuity_sales_usa_groceries, _, usa_groceries_retail_chart, _ = regression_discontinuity_model(US_sales_groceries, 'Groceries', '2017-10', '2019-12', '2018-02', '2018-04')
# Sales_Canada_Groceries['VALUE'] = Sales_Canada_Groceries['VALUE'].str.replace(',', '')
Sales_Canada_Groceries['VALUE'] = Sales_Canada_Groceries['VALUE'].astype(float)
# US_sales_groceries['VALUE'] = US_sales_groceries['VALUE'].str.replace(',', '')
US_sales_groceries['VALUE'] = US_sales_groceries['VALUE'].astype(float)

US_grocery_df_time = US_sales_groceries.copy()

Canada_grocery_df_time = Sales_Canada_Groceries.copy()
US_sales_groceries['REF_DATE'] = pd.to_datetime(US_sales_groceries['REF_DATE'])
Sales_Canada_Groceries['REF_DATE'] = pd.to_datetime(Sales_Canada_Groceries['REF_DATE'])
US_sales_groceries['REF_DATE'] = US_sales_groceries['REF_DATE'].dt.strftime('%Y-%m')
Sales_Canada_Groceries['REF_DATE'] = Sales_Canada_Groceries['REF_DATE'].dt.strftime('%Y-%m')
US_grocery_df_time['REF_DATE'] = pd.to_datetime(US_grocery_df_time['REF_DATE'])
Canada_grocery_df_time['REF_DATE'] = pd.to_datetime(Canada_grocery_df_time['REF_DATE'])

US_grocery_df_time['REF_DATE'] = US_grocery_df_time['REF_DATE'].dt.strftime('%Y-%m')
Canada_grocery_df_time['REF_DATE'] = Canada_grocery_df_time['REF_DATE'].dt.strftime('%Y-%m')

US_grocery_df_time = US_grocery_df_time[(US_grocery_df_time['REF_DATE']>='2017-10') & (US_grocery_df_time['REF_DATE']<='2019-12') & (US_grocery_df_time['Category'] == 'Groceries')]
Canada_grocery_df_time = Canada_grocery_df_time[(Canada_grocery_df_time['REF_DATE']>='2017-10') & (Canada_grocery_df_time['REF_DATE']<='2019-12') & (Canada_grocery_df_time['Category'] == 'Supermarkets and other grocery retailers (except convenience retailers) [44511]')]
mark_rule_canada_groceries = alt.Chart(Canada_grocery_df_time).mark_rule(color='red', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-07')
mark_rule_canada_groceries_2 = alt.Chart(Canada_grocery_df_time).mark_rule(color='red', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-10')
mark_rule_canada_groceries_3 = alt.Chart(Canada_grocery_df_time).mark_rule(color='orange', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-02')
mark_rule_canada_groceries_4 = alt.Chart(Canada_grocery_df_time).mark_rule(color='orange', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-04')
canada_groceries_retail_chart = alt.Chart(Canada_grocery_df_time).mark_point().encode(x = alt.X('REF_DATE:N', axis =alt.Axis(tickCount=5, labelFontSize = 15), title = 'Date'), y = alt.Y('VALUE', scale = alt.Scale(domain=[6000, 8000]), title = 'Sales of Groceries in Canada'), color = 'Category').properties(title = 'Canada Groceries Retail Sales', height = 400, width= 700)
usa_groceries_retail_chart = alt.Chart(US_grocery_df_time).mark_line().encode(x = alt.X('REF_DATE:N', axis =alt.Axis(tickCount=5, labelFontSize = 15), title = 'Date'), y = alt.Y('VALUE', title = 'Sales of Groceries in USA'), color = 'Category').properties(title = 'USA Groceries Retail Sales', height = 400, width = 700)
mark_rule_US_groceries = alt.Chart(US_grocery_df_time).mark_rule(color='red', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-07')
mark_rule_US_groceries_2 = alt.Chart(US_grocery_df_time).mark_rule(color='red', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-10')
mark_rule_US_groceries_3 = alt.Chart(US_grocery_df_time).mark_rule(color='orange', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-02')
mark_rule_US_groceries_4 = alt.Chart(US_grocery_df_time).mark_rule(color='orange', strokeDash = [5,5]).encode(x='REF_DATE:N', tooltip=['REF_DATE:N']).transform_filter(alt.datum.REF_DATE == '2018-04')
US_grocery_df_time_05_12 = US_sales_groceries.copy()[(US_sales_groceries['REF_DATE']>='2018-05') & (US_sales_groceries['REF_DATE']<='2018-12') & (US_sales_groceries['Category'] == 'Groceries')]
Canada_grocery_df_time_05_12 = Sales_Canada_Groceries.copy()[(Sales_Canada_Groceries['REF_DATE']>='2018-05') & (Sales_Canada_Groceries['REF_DATE']<='2018-12') & (Sales_Canada_Groceries['Category'] == 'Supermarkets and other grocery retailers (except convenience retailers) [44511]')]
canada_groceries_retail_chart_05_12 = alt.Chart(Canada_grocery_df_time_05_12).mark_point().encode(x = alt.X('REF_DATE:N', axis =alt.Axis(tickCount = 5, labelFontSize = 15), title = 'Date'), y = alt.Y('VALUE', title = 'Sales of Groceries in Canada', scale = alt.Scale(domain = [6000, 8000])), color = 'Category').properties(title = 'Canada Groceries Retail Sales', height = 400, width= 700)
usa_groceries_retail_chart_05_12 = alt.Chart(US_grocery_df_time_05_12).mark_line().encode(x = alt.X('REF_DATE:N', axis =alt.Axis(tickCount = 5, labelFontSize = 15), title = 'Date'), y = alt.Y('VALUE', title = 'Sales of Groceries in USA'), color = 'Category').properties(title = 'USA Groceries Retail Sales', height = 400, width = 700)
US_grocery_df_time_10_05 = US_sales_groceries.copy()[(US_sales_groceries['REF_DATE']>='2017-10') & (US_sales_groceries['REF_DATE']<='2018-05') & (US_sales_groceries['Category'] == 'Groceries')]
Canada_grocery_df_time_10_05 = Sales_Canada_Groceries.copy()[(Sales_Canada_Groceries['REF_DATE']>='2017-10') & (Sales_Canada_Groceries['REF_DATE']<='2018-05') & (Sales_Canada_Groceries['Category'] == 'Supermarkets and other grocery retailers (except convenience retailers) [44511]')]
canada_groceries_retail_chart_10_05 = alt.Chart(Canada_grocery_df_time_10_05).mark_point().encode(x = alt.X('REF_DATE:N', axis =alt.Axis(tickCount = 5, labelFontSize = 15), title = 'Date'), y = alt.Y('VALUE', title = 'Sales of Groceries in Canada'), color = 'Category').properties(title = 'Canada Groceries Retail Sales', height = 400, width= 700)
usa_groceries_retail_chart_10_05 = alt.Chart(US_grocery_df_time_10_05).mark_line().encode(x = alt.X('REF_DATE:N', axis =alt.Axis(tickCount = 5, labelFontSize = 15), title = 'Date'), y = alt.Y('VALUE', title = 'Sales of Groceries in USA'), color = 'Category').properties(title = 'USA Groceries Retail Sales', height = 400, width = 700)
Canada_grocery_df_time['y'] = [7800]*len(Canada_grocery_df_time)
Canada_grocery_df_time['text_1'] = ['Second Tariff period']*len(Canada_grocery_df_time)
Canada_grocery_df_time['text_2'] = ['First Tariff period']*len(Canada_grocery_df_time)
US_grocery_df_time['y'] = [1]*len(US_grocery_df_time)
US_grocery_df_time['text_1'] = ['Second Tariff period']*len(US_grocery_df_time)
US_grocery_df_time['text_2'] = ['First Tariff period']*len(US_grocery_df_time)
mark_text_canada_groceries = alt.Chart(Canada_grocery_df_time).mark_text().encode(x = 'REF_DATE:N', y = alt.Y('y', scale = alt.Scale(domain = [6000, 8000])), text = 'text_1').transform_filter(alt.datum.REF_DATE == '2018-09')

mark_text_canada_groceries_2 = alt.Chart(Canada_grocery_df_time).mark_text().encode(x = 'REF_DATE:N', y = alt.Y('y', scale = alt.Scale(domain = [6000, 8000])), text = 'text_2').transform_filter(alt.datum.REF_DATE == '2018-03')
mark_text_us_groceries = alt.Chart(US_grocery_df_time).mark_text().encode(x = 'REF_DATE:N', y = 'y', text = 'text_1').transform_filter(alt.datum.REF_DATE == '2018-09')
mark_text_us_groceries_2 = alt.Chart(US_grocery_df_time).mark_text().encode(x = 'REF_DATE:N', y = 'y', text = 'text_2').transform_filter(alt.datum.REF_DATE == '2018-03')

display((mark_text_canada_groceries + mark_text_canada_groceries_2 + canada_groceries_retail_chart + mark_rule_canada_groceries + mark_rule_canada_groceries_2 + mark_rule_canada_groceries_3 + mark_rule_canada_groceries_4).configure_axis(grid=False))
display((mark_text_us_groceries + mark_text_us_groceries_2 + usa_groceries_retail_chart + mark_rule_US_groceries + mark_rule_US_groceries_2+ mark_rule_US_groceries_3 + mark_rule_US_groceries_4).configure_axis(grid=False))



['Retail trade [44-45]' 'Motor vehicle and parts dealers [441]'
 'Automobile dealers [4411]' 'New car dealers [44111]'
 'Used car dealers [44112]' 'Other motor vehicle dealers [4412]'
 'Automotive parts, accessories and tire retailers [4413]'
 'Building material and garden equipment and supplies dealers [444]'
 'Food and beverage retailers [445]'
 'Grocery and convenience retailers [4451]'
 'Supermarkets and other grocery retailers (except convenience retailers) [44511]'
 'Convenience retailers and vending machine operators [44513]'
 'Specialty food retailers [4452]'
 'Beer, wine and liquor retailers [4453]'
 'Furniture, home furnishings, electronics and appliances retailers [449]'
 'Furniture, floor covering, window treatment and other home furnishings retailers [4491]'
 'Furniture retailers [44911]'
 'Floor covering, window treatment and other home furnishing retailers [44912]'
 'Electronics and appliances retailers [4492]'
 'General merchandise retailers [455]'
 'Health and personal

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sales_groceries['REF_DATE'] = pd.to_datetime(US_sales_groceries['REF_DATE'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sales_groceries['REF_DATE'] = US_sales_groceries['REF_DATE'].dt.strftime('%Y-%m')


9
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.419
Model:                            OLS   Adj. R-squared:                  0.419
Method:                 Least Squares   F-statistic:                     279.8
Date:                Fri, 11 Apr 2025   Prob (F-statistic):          1.10e-101
Time:                        21:51:26   Log-Likelihood:                -145.24
No. Observations:                1296   AIC:                             296.5
Df Residuals:                    1293   BIC:                             312.0
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.4396      0.018     2

14
Product:  Eggs


5
Product:  Groceries


4
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.377
Model:                            OLS   Adj. R-squared:                  0.325
Method:                 Least Squares   F-statistic:                     2.802
Date:                Fri, 11 Apr 2025   Prob (F-statistic):             0.0806
Time:                        21:51:26   Log-Likelihood:               -0.67858
No. Observations:                  27   AIC:                             7.357
Df Residuals:                      24   BIC:                             11.24
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.4372      0.214      

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sales_groceries['VALUE'] = US_sales_groceries['VALUE'].astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sales_groceries['REF_DATE'] = pd.to_datetime(US_sales_groceries['REF_DATE'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_sales_groceries['REF_DATE'] = US_sales_groceries['

In [97]:
# display(((US_chart_clothing_trend.properties(width = 750, height = 450) | clothing_chart_clothing_US_trend.properties(width = 750, height = 450)) & (USA_leather.properties(width = 750, height = 450) | Apparel_Canada.properties(width = 750, height = 450)) & (chart_inventory_sales_US.properties(width = 750, height = 450) | chart_canada_sales_clothing.properties(width = 750, height = 450))).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'Clothing & Footwear Inflation and Supply and Demand', subtitle = 'The tariffs impact caused a rise in inflation in Canada which, in the post period seemed to affect Canada more than America, particularly in regards to manufacturing')))
# ((US_chart_clothing_trend | clothing_chart_clothing_US_trend) & (USA_leather | Apparel_Canada) & (chart_inventory_sales_US | chart_canada_sales_clothing)).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'Clothing & Footwear Inflation and Supply and Demand', subtitle = 'The tariffs impact caused a rise in inflation in Canada which, in the post period seemed to affect Canada more than America, particularly in regards to manufacturing'))

# display(((US_chart_clothing_trend.properties(width = 750, height = 450) | clothing_chart_clothing_US_trend.properties(width = 750, height = 450)) & (USA_leather.properties(width = 750, height = 450) | Apparel_Canada.properties(width = 750, height = 450)) & (chart_inventory_sales_US.properties(width = 750, height = 450) | chart_canada_sales_clothing.properties(width = 750, height = 450))).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'Clothing & Footwear Inflation and Supply and Demand', subtitle = 'The tariffs impact caused a rise in inflation in Canada which, in the post period seemed to affect Canada more than America, particularly in regards to manufacturing')))
display(US_chart_clothing_trend.properties(title = 'Clothing & Footwear Inflation in America'))

In [98]:
display(((US_Groceries_CPI.properties(width = 750, height = 450)|Canada_Groceries_CPI.properties(width = 750, height = 450))&(USA_groceries_supply_demand.properties(width = 750, height = 450) | Canada_groceries_supply_demand.properties(width = 750, height = 450)) &(usa_groceries_retail_chart.properties(width = 750, height = 450) | canada_groceries_retail_chart.properties(width = 750, height = 450))).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'US and Canada Groceries Inflation and Supply and Demand', subtitle = 'The tariffs caused a waves of inflation in groceries in both countries with food manufacturing being more affected in Canada than US')))
((USA_parallel_trends_groceries|Canada_parallel_trends_groceries)&(USA_groceries_supply_demand | Canada_groceries_supply_demand) &(usa_groceries_retail_chart | canada_groceries_retail_chart)).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'US and Canada Groceries Inflation and Supply and Demand', subtitle = 'The tariffs caused a waves of inflation in groceries in both countries with food manufacturing being more affected in Canada than US'))

display(((US_Groceries_CPI_05_12.properties(width = 750, height = 450)|Canada_Groceries_CPI_05_12.properties(width = 750, height = 450))&(USA_groceries_supply_demand_05_12.properties(width = 750, height = 450) | Canada_groceries_supply_demand_05_12.properties(width = 750, height = 450)) &(usa_groceries_retail_chart_05_12.properties(width = 750, height = 450) | canada_groceries_retail_chart_05_12.properties(width = 750, height = 450))).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'US and Canada Groceries Inflation and Supply and Demand', subtitle = 'The tariffs caused a waves of inflation in groceries in both countries with food manufacturing being more affected in Canada than US')))
display(((US_Groceries_CPI_10_05.properties(width = 750, height = 450)|Canada_Groceries_CPI_10_05.properties(width = 750, height = 450))&(USA_groceries_supply_demand_10_05.properties(width = 750, height = 450) | Canada_groceries_supply_demand_10_05.properties(width = 750, height = 450)) &(usa_groceries_retail_chart_10_05.properties(width = 750, height = 450) | canada_groceries_retail_chart_10_05.properties(width = 750, height = 450))).configure_axis(grid=False).properties(title = alt.TitleParams(text = 'US and Canada Groceries Inflation and Supply and Demand', subtitle = 'The tariffs caused a waves of inflation in groceries in both countries with food manufacturing being more affected in Canada than US')))


In [99]:
#It seems clothing and footwear have a seasonal impact for both countries but
#The american market seemed less impacted by the tariffs and while clothing did appear to reduce supply, the abundance of normal clothing perhaps was able to account for it. In fact it appears manufacturing was not completely impacted except for month to month during tariffs until near COVID period.
#The american clothing market inflation increased during the treatment period due to American tariffs which may have been beneficial for the clothing industry but this was reversed by Canadian tariffs.    
#The Canadian market for clothing and footwear was more impacted by the tariffs and the recovery was not as strong as the American market - the impact of the tariffs from Trump had an estimated effect in March 2019 due to the high supply of clothing already in retail stores. 
#In other words, manufacturing for clothing was affected by August 2018 but impact on retail stores was not until March 2019 and was very disrupted near COVID period.




In [100]:
#This ends the groceries and clothing/footwear section of our analysis.