In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import sys
import altair as alt
sys.path.append('../src')
from models import plot_structure, plot_supply_and_demand, regression_discontinuity_model, differences_differences, plot_for_parallel_trends 

# Product/service -> category lookup tables (the categories were produced by LLMs).
df_model_data_CAN, df_model_data_USA = (
    pd.read_csv(lookup_csv)
    for lookup_csv in (
        '../data/processed/CAN_Categorized_Products_and_Services_NEW.csv',
        '../data/processed/US_Categorized_Products_and_Services_NEW.csv',
    )
)

# Processed CPI series for Canada and the USA, used to analyse Shelter inflation.
df_Canada_CPI, df_USA_CPI = (
    pd.read_csv(cpi_csv)
    for cpi_csv in (
        '../data/processed/Canada_CPI_Processed_2018_2019.csv',
        '../data/processed/USA_CPI_Processed_2018_2019.csv',
    )
)

# Preview both lookup tables: the Canadian one as plain text, the US one as the
# cell's rich output.
print(df_model_data_CAN.head())
df_model_data_USA.head()

                                   Product_Service Category
0                                    Durable goods      ???
1                                            Goods      ???
2                             Household operations      ???
3  Household operations, furnishings and equipment      ???
4                        Housing (1986 definition)      ???


Unnamed: 0,Product_Service,Category
0,Airline fares,Transportation
1,Alcoholic beverages,Alcohol & Tobacco
2,Alcoholic beverages at home,Alcohol & Tobacco
3,All items,Uncategorized
4,All items less energy,Energy


In [3]:

# Build scaled and non-scaled versions of the CPI data for both countries.

def _standardize_per_product(cpi_long):
    """Return a copy of ``cpi_long`` with VALUE standardized within each product.

    Every distinct entry of 'Products and product groups' is fitted with its own
    StandardScaler, so each series is centred and scaled independently of the
    others. The input frame is left untouched.
    """
    scaled = cpi_long.copy()
    for product in scaled['Products and product groups'].unique():
        rows = scaled['Products and product groups'] == product
        # StandardScaler expects a 2-D (n_samples, 1) array; flatten the result back.
        column_2d = scaled.loc[rows, "VALUE"].values.reshape(-1, 1)
        scaled.loc[rows, "VALUE"] = StandardScaler().fit_transform(column_2d).ravel()
    return scaled


# The US file is wide (one column per product); reshape it to the long layout
# that the Canadian file is already used in.
df_USA_CPI_melt = pd.melt(
    df_USA_CPI,
    id_vars='REF_DATE',
    var_name='Products and product groups',
    value_name='VALUE',
)

# Untouched copies for analyses that need the raw index levels.
df_Canada_CPI_NonScaled = df_Canada_CPI.copy()
df_USA_CPI_NonScaled = df_USA_CPI_melt.copy()

# Per-product standardized copies.
df_Canada_CPI_Scaled = _standardize_per_product(df_Canada_CPI)
df_USA_CPI_Scaled = _standardize_per_product(df_USA_CPI_melt)

In [4]:
# Attach a 'Category' column to the scaled and non-scaled CPI frames of both countries.

def _product_to_category(categorised):
    """Build a {Product_Service: Category} dict from a categorisation table.

    When a product appears more than once, only its first row is used.
    """
    first_rows = categorised.drop_duplicates(subset=['Product_Service'])
    return first_rows.set_index('Product_Service')['Category'].to_dict()


dict_CAN = _product_to_category(df_model_data_CAN)
dict_USA = _product_to_category(df_model_data_USA)

# Each frame is mapped from its OWN product column. The Canadian frames used to
# read the column from df_Canada_CPI instead, which only worked because their
# indexes happened to line up with it.
for cpi_frame, category_lookup in (
    (df_Canada_CPI_Scaled, dict_CAN),
    (df_Canada_CPI_NonScaled, dict_CAN),
    (df_USA_CPI_Scaled, dict_USA),
    (df_USA_CPI_NonScaled, dict_USA),
):
    # products missing from the lookup get NaN as their Category
    cpi_frame['Category'] = cpi_frame['Products and product groups'].map(category_lookup)


In [5]:
# Collapse every frame to one row per (REF_DATE, Category) holding the median
# of the remaining columns.
group_keys = ['REF_DATE', 'Category']
product_column = ['Products and product groups']

# Canada, scaled
df_Canada_CPI_Cat = df_Canada_CPI_Scaled.drop(columns=product_column)
df_Canada_CPI_groupby = df_Canada_CPI_Cat.groupby(group_keys).median().reset_index()

# Canada, non-scaled
df_Canada_CPI_Cat_NonScaled = df_Canada_CPI_NonScaled.drop(columns=product_column)
df_Canada_CPI_groupby_NonScaled = (
    df_Canada_CPI_Cat_NonScaled.groupby(group_keys).median().reset_index()
)

# USA, scaled
df_USA_CPI_Cat = df_USA_CPI_Scaled.drop(columns=product_column)
df_USA_CPI_groupby = df_USA_CPI_Cat.groupby(group_keys).median().reset_index()

# USA, non-scaled
df_USA_CPI_Cat_NonScaled = df_USA_CPI_NonScaled.drop(columns=product_column)
df_USA_CPI_groupby_NonScaled = (
    df_USA_CPI_Cat_NonScaled.groupby(group_keys).median().reset_index()
)

In [6]:
# Print each category label present in the aggregated Canadian frame, one per line.
canada_category_labels = df_Canada_CPI_groupby['Category'].unique()
for category_label in canada_category_labels:
    print(category_label)

???
Alcohol & Tobacco
Appliances & Equipment
Clothing & Footwear Groups
Clothing and footwear
Communication
Education & Reading
Energy
Furniture & Home Decor
Gardening
Groceries
Health & Personal Care
Household Supplies
Housing & Utilities
Housing, Rent, Tax  & Insurance
Recreation & Entertainment
Services
Shelter
Transportation
Vehicles


In [7]:
# Plot Shelter (treated as the more tariff-exposed category) against
# Education & Reading (treated as the less exposed one) for Jan-Nov 2018.
plot_for_parallel_trends(
    df_Canada_CPI_groupby,
    '2018-01-01',
    '2018-11-01',
    'Shelter',
    'Education & Reading',
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [8]:
# Difference-in-differences around the China tariffs, restricted to a short
# window so the estimate reflects a single month.
# NOTE(review): the three dates are positional; they look like window start,
# window end and event date -- confirm against src/models.py.
differences_differences(
    df_Canada_CPI_groupby,
    'Shelter',
    'Education & Reading',
    '2018-07-01',
    '2018-09-01',
    '2018-08-01',
)

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,VALUE,R-squared:,0.843
Model:,OLS,Adj. R-squared:,0.608
Method:,Least Squares,F-statistic:,6.207
Date:,"Fri, 04 Apr 2025",Prob (F-statistic):,0.142
Time:,20:52:17,Log-Likelihood:,-2.959
No. Observations:,6,AIC:,13.92
Df Residuals:,2,BIC:,13.09
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.1469,0.879,-1.304,0.192,-2.870,0.576
tariff_non_tariffed,1.3470,0.971,1.388,0.165,-0.555,3.249
post,1.5673,2.887,0.543,0.587,-4.091,7.226
tariff_non_tariffed:post,-0.2942,2.916,-0.101,0.920,-6.010,5.422

0,1,2,3
Omnibus:,,Durbin-Watson:,1.973
Prob(Omnibus):,,Jarque-Bera (JB):,0.195
Skew:,0.0,Prob(JB):,0.907
Kurtosis:,2.117,Cond. No.,6.32


In [9]:
#The category-level (groupby) results suggest there may be some interaction.
#Let us look into the specific Shelter sub-categories to see which of them may have been impacted by the tariffs.

In [10]:

# Build the Canadian and US shelter / education subsets plus the dictionaries
# that assign each product to a finer sub-category.

# Names of all Canadian CPI products categorised as Shelter.
shelter_items = df_Canada_CPI_NonScaled.loc[
    df_Canada_CPI_NonScaled['Category'] == 'Shelter', 'Products and product groups'
].unique()

# Rows for the treated (Shelter) and comparison (Education & Reading) categories.
# .copy() makes this an independent frame, so later column assignments do not
# raise SettingWithCopyWarning.
shelter_dataframe = df_Canada_CPI_NonScaled[
    df_Canada_CPI_NonScaled['Category'].isin(['Shelter', 'Education & Reading'])
].copy()

# Canadian product -> sub-category (both names are kept for existing callers).
dict_shelter = shelter_categories = {
    "Homeowners' home and mortgage insurance": 'Owned accommodation',
    "Homeowners' maintenance and repairs": 'Owned accommodation',
    "Homeowners' replacement cost": 'Owned accommodation',
    "Mortgage interest cost": 'Owned accommodation',
    "Other owned accommodation expenses": 'Owned accommodation',
    "Property taxes and other special charges": 'Owned accommodation',
    "Rent": 'Rented accommodation',
    "Tenants' insurance premiums": 'Rented accommodation',
    "Tenants' maintenance, repairs and other expenses": 'Rented accommodation',
    "Electricity": 'Water, fuel and electricity',
    "Fuel oil and other fuels": 'Water, fuel and electricity',
    "Natural gas": 'Water, fuel and electricity',
    "Water": 'Water, fuel and electricity',
    'Books and reading material (excluding textbooks)': 'Books, Magazines & Newspaper',
    'Education': 'Education',
    'Education and reading': 'Education',
    'Magazines and periodicals': 'Books, Magazines & Newspaper',
    'Newspapers': 'Books, Magazines & Newspaper',
    'Reading material (excluding textbooks)': 'Books, Magazines & Newspaper',
    'Recreation, education and reading': 'Education',
    'School textbooks and supplies': 'Books, Magazines & Newspaper',
    'Tuition fees': 'Education'
}

# US sub-category -> list of products. Note the direction is the reverse of
# dict_shelter, so it cannot be passed to Series.map as-is.
dict_shelter_US = {
    'Owned accommodation': [
        'Fuels and utilities', 'Household energy', 'Housing', 'Housing at school, excluding board',
        "Owners' equivalent rent of primary residence", "Owners' equivalent rent of residences"
    ],
    'Rented accommodation': [
        'Rent of primary residence', 'Rent of shelter', 'Services less rent of shelter'
    ],
    'Water, fuel and electricity': [
        'Utility (piped) gas service', 'Water and sewer and trash collection services', 'Water and sewerage maintenance'
    ]
}

# Inverted view: US product -> sub-category, the shape Series.map needs.
us_product_to_subcategory = {
    product: subcategory
    for subcategory, products in dict_shelter_US.items()
    for product in products
}

US_shelter_dataset = df_USA_CPI_NonScaled[
    df_USA_CPI_NonScaled['Category'].isin(['Shelter', 'Education & Reading'])
].copy()

print(US_shelter_dataset['Products and product groups'].unique())

# Map from the PRODUCT column. Mapping the old 'Category' values ('Shelter',
# 'Education & Reading') through dict_shelter_US matched none of its keys and
# turned every row into NaN.
# Products that dict_shelter_US does not list (the education items and
# 'Electricity') still come out as NaN.
US_shelter_dataset['Category'] = US_shelter_dataset['Products and product groups'].map(
    us_product_to_subcategory
)



['College tuition and fees' 'Day care and preschool' 'Education'
 'Electricity' 'Elementary and high school tuition and fees'
 'Fuels and utilities' 'Household energy' 'Housing'
 'Housing at school, excluding board'
 "Owners' equivalent rent of primary residence"
 "Owners' equivalent rent of residences" 'Rent of primary residence'
 'Rent of shelter' 'Services less rent of shelter'
 'Stationery, stationery supplies, gift wrap'
 'Tuition, other school fees, and childcare' 'Utility (piped) gas service'
 'Water and sewer and trash collection services'
 'Water and sewerage maintenance']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  US_shelter_dataset['Category'] = US_shelter_dataset['Category'].map(dict_shelter_US)


In [11]:
# Assign each shelter / education product to its sub-category and average the
# remaining columns per (REF_DATE, sub-category).
# (The earlier comment and result name spoke of groceries; this cell works on
# the shelter frame.)

# .assign/.drop return new frames, so nothing is written into a slice of
# df_Canada_CPI_NonScaled -- that was what raised SettingWithCopyWarning here.
shelter_dataframe = (
    shelter_dataframe
    .assign(Category=shelter_dataframe['Products and product groups'].map(dict_shelter))
    .drop(columns=['Products and product groups'])
)
print(shelter_dataframe['Category'].unique())

shelter_dataframe_groupby = shelter_dataframe.groupby(['REF_DATE', 'Category']).mean().reset_index()
# Legacy name, kept so anything that still refers to it keeps working.
grocery_dataframe_groupby = shelter_dataframe_groupby

['Books, Magazines & Newspaper' 'Education' 'Water, fuel and electricity'
 'Owned accommodation' 'Rented accommodation']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shelter_dataframe['Category'] = shelter_dataframe['Products and product groups'].map(dict_shelter)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shelter_dataframe.drop(columns = ['Products and product groups'], inplace=True)


In [12]:
# Regression-discontinuity fit on Canadian Shelter for the window the notebook
# labels "first tariff". Only the first of the four returned values is kept.
# NOTE(review): the four dates are positional; their roles (window start/end,
# cutoff, second date) should be confirmed against src/models.py.
model_Canada_RC_shelter_first_tariff, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-06-01',
    '2018-03-01',
    '2018-04-01',
)

Product:  Shelter


5
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.646
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     3.444
Date:                Fri, 04 Apr 2025   Prob (F-statistic):              0.101
Time:                        20:52:17   Log-Likelihood:               -0.20393
No. Observations:                   9   AIC:                             6.408
Df Residuals:                       6   BIC:                             7.000
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6366      0.231      

  res = hypotest_fun_out(*samples, **kwds)


In [13]:

# Show the full OLS summary table of the "first tariff" RD model fitted in the previous cell.
model_Canada_RC_shelter_first_tariff.summary()

  res = hypotest_fun_out(*samples, **kwds)


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.313
Model:,OLS,Adj. R-squared:,-0.1
Method:,Least Squares,F-statistic:,0.6438
Date:,"Fri, 04 Apr 2025",Prob (F-statistic):,0.619
Time:,20:52:17,Log-Likelihood:,10.98
No. Observations:,9,AIC:,-13.96
Df Residuals:,5,BIC:,-13.17
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0131,0.049,-0.269,0.788,-0.109,0.082
PredTreatment,0.0924,0.171,0.540,0.589,-0.243,0.428
Num_Date,-0.0196,0.015,-1.343,0.179,-0.048,0.009
PredTreatment:Num_Date,-0.0159,0.098,-0.162,0.871,-0.208,0.176

0,1,2,3
Omnibus:,1.094,Durbin-Watson:,2.211
Prob(Omnibus):,0.579,Jarque-Bera (JB):,0.629
Skew:,-0.035,Prob(JB):,0.73
Kurtosis:,1.707,Cond. No.,9.53


In [14]:
# Same regression-discontinuity fit on Canadian Shelter, for the window the
# notebook labels "second tariff". Only the first returned value is kept.
# NOTE(review): date roles are positional -- confirm against src/models.py.
model_Canada_RC_shelter_second_tariff, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2018-05-01',
    '2018-12-01',
    '2018-07-01',
    '2018-10-01',
)

Product:  Shelter


2
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.778
Model:                            OLS   Adj. R-squared:                  0.689
Method:                 Least Squares   F-statistic:                     8.231
Date:                Fri, 04 Apr 2025   Prob (F-statistic):             0.0262
Time:                        20:52:17   Log-Likelihood:                0.20998
No. Observations:                   8   AIC:                             5.580
Df Residuals:                       5   BIC:                             5.818
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5556      0.161      

  res = hypotest_fun_out(*samples, **kwds)


In [15]:
# Show the full OLS summary table of the "second tariff" RD model fitted in the previous cell.
model_Canada_RC_shelter_second_tariff.summary()

  res = hypotest_fun_out(*samples, **kwds)


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.897
Model:,OLS,Adj. R-squared:,0.819
Method:,Least Squares,F-statistic:,2.115
Date:,"Fri, 04 Apr 2025",Prob (F-statistic):,0.241
Time:,20:52:17,Log-Likelihood:,17.349
No. Observations:,8,AIC:,-26.7
Df Residuals:,4,BIC:,-26.38
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0990,0.096,-1.029,0.303,-0.287,0.089
PredTreatment,0.1640,0.188,0.874,0.382,-0.204,0.532
Num_Date,-0.0002,0.023,-0.010,0.992,-0.046,0.045
PredTreatment:Num_Date,0.0079,0.072,0.109,0.913,-0.134,0.150

0,1,2,3
Omnibus:,2.258,Durbin-Watson:,2.031
Prob(Omnibus):,0.323,Jarque-Bera (JB):,0.786
Skew:,0.014,Prob(JB):,0.675
Kurtosis:,1.465,Cond. No.,19.4


In [16]:
# One RD fit over the whole Oct 2017 - Dec 2018 span, given a single date
# ('2018-05-01') instead of the two used in the per-tariff fits above.
# NOTE(review): presumably that date is the cutoff separating the two tariff
# rounds -- confirm against src/models.py.
first_tariff_vs_second_tariff, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-12-01',
    '2018-05-01',
)

first_tariff_vs_second_tariff.summary()

Product:  Shelter


7


  res = hypotest_fun_out(*samples, **kwds)


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.145
Model:,OLS,Adj. R-squared:,-0.088
Method:,Least Squares,F-statistic:,0.4071
Date:,"Fri, 04 Apr 2025",Prob (F-statistic):,0.751
Time:,20:52:17,Log-Likelihood:,16.296
No. Observations:,15,AIC:,-24.59
Df Residuals:,11,BIC:,-21.76
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.0236,0.080,-0.295,0.768,-0.180,0.133
above_or_below,0.0704,0.112,0.628,0.530,-0.149,0.290
Num_Date,-0.0156,0.018,-0.870,0.384,-0.051,0.020
above_or_below:Num_Date,0.0044,0.029,0.152,0.879,-0.052,0.061

0,1,2,3
Omnibus:,2.549,Durbin-Watson:,1.847
Prob(Omnibus):,0.28,Jarque-Bera (JB):,1.874
Skew:,0.73,Prob(JB):,0.392
Kurtosis:,2.069,Cond. No.,22.2


In [17]:
# American tariffs seem to have had little immediate impact on Canadian shelter
# when looking at manufacturing; the only month affected was the China tariffs
# in August and, as mentioned, supply was still high.
# It is hard to disentangle the impact of the tariffs from the impact of the
# news, so a longer-term view may give more insight into what happened.
# (The original note said "groceries" here; this notebook analyses shelter.)


Canada_parallel_trends_shelter = plot_for_parallel_trends(
    df_Canada_CPI_groupby,
    '2017-01-01',
    '2019-10-01',
    'Shelter',
    'Education & Reading',
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [18]:
# Attributing shelter changes to one specific tariff is hard because the tariffs
# are serially correlated. Instead, run an RD over the period from the start of
# the breakdown in China talks up to the Chinese tariff hike and see whether any
# conclusion can be drawn.
# Variance looks larger between the start of the Intellectual Property
# investigation and the Chinese tariff hike -- it propagates like a wave with
# each tariff -- with minimal activity after the IP investigation results.
# This fit uses non-robust standard errors.
model_shelter_longterm_IP, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2017-01-01',
    '2018-04-01',
    '2017-08-01',
    heteroskedasticity='nonrobust',
)

model_shelter_longterm_IP.summary()

Product:  Shelter


6


  res = hypotest_fun_out(*samples, **kwds)


0,1,2,3
Dep. Variable:,VALUE_DETREND,R-squared:,0.232
Model:,OLS,Adj. R-squared:,0.023
Method:,Least Squares,F-statistic:,1.11
Date:,"Fri, 04 Apr 2025",Prob (F-statistic):,0.387
Time:,20:52:17,Log-Likelihood:,17.731
No. Observations:,15,AIC:,-27.46
Df Residuals:,11,BIC:,-24.63
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,-0.0024,0.059,-0.040,0.968,-0.132,0.128
above_or_below,0.0629,0.090,0.701,0.498,-0.134,0.260
Num_Date,0.0085,0.016,0.522,0.612,-0.027,0.045
above_or_below:Num_Date,-0.0109,0.021,-0.515,0.617,-0.057,0.036

0,1,2,3
Omnibus:,0.599,Durbin-Watson:,2.306
Prob(Omnibus):,0.741,Jarque-Bera (JB):,0.561
Skew:,-0.384,Prob(JB):,0.755
Kurtosis:,2.446,Cond. No.,25.0


In [19]:
# Next, use RD to compare the tariff / IP-investigation period with the period
# of the Chinese tariff hike.
# NOTE(review): the result name says "groceries" but the model is fitted on
# 'Shelter'; the name is kept because other cells may refer to it.
model_groceries_longterm_recovery, _, _, _ = regression_discontinuity_model(
    df_Canada_CPI_groupby,
    'Shelter',
    '2018-02-01',
    '2019-05-01',
    '2018-11-01',
    '2019-01-01',
)

print(model_groceries_longterm_recovery.summary())



Product:  Shelter


9
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.582
Model:                            OLS   Adj. R-squared:                  0.518
Method:                 Least Squares   F-statistic:                     5.387
Date:                Fri, 04 Apr 2025   Prob (F-statistic):             0.0198
Time:                        20:52:17   Log-Likelihood:               -0.66959
No. Observations:                  16   AIC:                             7.339
Df Residuals:                      13   BIC:                             9.657
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5693      0.173      

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


In [20]:
# Turn to American shelter: see what happened there because of the tariffs and
# compare the effect of the US tariffs with that of the Canadian tariffs.
# The two appear to have a similar effect on American goods.

plot_structure(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-01-01',
    '2020-02-01',
)



Product:  Shelter


In [21]:
# American shelter appears to follow the Canadian series in waves, with one
# exception: the breakdown in China talks coincided with a decrease in inflation
# for Canadian shelter but an increase for American shelter.
# The IP investigation is examined later. For now it looks as if Trump's tariffs
# on Canadian goods had only a small impact on American shelter, whereas the
# combination of Canada's tariff announcement, the Canadian tariffs and the
# Chinese tariffs on American exports may have had a larger one -- investigate.

# "First tariff" window.
model_USA_RC_shelter_cutoff_first_tariff, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2018-06-01',
    '2018-03-01',
    '2018-04-01',
)
print(model_USA_RC_shelter_cutoff_first_tariff.summary())

# "Second tariff" window.
model_USA_RC_shelter_cutoff_second_tariff, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2018-05-01',
    '2018-12-01',
    '2018-07-01',
    '2018-10-01',
)
print(model_USA_RC_shelter_cutoff_second_tariff.summary())

# Longer-term window, Oct 2017 - Dec 2019.
model_longterm_impact_shelter, _, _, _ = regression_discontinuity_model(
    df_USA_CPI_groupby,
    'Shelter',
    '2017-10-01',
    '2019-12-01',
    '2018-02-01',
    '2018-04-01',
)
print(model_longterm_impact_shelter.summary())



Product:  Shelter


  res = hypotest_fun_out(*samples, **kwds)


5
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.646
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     3.444
Date:                Fri, 04 Apr 2025   Prob (F-statistic):              0.101
Time:                        20:52:17   Log-Likelihood:               -0.20393
No. Observations:                   9   AIC:                             6.408
Df Residuals:                       6   BIC:                             7.000
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.6366      0.231      

  res = hypotest_fun_out(*samples, **kwds)


2
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.778
Model:                            OLS   Adj. R-squared:                  0.689
Method:                 Least Squares   F-statistic:                     8.231
Date:                Fri, 04 Apr 2025   Prob (F-statistic):             0.0262
Time:                        20:52:17   Log-Likelihood:                0.20998
No. Observations:                   8   AIC:                             5.580
Df Residuals:                       5   BIC:                             5.818
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.5556      0.161      

  res = hypotest_fun_out(*samples, **kwds)
  res = hypotest_fun_out(*samples, **kwds)


4
                            OLS Regression Results                            
Dep. Variable:                  treat   R-squared:                       0.377
Model:                            OLS   Adj. R-squared:                  0.325
Method:                 Least Squares   F-statistic:                     2.802
Date:                Fri, 04 Apr 2025   Prob (F-statistic):             0.0806
Time:                        20:52:17   Log-Likelihood:               -0.67858
No. Observations:                  27   AIC:                             7.357
Df Residuals:                      24   BIC:                             11.24
Df Model:                           2                                         
Covariance Type:                  HC3                                         
                     coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
Intercept          0.4372      0.214      

In [22]:
# List the category labels available in the aggregated US frame, then draw the
# Shelter vs Education & Reading parallel-trends plot for 2017-2019.
print(df_USA_CPI_groupby['Category'].unique())
USA_parallel_trends_shelter = plot_for_parallel_trends(
    df_USA_CPI_groupby,
    '2017-01-01',
    '2019-12-01',
    'Shelter',
    'Education & Reading',
    category_3=None,
)

# Bare expression so the returned object is rendered as the cell output.
USA_parallel_trends_shelter

['Alcohol & Tobacco' 'Appliances & Equipment' 'Clothing and footwear'
 'Communication' 'Education & Reading' 'Energy' 'Furniture & Home Decor'
 'Groceries' 'Health & Personal Care' 'Household Supplies'
 'Housing & Utilities' 'Housing, Rent, Tax & Insurance'
 'Recreation & Entertainment' 'Services' 'Shelter' 'Shelter group'
 'Transportation' 'Uncategorized' 'Vehicles']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'] = scale.fit_transform(df_Canada_CPI_Scaled_US_on_Canada_Tariffs_1['VALUE'].values.reshape(-1, 1))


In [23]:
# Difference-in-differences on the product-level scaled US frame
# (df_USA_CPI_Scaled), i.e. before the per-category median is taken.
differences_differences(
    df_USA_CPI_Scaled,
    'Shelter',
    'Education & Reading',
    '2018-03-01',
    '2018-05-01',
    '2018-04-01',
)

0,1,2,3
Dep. Variable:,VALUE,R-squared:,0.148
Model:,OLS,Adj. R-squared:,0.1
Method:,Least Squares,F-statistic:,5.674
Date:,"Fri, 04 Apr 2025",Prob (F-statistic):,0.00191
Time:,20:52:17,Log-Likelihood:,-76.305
No. Observations:,57,AIC:,160.6
Df Residuals:,53,BIC:,168.8
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-0.6171,0.109,-5.678,0.000,-0.830,-0.404
tariff_non_tariffed,0.7844,0.251,3.131,0.002,0.293,1.275
post,0.2268,0.232,0.979,0.328,-0.227,0.681
tariff_non_tariffed:post,0.0211,0.451,0.047,0.963,-0.863,0.905

0,1,2,3
Omnibus:,25.463,Durbin-Watson:,0.743
Prob(Omnibus):,0.0,Jarque-Bera (JB):,37.357
Skew:,1.706,Prob(JB):,7.73e-09
Kurtosis:,5.023,Cond. No.,8.02


In [24]:
# Same difference-in-differences specification, this time on the per-category
# median frame (df_USA_CPI_groupby).
differences_differences(
    df_USA_CPI_groupby,
    'Shelter',
    'Education & Reading',
    '2018-03-01',
    '2018-05-01',
    '2018-04-01',
)

  warn("omni_normtest is not valid with less than 8 observations; %i "


0,1,2,3
Dep. Variable:,VALUE,R-squared:,0.938
Model:,OLS,Adj. R-squared:,0.845
Method:,Least Squares,F-statistic:,11.0
Date:,"Fri, 04 Apr 2025",Prob (F-statistic):,0.0845
Time:,20:52:17,Log-Likelihood:,-0.16974
No. Observations:,6,AIC:,8.339
Df Residuals:,2,BIC:,7.507
Df Model:,3,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.1501,0.350,-3.287,0.001,-1.836,-0.464
tariff_non_tariffed,1.5558,0.610,2.552,0.011,0.361,2.751
post,1.0153,2.896,0.351,0.726,-4.661,6.692
tariff_non_tariffed:post,0.2024,2.958,0.068,0.945,-5.595,6.000

0,1,2,3
Omnibus:,,Durbin-Watson:,1.365
Prob(Omnibus):,,Jarque-Bera (JB):,0.439
Skew:,0.0,Prob(JB):,0.803
Kurtosis:,1.675,Cond. No.,6.32
