In [2]:
import pandas as pd
import statsmodels.api as sm
import numpy as np

df = pd.read_stata('/Users/michalurdanivia/Library/CloudStorage/GoogleDrive-mw.urdanivia@gmail.com/Mon Drive/Guns/Guns.dta')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1173 entries, 0 to 1172
Data columns (total 13 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   year         1173 non-null   int8   
 1   vio          1173 non-null   float32
 2   mur          1173 non-null   float32
 3   rob          1173 non-null   float32
 4   incarc_rate  1173 non-null   int16  
 5   pb1064       1173 non-null   float32
 6   pw1064       1173 non-null   float32
 7   pm1029       1173 non-null   float32
 8   pop          1173 non-null   float32
 9   avginc       1173 non-null   float32
 10  density      1173 non-null   float32
 11  stateid      1173 non-null   int8   
 12  shall        1173 non-null   int8   
dtypes: float32(9), int16(1), int8(3)
memory usage: 56.1 KB


In [4]:
df.head()

Unnamed: 0,year,vio,mur,rob,incarc_rate,pb1064,pw1064,pm1029,pop,avginc,density,stateid,shall
0,77,414.399994,14.2,96.800003,83,8.384873,55.122906,18.174412,3.780403,9.563148,0.074552,1,0
1,78,419.100006,13.3,99.099998,94,8.352101,55.143665,17.99408,3.831838,9.932,0.075567,1,0
2,79,413.299988,13.2,109.5,144,8.329575,55.135857,17.839336,3.866248,9.877028,0.076245,1,0
3,80,448.5,13.2,132.100006,141,8.408386,54.912586,17.734198,3.900368,9.541428,0.076829,1,0
4,81,470.5,11.9,126.5,149,8.483435,54.925125,17.673716,3.918531,9.548351,0.077187,1,0


In [5]:
# 1.a MCO log(vio) sur shall
df['lvio'] = np.log(df['vio'])
m1 = sm.OLS(df['lvio'], sm.add_constant(df['shall']), missing = 'drop')
res1 = m1.fit(cov_type='HC0')
print(res1.summary())

                            OLS Regression Results                            
Dep. Variable:                   lvio   R-squared:                       0.087
Model:                            OLS   Adj. R-squared:                  0.086
Method:                 Least Squares   F-statistic:                     87.01
Date:                Wed, 26 Mar 2025   Prob (F-statistic):           5.22e-20
Time:                        10:52:50   Log-Likelihood:                -1097.7
No. Observations:                1173   AIC:                             2199.
Df Residuals:                    1171   BIC:                             2209.
Df Model:                           1                                         
Covariance Type:                  HC0                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const          6.1349      0.019    318.078      0.0

In [6]:
# 1.b MCO log(vio) sur shall, incarc_rate, density, avginc, pop, pb1064, pw1064 et pm1029.
m2 = sm.OLS(df['lvio'], sm.add_constant(
    df[['shall', 'incarc_rate', 'density', 'avginc', 'pop', 'pb1064', 'pw1064', 'pm1029']]), missing = 'drop')
res2 = m2.fit(cov_type='HC0')
print(res2.summary())


                            OLS Regression Results                            
Dep. Variable:                   lvio   R-squared:                       0.564
Model:                            OLS   Adj. R-squared:                  0.561
Method:                 Least Squares   F-statistic:                     96.40
Date:                Wed, 26 Mar 2025   Prob (F-statistic):          6.77e-123
Time:                        10:52:54   Log-Likelihood:                -663.61
No. Observations:                1173   AIC:                             1345.
Df Residuals:                    1164   BIC:                             1391.
Df Model:                           8                                         
Covariance Type:                  HC0                                         
                  coef    std err          z      P>|z|      [0.025      0.975]
-------------------------------------------------------------------------------
const           2.9817      0.607      4.915      

In [7]:
year = pd.Categorical(df.year)
df = df.set_index(['stateid', 'year'])
df['year'] = year


In [8]:
exog =  df[['shall', 'incarc_rate', 'density', 'avginc', 'pop', 'pb1064', 'pw1064', 'pm1029', 'year']]

In [9]:
from linearmodels.panel import RandomEffects
mod = RandomEffects(df.lvio, exog)
re_res = mod.fit()
print(re_res)

                        RandomEffects Estimation Summary                        
Dep. Variable:                   lvio   R-squared:                        0.8790
Estimator:              RandomEffects   R-squared (Between):              0.9936
No. Observations:                1173   R-squared (Within):               0.3834
Date:                Wed, Mar 26 2025   R-squared (Overall):              0.9931
Time:                        10:53:03   Log-likelihood                    624.09
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      276.77
Entities:                          51   P-value                           0.0000
Avg Obs:                       23.000   Distribution:                 F(30,1143)
Min Obs:                       23.000                                           
Max Obs:                       23.000   F-statistic (robust):             276.77
                            

In [10]:
from linearmodels.panel import PanelOLS
fe_mod = PanelOLS(df.lvio, exog, entity_effects=True)
fe_res = fe_mod.fit()
print(fe_res)

                          PanelOLS Estimation Summary                           
Dep. Variable:                   lvio   R-squared:                        0.4180
Estimator:                   PanelOLS   R-squared (Between):              0.6019
No. Observations:                1173   R-squared (Within):               0.4180
Date:                Wed, Mar 26 2025   R-squared (Overall):              0.6018
Time:                        10:53:06   Log-likelihood                    683.58
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      26.143
Entities:                          51   P-value                           0.0000
Avg Obs:                       23.000   Distribution:                 F(30,1092)
Min Obs:                       23.000                                           
Max Obs:                       23.000   F-statistic (robust):             26.143
                            

In [None]:
# Différences premières