# This file replicates the regressions in PSMJ

In [220]:
import pandas as pd
from statsmodels.iolib.summary2 import summary_col
from linearmodels import PanelOLS

#### Step 1: Loading the sample file

In [221]:
df = pd.read_parquet('../input/psmjsample.parquet')

#### Step 2: Set up regressions

In [222]:
dfsample = df[df['INSAMPLE']==1]

#### Step 3: create sample splits

In [223]:
# Only check receipients
df['ONLY CHECK'] = (df['EVER REBATE']==1) & (df['EVER EFT']==0)

# Only EFT receipients
df['ONLY EFT'] = (df['EVER REBATE']==1) & (df['EVER CHECK']==0)

#### Step 4: run regression

In [227]:
dfsample['INT_DATE_COPY'] = dfsample.index.get_level_values(1)

# we write all the diagnostics to text as well
with open('../output/psmjregressions.txt', 'w') as f:

    for sampleselec in ['INSAMPLE', 'EVER REBATE', 'ONLY CHECK', 'ONLY EFT']:

        resset = []

        for depvar in ['d_FOODBEVS','d_SNDEXP','d_NDEXP','d_TOTEXP2']:

            mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1]) 

            res = mod.fit(cov_type='clustered', cluster_entity=True)  #

            # some items are named differently for summary column to work
            res.bse = res.std_errors
            res.tvalues = res.tstats

            res.model.exog_names = res._var_names
            res.model.endog_names = depvar

            resset.append(res)

        # combine results in columns and print to text file
        results = summary_col(resset,stars=True,float_format='%0.3f',
                          info_dict={'N':lambda x: "{0:d}".format(int(x.nobs)),
                                     'R2':lambda x: "{:.2f}".format(x.rsquared)})

        print(results)
        f.write(str(sampleselec) + '\n')  
        f.write(str(results) + '\n\n\n') 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfsample['INT_DATE_COPY'] = dfsample.index.get_level_values(1)
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])



               d_FOODBEVS  d_SNDEXP   d_NDEXP   d_TOTEXP2
---------------------------------------------------------
RBTAMT         0.013      0.071      0.120**    0.518*** 
               (0.027)    (0.047)    (0.056)    (0.183)  
AGE            0.717**    -0.226     0.663      5.541**  
               (0.338)    (0.632)    (0.786)    (2.237)  
d_NUM_ADULTS   153.983*** 382.112*** 483.073*** 533.099  
               (55.560)   (102.290)  (115.729)  (387.096)
d_PERSLT18     40.545     98.683     109.682    -311.207 
               (45.817)   (83.660)   (103.169)  (347.452)
R-squared      0.001      0.002      0.002      0.001    
R-squared Adj. nan        nan        nan        nan      
N              17304      17304      17304      17304    
R2             0.00       0.00       0.00       0.00     
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])



               d_FOODBEVS  d_SNDEXP   d_NDEXP   d_TOTEXP2
---------------------------------------------------------
RBTAMT         0.047      0.146***   0.198***   0.685*** 
               (0.031)    (0.055)    (0.067)    (0.224)  
AGE            0.672*     -0.015     1.310      5.995**  
               (0.399)    (0.747)    (0.956)    (2.751)  
d_NUM_ADULTS   130.524**  312.442*** 451.549*** -85.991  
               (52.833)   (94.088)   (115.178)  (377.497)
d_PERSLT18     42.017     185.477*   154.781    -383.894 
               (62.174)   (105.882)  (131.756)  (444.790)
R-squared      0.001      0.002      0.003      0.001    
R-squared Adj. nan        nan        nan        nan      
N              11154      11154      11154      11154    
R2             0.00       0.00       0.00       0.00     
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])



               d_FOODBEVS  d_SNDEXP   d_NDEXP   d_TOTEXP2
---------------------------------------------------------
RBTAMT         0.027      0.148*     0.151      0.772*** 
               (0.043)    (0.083)    (0.097)    (0.265)  
AGE            0.706      0.288      0.749      6.616*   
               (0.527)    (0.939)    (1.203)    (3.402)  
d_NUM_ADULTS   183.586*** 394.446*** 439.842*** 356.268  
               (67.930)   (124.947)  (142.036)  (425.241)
d_PERSLT18     72.013     282.868**  223.711    -462.338 
               (78.638)   (141.322)  (165.620)  (601.904)
R-squared      0.002      0.004      0.003      0.002    
R-squared Adj. nan        nan        nan        nan      
N              6477       6477       6477       6477     
R2             0.00       0.00       0.00       0.00     
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])



               d_FOODBEVS  d_SNDEXP  d_NDEXP  d_TOTEXP2 
--------------------------------------------------------
RBTAMT         0.039      0.073     0.177*    0.528     
               (0.048)    (0.086)   (0.102)   (0.419)   
AGE            0.248      -1.076    1.611     0.521     
               (0.632)    (1.300)   (1.713)   (4.720)   
d_NUM_ADULTS   52.566     146.357   434.207** -1206.021*
               (89.419)   (149.142) (202.294) (684.209) 
d_PERSLT18     4.264      57.312    117.924   -325.430  
               (111.754)  (166.382) (234.599) (652.223) 
R-squared      0.000      0.001     0.002     0.002     
R-squared Adj. nan        nan       nan       nan       
N              4589       4589      4589      4589      
R2             0.00       0.00      0.00      0.00      
Standard errors in parentheses.
* p<.1, ** p<.05, ***p<.01


  mod = PanelOLS.from_formula(depvar + ' ~   RBTAMT + AGE + d_NUM_ADULTS + d_PERSLT18 + TimeEffects ', dfsample[df[sampleselec]==1])
