## Appendix Table 3

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as sm
import numpy as np 

%matplotlib inline

The first step is to load the data set into a pandas DataFrame.

In [2]:
# Read Indian2000Data.csv into a DataFrame; the file's first column is used
# as the row index.
dat = pd.read_csv(
    filepath_or_buffer='Indian2000Data.csv',
    index_col=0,
)

This should be all the data required, so now we move on to the fun stuff!

## Recreate Table 3 In Appendix

Modify the dataframe in place (in some admittedly sketchy ways): add natural-log versions of the `pcinc` and `pcinc_co` columns.

In [3]:
# Add natural-log versions of the `pcinc` and `pcinc_co` columns to `dat`.
# Each pair is (existing source column, new log column).
for source_col, log_col in (('pcinc', 'pcinc_log'), ('pcinc_co', 'pcinc_co_log')):
    dat[log_col] = np.log(dat[source_col])

#### Apply Model Based on Table Columns

##### Column 1 -- Confirmed

In [4]:
# Column 1: OLS of pcinc_log on FC and HC only, with no additional regressors.
res = sm.ols(
    data=dat,
    formula='pcinc_log ~ FC + HC',
).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              pcinc_log   R-squared:                       0.212
Model:                            OLS   Adj. R-squared:                  0.203
Method:                 Least Squares   F-statistic:                     24.03
Date:                Tue, 27 Dec 2016   Prob (F-statistic):           5.69e-10
Time:                        22:35:00   Log-Likelihood:                -60.328
No. Observations:                 182   AIC:                             126.7
Df Residuals:                     179   BIC:                             136.3
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      9.3373      0.041    227.652      0.0

##### Column 2 -- Confirmed

In [5]:
# Column 2: FC and HC plus pcinc_co_log, unempl_co, logdist, logruggedness
# and logresarea_sqkm as additional regressors.
res = sm.ols(
    data=dat,
    formula='pcinc_log ~ FC + HC + pcinc_co_log + unempl_co + logdist + logruggedness + logresarea_sqkm',
).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              pcinc_log   R-squared:                       0.360
Model:                            OLS   Adj. R-squared:                  0.334
Method:                 Least Squares   F-statistic:                     13.95
Date:                Tue, 27 Dec 2016   Prob (F-statistic):           2.66e-14
Time:                        22:35:01   Log-Likelihood:                -41.423
No. Observations:                 182   AIC:                             98.85
Df Residuals:                     174   BIC:                             124.5
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [95.0% Conf. Int.]
-----------------------------------------------------------------------------------
Intercept           6.1222      2.157     

##### Column 3 -- values off

In [6]:
# Column 3: FC and HC plus the ea_v5, ea_v30, ea_v32 and ea_v66 columns.
# The section heading marks this column's values as off.
# NOTE(review): the ea_* terms enter the formula as plain numeric regressors
# (one coefficient each); if they are categorical codes they may need
# C(...) wrapping -- confirm against the reference table.
res = sm.ols(
    data=dat,
    formula='pcinc_log ~ FC + HC + ea_v5 + ea_v30 + ea_v32 + ea_v66',
).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              pcinc_log   R-squared:                       0.251
Model:                            OLS   Adj. R-squared:                  0.225
Method:                 Least Squares   F-statistic:                     9.780
Date:                Tue, 27 Dec 2016   Prob (F-statistic):           2.74e-09
Time:                        22:35:01   Log-Likelihood:                -55.657
No. Observations:                 182   AIC:                             125.3
Df Residuals:                     175   BIC:                             147.7
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      9.3351      0.145     64.552      0.0

##### Column 4 -- Confirmed

In [7]:
# Column 4: FC and HC plus logpop, its square, popadultshare and casino.
# The squared term is stored on `dat` as a new column so the formula can
# refer to it by name.
dat['logpopsq'] = dat['logpop'].pow(2)
res = sm.ols(
    data=dat,
    formula='pcinc_log ~ FC + HC + logpop + logpopsq + popadultshare + casino',
).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              pcinc_log   R-squared:                       0.328
Model:                            OLS   Adj. R-squared:                  0.304
Method:                 Least Squares   F-statistic:                     14.21
Date:                Tue, 27 Dec 2016   Prob (F-statistic):           3.72e-13
Time:                        22:35:01   Log-Likelihood:                -45.865
No. Observations:                 182   AIC:                             105.7
Df Residuals:                     175   BIC:                             128.2
Df Model:                           6                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [95.0% Conf. Int.]
---------------------------------------------------------------------------------
Intercept         8.6654      0.628     13.793

##### Column 5 -- Not confirmed

In [8]:
# Column 5: currently fits pcinc_log on FC and HC only.
# NOTE(review): this formula is identical to the one used in the Column 1
# cell, so it reproduces the Column 1 fit -- the intended Column 5
# specification still needs to be filled in. TODO confirm against the
# reference table.
res = sm.ols(
    data=dat,
    formula='pcinc_log ~ FC + HC',
).fit()
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:              pcinc_log   R-squared:                       0.212
Model:                            OLS   Adj. R-squared:                  0.203
Method:                 Least Squares   F-statistic:                     24.03
Date:                Tue, 27 Dec 2016   Prob (F-statistic):           5.69e-10
Time:                        22:35:01   Log-Likelihood:                -60.328
No. Observations:                 182   AIC:                             126.7
Df Residuals:                     179   BIC:                             136.3
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------
Intercept      9.3373      0.041    227.652      0.0

### Export Any Changes to dat

In [9]:
# Save `dat`, including the columns added earlier in this notebook, as a
# pickle file named 'indian2000.df'.
dat.to_pickle(path='indian2000.df')