# PPML Estimation of Model

* Poisson Pseudo-Maximum-Likelihood (PPML) is not a common estimation method
* It requires specialist functions, which may be found in e.g. Stata

But we want Python here!

Use the GME package from the US International Trade Commission.

In [8]:
import pandas as pd
import numpy as np
import gme as gme
# Download the services-trade workbook and take a first look at it:
# its dimensions, then its first rows.
url = 'https://www.dropbox.com/s/2uha8rwc8bngcsz/servicesdataset%202.xlsx?dl=1'
df = pd.read_excel(io=url)
print(df.shape)
print(df.head())


# Keep only the columns used downstream and discard every row that has a
# missing value in any of them.
trade_data = df[[
    'exp', 'imp', 'trade', 'year',
    'gdp_exp', 'gdp_imp',
    'contig', 'comlang_off', 'distw',
    'ent_cost_imp', 'ent_cost_exp', 'colony',
]].dropna()

# Bilateral accessibility, computed as exp(-log(distw)).
trade_data['bilateral accessibility'] = np.exp(-np.log(trade_data['distw']))


# Create the EU dummy columns.
# `imp_is_EU` / `exp_is_EU` are 1 when the importer / exporter code is in
# `EU` and 0 otherwise; `between_EU` is their product, i.e. 1 only when both
# sides of the pair are in `EU`.
# NOTE(review): 'PRT' is not in this list -- confirm whether the omission is
# intentional.
EU = ['AUT','BEL','CYP','CZE','DNK','EST','FIN','FRA','DEU','GRC','HUN','IRL','ITA','LVA','LTU','LUX','MLT','NLD','POL','SVK','SVN','ESP','SWE','GBR']
eu_members = set(EU)  # set membership is O(1) per lookup

imp_countries = trade_data['imp'].tolist()
exp_countries = trade_data['exp'].tolist()

# Build each lookup from its OWN column. Previously both dictionaries were
# filled while looping over importer codes only, so an exporter that never
# appears as an importer had no entry in `exp_EU` and was mapped to NaN.
imp_EU = {country: int(country in eu_members) for country in imp_countries}
exp_EU = {country: int(country in eu_members) for country in exp_countries}

trade_data['imp_is_EU'] = trade_data['imp'].map(imp_EU)
trade_data['exp_is_EU'] = trade_data['exp'].map(exp_EU)
trade_data['between_EU'] = trade_data['imp_is_EU'] * trade_data['exp_is_EU']
        
# Add natural-log versions of exporter GDP, importer GDP and weighted
# distance, then preview the enriched frame.
for source_col, log_col in (
    ('gdp_exp', 'log_gdp_exp'),
    ('gdp_imp', 'log_gdp_imp'),
    ('distw', 'log_distance'),
):
    trade_data[log_col] = np.log(trade_data[source_col])
print(trade_data.head())

(31092, 25)
   exp  imp  year    trade sector  contig  comlang_off  comlang_ethno  colony  \
0  HUN  ABW  2005  0.00000    SER     0.0          0.0            0.0     0.0   
1  GBR  ABW  2005  0.00000    SER     0.0          0.0            1.0     0.0   
2  IRL  ABW  2005  0.00000    SER     0.0          0.0            1.0     0.0   
3  ITA  ABW  2005  4.97438    SER     0.0          0.0            0.0     0.0   
4  BEL  ABW  2005  2.48719    SER     0.0          1.0            1.0     0.0   

   comcol  ...     distw  distwces       gdp_exp  gdp_imp  etcr_exp  etcr_imp  \
0     0.0  ...  8940.499  8939.646  5.900000e+10      NaN  2.751628       NaN   
1     0.0  ...  7480.796  7479.964  1.700000e+12      NaN  0.940455       NaN   
2     0.0  ...  7123.465  7122.625  1.300000e+11      NaN  3.073227       NaN   
3     0.0  ...  8427.789  8422.343  1.100000e+12      NaN  2.036142       NaN   
4     0.0  ...  7843.255  7843.006  2.500000e+11      NaN  2.007156       NaN   

   ent_cost_im

In [10]:
# Step 1: wrap the prepared frame in a gme EstimationData object, telling the
# package which columns hold the importer, exporter, trade flow and year.
gme_data = gme.EstimationData(
    data_frame=trade_data,
    imp_var_name='imp',
    exp_var_name='exp',
    trade_var_name='trade',
    year_var_name='year',
)

In [11]:
# Step 2: specify the baseline model -- trade regressed on log GDPs, log
# distance, common official language and contiguity.
model_basic = gme.EstimationModel(
    estimation_data=gme_data,
    lhs_var='trade',
    rhs_var=[
        'log_gdp_exp',
        'log_gdp_imp',
        'log_distance',
        'comlang_off',
        'contig',
    ],
)

In [12]:
# Step 3: run the regression and print the summary stored under the 'all' key.
basic_estimates = model_basic.estimate()
results = basic_estimates['all']
summary_basic = results.summary()
print(summary_basic)

select specification variables: ['log_gdp_exp', 'log_gdp_imp', 'log_distance', 'comlang_off', 'contig', 'trade', 'imp', 'exp', 'year'], Observations excluded by user: {'rows': 0, 'columns': 10}
drop_intratrade: no, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_imp: none, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_exp: none, Observations excluded by user: {'rows': 0, 'columns': 0}
keep_imp: all available, Observations excluded by user: {'rows': 0, 'columns': 0}
keep_exp: all available, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_years: none, Observations excluded by user: {'rows': 0, 'columns': 0}
keep_years: all available, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_missing: yes, Observations excluded by user: {'rows': 0, 'columns': 0}
Estimation began at 04:22 AM  on Feb 27, 2021
Omitted Columns: []
Estimation completed at 04:22 AM  on Feb 27, 2021
                 Generalized Linear Model Regression Results       



In [13]:
# Same regressors as the baseline model, now with importer and exporter
# fixed effects; estimate it and print the summary stored under 'all'.
model_fix = gme.EstimationModel(
    estimation_data=gme_data,
    lhs_var='trade',
    rhs_var=[
        'log_gdp_exp',
        'log_gdp_imp',
        'log_distance',
        'comlang_off',
        'contig',
    ],
    fixed_effects=['imp', 'exp'],
)
fix_estimates = model_fix.estimate()
results_fix = fix_estimates['all']
summary_fix = results_fix.summary()
print(summary_fix)

select specification variables: ['log_gdp_exp', 'log_gdp_imp', 'log_distance', 'comlang_off', 'contig', 'trade', 'imp', 'exp', 'year'], Observations excluded by user: {'rows': 0, 'columns': 10}
drop_intratrade: no, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_imp: none, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_exp: none, Observations excluded by user: {'rows': 0, 'columns': 0}
keep_imp: all available, Observations excluded by user: {'rows': 0, 'columns': 0}
keep_exp: all available, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_years: none, Observations excluded by user: {'rows': 0, 'columns': 0}
keep_years: all available, Observations excluded by user: {'rows': 0, 'columns': 0}
drop_missing: yes, Observations excluded by user: {'rows': 0, 'columns': 0}
Estimation began at 04:26 AM  on Feb 27, 2021
Omitted Columns: ['imp_fe_KIR', 'imp_fe_PLW', 'exp_fe_FSM', 'exp_fe_KIR', 'exp_fe_PLW', 'exp_fe_SLB', 'exp_fe_TON', 'imp_fe_ZWE', 'exp_fe



                 Generalized Linear Model Regression Results                  
Dep. Variable:                  trade   No. Iterations:                     11
Model:                            GLM   Df Residuals:                    24673
Model Family:                 Poisson   Df Model:                          320
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -2.4108e+06
Covariance Type:                  HC1   Deviance:                   4.7764e+06
No. Observations:               24994   Pearson chi2:                 8.10e+06
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
log_gdp_exp      2.2325      2.056      1.086      0.277      -1.797       6.262
log_gdp_imp     -1.9511      2.038     -0.957      0.338      -5.946       2.043
log_distance    -0.5856      0.058    -10.12

'''
Generalized Linear Model Regression Results for fixed effects

==============================================================================
Dep. Variable:                  trade   No. Iterations:                     11
Model:                            GLM   Df Residuals:                    24673
Model Family:                 Poisson   Df Model:                          320
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -2.4108e+06
Covariance Type:                  HC1   Deviance:                   4.7627e+06
No. Observations:               24994   Pearson chi2:                 8.13e+06

                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
log_gdp_exp      2.2325      2.056      1.086      0.277      -1.797       6.262
log_gdp_imp     -1.9511      2.038     -0.957      0.338      -5.946       2.043
log_distance    -0.5856      0.058    -10.124      0.000      -0.699      -0.472
comlang_off      0.3306      0.192      1.723      0.085      -0.046       0.707
contig           0.2247      0.179      1.256      0.209      -0.126       0.576
'''

Great success! The estimated log-distance coefficient (-0.59) is reasonably close to the value expected from the literature (-0.56).

## Tips for Our Successors

* Don't underestimate the importance of a solid lit review!

* Expect error messages, setbacks and results that don't make sense

* Python is a great tool, but it is not specialist statistical software! Consider running PPML models in Stata

* Have fun and enjoy the process!