In [1]:
import pandas
import numpy
from scipy.stats import chi2, f
import statsmodels.api as sm
import warnings
from statsmodels.tools.sm_exceptions import ValueWarning
warnings.simplefilter('ignore',ValueWarning)

## Hybrid sample

In [2]:
# Read the tab-separated job-postings table, then keep the rows whose
# `plant_5` column equals 0 -- this subset is what the notebook calls the
# "hybrid" sample. Row counts of both frames are printed as a sanity check.
filepath = 'D:/BG/Data/Processing/4_data.txt'
data = pandas.read_csv(filepath, sep='\t')
print(len(data),'job postings in full sample')

in_hybrid_sample = data['plant_5'] == 0
hybrid = data.loc[in_hybrid_sample]
print(len(hybrid),'job postings in hybrid sample')

3091485 job postings in full sample
212822 job postings in hybrid sample


## Preparing data 
### (only model C is needed)

In [3]:
# Build the outcome measures (columns of `df`) and the regressor matrix `XC`
# used by the Poisson models below.
#
# Work on an independent copy with a fresh 0..n-1 index so that `hybrid`
# itself is never modified and re-running this cell is idempotent.
df = hybrid.copy()
df = df.reset_index(drop=True)

# Outcome measures: each aggregate is the plain sum of its component columns
# (a missing component therefore makes the aggregate missing as well).
measures = ['Engineering Skills','Operations Skills','Support Skills','General Skills','Job Complexity']
df['Engineering Skills'] = df['research']+df['design']+df['materials']+df['development']
df['Operations Skills'] = df['tools']+df['inventory']+df['production']
df['Support Skills'] = df['business']+df['finance']+df['management']+df['analysis']+df['customer']+df['office']+df['software']
df['General Skills'] = df['cognitive']+df['social']
df['Job Complexity'] = df['complexity']

# Plant label of every posting; passed later as the cluster groups for the
# cluster-robust covariance.
Z = df['plant'].tolist()

# Period indicator: 0 for postings dated before 2017, 1 for 2017 onwards.
# Rows whose JobDate has no year are left missing.
df['Year'] = pandas.to_datetime(df.JobDate).dt.year
df.loc[df.Year<2017,'Period'] = 0
df.loc[df.Year>2016,'Period'] = 1

X = df[['TECH','occupation','plant','Period']]
# dtype is pinned to uint8 (the pandas < 2.0 default). pandas >= 2.0 would
# otherwise emit bool dummies; mixed with the float `Period` and the integer
# `Intercept` columns the design matrix would then convert to an object array,
# which statsmodels refuses to fit.
X = pandas.get_dummies(X,columns=['TECH','occupation','plant'],dtype='uint8')
# Reference categories: TECH == 'TM', occupation == 'Operator', and one plant.
X = X.drop(['TECH_TM','occupation_Operator','plant_United Technolo East Hartford'],axis=1)
X = X.rename(columns={'occupation_Manager':'Manager','occupation_Engineer':'Engineer','occupation_Technician':'Technician','TECH_AM':'TECH'})

# Two- and three-way interactions of TECH, Period and each occupation dummy.
occ = ['Manager','Engineer','Technician']
op = []   # occupation x Period column names
ao = []   # TECH x occupation column names
apo = []  # TECH x Period x occupation column names
for o in occ:
  X['TECH*'+o] = X['TECH']*X[o]
  X[o+'*Period'] = X[o]*X['Period']
  X['TECH*Period*'+o] = X['TECH']*X['Period']*X[o]
  op.append(o+'*Period')
  ao.append('TECH*'+o)
  apo.append('TECH*Period*'+o)
X['TECH*Period'] = X['TECH'] * X['Period']
X['Intercept'] = 1

# Put the coefficients of interest first, followed by the plant fixed effects,
# so they appear at the top of the regression summary.
listC = ['TECH'] + occ + ['Period'] + ao + ['TECH*Period'] + op + apo + ['Intercept']
all_variables = listC + (X.columns.drop(listC).tolist())
XC = X[all_variables]
print(XC.shape)
print(XC.columns[0:25])

(212822, 334)
Index(['TECH', 'Manager', 'Engineer', 'Technician', 'Period', 'TECH*Manager',
       'TECH*Engineer', 'TECH*Technician', 'TECH*Period', 'Manager*Period',
       'Engineer*Period', 'Technician*Period', 'TECH*Period*Manager',
       'TECH*Period*Engineer', 'TECH*Period*Technician', 'Intercept',
       'plant_3m Odessa', 'plant_3m Saint Paul', 'plant_Aerojet Jupiter',
       'plant_Aerojet Los Angeles', 'plant_Aerospace El Segundo',
       'plant_Aerovironment Boston', 'plant_Aerovironment Burlington',
       'plant_Aerovironment Simi Valley', 'plant_Alcoa Austin'],
      dtype='object')


## Poisson Regression

In [8]:
def _semi_elasticity_test(params, cov, terms, n_obs):
  """Semi-elasticity of a sum of coefficients with a delta-method Wald test.

  Parameters
  ----------
  params : pandas.Series
      Fitted coefficients indexed by regressor name.
  cov : pandas.DataFrame
      Covariance matrix of the coefficients (rows/columns labelled like
      `params`).
  terms : list of str
      Names of the coefficients that are summed.
  n_obs : int
      Number of observations; the reference distribution is F(1, n_obs - 2).

  Returns
  -------
  tuple
      (s, stat, p) where s = exp(sum of coefficients) - 1, stat = s**2 / var(s)
      with var(s) = exp(sum)**2 * 1' cov[terms, terms] 1, and p is the
      upper-tail probability of stat.
  """
  e = numpy.exp(sum(params[t] for t in terms))
  s = e - 1
  ones = numpy.ones(len(terms))
  v = e**2 * (ones @ cov.loc[terms, terms].to_numpy() @ ones)
  stat = s**2 / v
  # sf() is the survival function; unlike 1 - cdf() it does not round tiny
  # p-values to exactly 0.
  # NOTE(review): the denominator df (n_obs - 2) is kept from the original
  # code; with a cluster-robust covariance an asymptotic chi2(1) reference may
  # have been intended -- confirm.
  return s, stat, f.sf(stat, 1, n_obs - 1 - 1)


# One Poisson regression per outcome measure, with standard errors clustered
# on the plant labels in Z. For every occupation and period the effect of TECH
# is collected in SC as [measure, occupation, period, semi-elasticity, p-value].
SC = []
n = XC.shape[0]
for m in measures:
  regC = sm.Poisson(df[m],XC).fit(method='cg',maxiter=1000,skip_hessian=True,cov_type='cluster',cov_kwds={'groups':Z})
  print(regC.summary())
  coefs = regC.params
  coef_cov = regC.cov_params()  # computed once per model instead of once per test
  for o in occ:
    occupation_terms = (
      ('2014-2016', ['TECH','TECH*'+o]),
      ('2017-2019', ['TECH','TECH*Period','TECH*'+o,'TECH*Period*'+o]),
    )
    for period, terms in occupation_terms:
      s, stat, p = _semi_elasticity_test(coefs, coef_cov, terms, n)
      print(s)
      print(stat)
      SC.append([m,o,period,s,p])
  # Operator is the omitted occupation category, so its TECH effect involves
  # only the TECH main effect (plus TECH*Period in the later period).
  operator_terms = (
    ('2014-2016', ['TECH']),
    ('2017-2019', ['TECH','TECH*Period']),
  )
  for period, terms in operator_terms:
    s, stat, p = _semi_elasticity_test(coefs, coef_cov, terms, n)
    SC.append([m,'Operator',period,s,p])

Optimization terminated successfully.
         Current function value: 1.189779
         Iterations: 778
         Function evaluations: 1424
         Gradient evaluations: 1424
                          Poisson Regression Results                          
Dep. Variable:     Engineering Skills   No. Observations:               212822
Model:                        Poisson   Df Residuals:                   212488
Method:                           MLE   Df Model:                          333
Date:                Tue, 18 May 2021   Pseudo R-squ.:                  0.1693
Time:                        14:40:32   Log-Likelihood:            -2.5321e+05
converged:                       True   LL-Null:                   -3.0480e+05
Covariance Type:              cluster   LLR p-value:                     0.000
                                                   coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------

Optimization terminated successfully.
         Current function value: 0.771310
         Iterations: 536
         Function evaluations: 1361
         Gradient evaluations: 1361
                          Poisson Regression Results                          
Dep. Variable:      Operations Skills   No. Observations:               212822
Model:                        Poisson   Df Residuals:                   212488
Method:                           MLE   Df Model:                          333
Date:                Tue, 18 May 2021   Pseudo R-squ.:                  0.1951
Time:                        14:45:20   Log-Likelihood:            -1.6415e+05
converged:                       True   LL-Null:                   -2.0394e+05
Covariance Type:              cluster   LLR p-value:                     0.000
                                                   coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------

Optimization terminated successfully.
         Current function value: 2.436244
         Iterations: 712
         Function evaluations: 1227
         Gradient evaluations: 1227
                          Poisson Regression Results                          
Dep. Variable:         Support Skills   No. Observations:               212822
Model:                        Poisson   Df Residuals:                   212488
Method:                           MLE   Df Model:                          333
Date:                Tue, 18 May 2021   Pseudo R-squ.:                 0.05142
Time:                        14:50:20   Log-Likelihood:            -5.1849e+05
converged:                       True   LL-Null:                   -5.4659e+05
Covariance Type:              cluster   LLR p-value:                     0.000
                                                   coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------

Optimization terminated successfully.
         Current function value: 1.766811
         Iterations: 701
         Function evaluations: 1216
         Gradient evaluations: 1216
                          Poisson Regression Results                          
Dep. Variable:         General Skills   No. Observations:               212822
Model:                        Poisson   Df Residuals:                   212488
Method:                           MLE   Df Model:                          333
Date:                Tue, 18 May 2021   Pseudo R-squ.:                 0.07533
Time:                        14:55:13   Log-Likelihood:            -3.7602e+05
converged:                       True   LL-Null:                   -4.0665e+05
Covariance Type:              cluster   LLR p-value:                     0.000
                                                   coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------

Optimization terminated successfully.
         Current function value: 1.713339
         Iterations: 837
         Function evaluations: 1456
         Gradient evaluations: 1456
                          Poisson Regression Results                          
Dep. Variable:         Job Complexity   No. Observations:               212822
Model:                        Poisson   Df Residuals:                   212488
Method:                           MLE   Df Model:                          333
Date:                Tue, 18 May 2021   Pseudo R-squ.:                 0.08092
Time:                        15:00:54   Log-Likelihood:            -3.6464e+05
converged:                       True   LL-Null:                   -3.9674e+05
Covariance Type:              cluster   LLR p-value:                     0.000
                                                   coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------

In [9]:
# Reshape the collected results into the layout of the published table:
# rows = (occupation, period), columns = (measure, statistic).
occupation_order = ['Manager','Engineer','Technician','Operator']
measure_order = ['Engineering Skills','Operations Skills','Support Skills','General Skills','Job Complexity']

mydf = pandas.DataFrame(SC,columns=['Measures','Occupations','Period','Semi-elasticity','p-value'])
mydf = mydf.set_index(['Measures','Occupations','Period'])
mydf = (
  mydf.unstack('Measures')            # measures move from the row index to the columns
      .swaplevel(0,1,axis=1)          # measure on top, statistic underneath
      .reindex(occupation_order,axis=0,level=0)
      .reindex(measure_order,axis=1,level=0)
)
# Format every cell as a 2-decimal string. DataFrame.applymap is deprecated
# since pandas 2.1; mapping each column with Series.map is element-wise
# equivalent and works on old and new pandas alike.
mydf = mydf.apply(lambda column: column.map('{:.2f}'.format))
mydf

Unnamed: 0_level_0,Measures,Engineering Skills,Engineering Skills,Operations Skills,Operations Skills,Support Skills,Support Skills,General Skills,General Skills,Job Complexity,Job Complexity
Unnamed: 0_level_1,Unnamed: 1_level_1,Semi-elasticity,p-value,Semi-elasticity,p-value,Semi-elasticity,p-value,Semi-elasticity,p-value,Semi-elasticity,p-value
Occupations,Period,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
Manager,2014-2016,2.68,0.0,0.25,0.4,-0.04,0.34,-0.02,0.72,0.19,0.0
Manager,2017-2019,1.04,0.0,0.06,0.73,-0.08,0.0,0.04,0.37,-0.01,0.79
Engineer,2014-2016,0.89,0.0,1.28,0.0,0.03,0.54,0.09,0.08,0.23,0.0
Engineer,2017-2019,0.78,0.0,1.06,0.0,-0.04,0.19,0.12,0.0,0.21,0.0
Technician,2014-2016,0.13,0.36,0.77,0.04,-0.06,0.26,0.23,0.05,0.13,0.38
Technician,2017-2019,0.43,0.05,0.33,0.11,-0.01,0.88,0.35,0.02,0.5,0.0
Operator,2014-2016,1.6,0.0,0.17,0.48,-0.0,0.95,0.38,0.01,0.56,0.01
Operator,2017-2019,0.98,0.01,-0.23,0.02,0.1,0.03,0.45,0.0,0.61,0.0


In [10]:
# Write the formatted results table to an Excel workbook in the current
# working directory.
table7_path = 'Table7.xlsx'
mydf.to_excel(table7_path)