In [2]:
import pandas
import numpy
from scipy import stats

## Hybrid sample

In [3]:
# Load the tab-separated job-postings file and report how many rows it holds.
filepath = 'D:/BG/Data/Processing/4_data.txt'
data = pandas.read_csv(filepath, sep='\t')
print(len(data), 'job postings in full sample')

# The hybrid sample is the subset of postings whose `plant_5` flag equals 0.
hybrid = data.loc[data['plant_5'] == 0]
print(len(hybrid), 'job postings in hybrid sample')

3091485 job postings in full sample
212822 job postings in hybrid sample


## Measures

In [6]:
# Work on a copy so the derived columns below are never written into `hybrid`.
mydf = hybrid.copy()

# Each composite skill score is the plain sum of its component columns.
skill_components = {
    'Engineering Skills': ['research', 'design', 'materials', 'development'],
    'Operations Skills': ['tools', 'inventory', 'production'],
    'Support Skills': ['business', 'finance', 'management', 'analysis',
                       'customer', 'office', 'software'],
    'General Skills': ['cognitive', 'social'],
}
for composite, parts in skill_components.items():
    # Accumulate with `+`, left to right, exactly as a hand-written a+b+c would.
    running_total = mydf[parts[0]]
    for part in parts[1:]:
        running_total = running_total + mydf[part]
    mydf[composite] = running_total

# Job complexity is carried over unchanged under its display name.
mydf['Job Complexity'] = mydf['complexity']

# Display names of all measures, in the order they are reported.
measures = list(skill_components) + ['Job Complexity']

## Statistics by occupation

In [7]:
# Build the occupation-level summary table: for every measure and occupation,
# the mean and SD of plant-level statistics for AM and TM plants, the p-value
# of an AM-vs-TM t-test, and the number of postings / plants behind each cell.
occupations = ['Manager','Engineer','Technician','Operator']
df = mydf[['plant','TECH','occupation'] + measures]

# ---- Means -----------------------------------------------------------------
# Stage 1: average the postings inside each plant x occupation x TECH cell.
mymean = df.groupby(by=['plant','occupation','TECH']).mean().reset_index()

# Two-sample t-test (equal_var=False, i.e. Welch's test) comparing the
# plant-level means of AM and TM plants; element [1] of the result is the p-value.
t = []
for m in measures:
    for o in occupations:
        in_occupation = mymean.occupation == o
        am_plant_means = mymean.loc[(mymean.TECH == 'AM') & in_occupation, m]
        tm_plant_means = mymean.loc[(mymean.TECH == 'TM') & in_occupation, m]
        t.append([m, o, stats.ttest_ind(am_plant_means, tm_plant_means, equal_var=False)[1]])
tdf = pandas.DataFrame(t, columns=['measures','occupations','p-val'])
tdf.set_index(['measures','occupations'], inplace=True)
tdf = tdf.applymap('{:.2f}'.format)

# Stage 2: average the plant-level means across plants.
# Only the measure columns are aggregated. `plant` is an ordinary column again
# after reset_index(); averaging an identifier is meaningless (its rows were
# discarded further down anyway) and can raise on recent pandas versions when
# the identifier is not numeric.
mean_mymean = mymean.groupby(by=['occupation','TECH'])[measures].mean().unstack(level=0).T
mean_mymean.index.names = ['measures','occupations']
mean_mymean = mean_mymean.applymap('{:.2f}'.format)
table2 = pandas.merge(left=mean_mymean, right=tdf, how='inner',
                      left_on=['measures','occupations'], right_on=['measures','occupations'])
table2['statistics'] = 'Mean'
table2.set_index('statistics', append=True, inplace=True)

# ---- Standard deviations ---------------------------------------------------
# Same two stages, using the within-plant population SD (ddof=0) in stage 1.
mysd = df.groupby(by=['plant','occupation','TECH']).std(ddof=0).reset_index()
mean_mysd = mysd.groupby(by=['occupation','TECH'])[measures].mean().unstack(level=0).T
mean_mysd.index.names = ['measures','occupations']
mean_mysd = mean_mysd.applymap('{:.2f}'.format)
mean_mysd = mean_mysd.applymap(lambda x: '(' + x + ')')  # SDs are shown in parentheses
mean_mysd['statistics'] = 'SD'
mean_mysd.set_index('statistics', append=True, inplace=True)
mean_mysd['p-val'] = numpy.nan  # no test is reported on the SD rows

# ---- Assemble: rows = (measure, statistic), columns = (occupation, AM/TM/p-val)
table2 = (
    pandas.concat([table2, mean_mysd], axis=0)
    .sort_index(kind='merge')
    .reindex(measures, axis=0, level='measures')   # measures in reporting order
    .unstack('occupations')
    .swaplevel(0, 1, axis=1)                       # occupation becomes the top column level
    .reindex(occupations, axis=1, level=0)         # occupations in reporting order
)
table2.rename_axis([None,None], axis=1, inplace=True)

# ---- Counts ----------------------------------------------------------------
# Select the single column *before* nunique() so that distinct values are not
# counted for every column of mydf only to be thrown away.
postings = (
    mydf.groupby(['occupation','TECH'])['BGTJobId'].nunique().to_frame()
    .reindex(occupations, level='occupation').T
)
postings.rename_axis([None,None], axis=1, inplace=True)
postings.index = pandas.MultiIndex.from_product([['Number of job postings'],['']])
plants = (
    mydf.groupby(['occupation','TECH'])['plant'].nunique().to_frame()
    .reindex(occupations, level='occupation').T
)
plants.rename_axis([None,None], axis=1, inplace=True)
plants.index = pandas.MultiIndex.from_product([['Number of plants'],['']])

table2 = pandas.concat([table2, postings, plants], axis=0).reindex(occupations, axis=1, level=0)
table2

Unnamed: 0_level_0,Unnamed: 1_level_0,Manager,Manager,Manager,Engineer,Engineer,Engineer,Technician,Technician,Technician,Operator,Operator,Operator
Unnamed: 0_level_1,Unnamed: 1_level_1,AM,TM,p-val,AM,TM,p-val,AM,TM,p-val,AM,TM,p-val
measures,statistics,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
Engineering Skills,Mean,1.15,0.51,0.0,2.74,1.56,0.0,1.37,0.87,0.0,0.87,0.47,0.0
Engineering Skills,SD,(0.47),(0.77),,(0.95),(1.27),,(0.37),(0.81),,(0.23),(0.61),
Operations Skills,Mean,0.43,0.29,0.03,0.60,0.31,0.0,0.96,0.76,0.09,0.95,1.12,0.2
Operations Skills,SD,(0.16),(0.54),,(0.38),(0.49),,(0.36),(0.81),,(0.37),(1.11),
Support Skills,Mean,6.19,6.71,0.01,6.05,6.11,0.74,4.44,4.66,0.31,4.57,4.50,0.67
Support Skills,SD,(1.03),(2.67),,(1.50),(2.71),,(0.68),(1.99),,(0.61),(2.01),
General Skills,Mean,2.68,2.49,0.12,2.58,2.32,0.0,2.19,1.52,0.0,1.92,1.45,0.0
General Skills,SD,(0.63),(1.49),,(0.84),(1.31),,(0.40),(1.01),,(0.43),(1.05),
Job Complexity,Mean,2.71,2.43,0.03,3.30,2.69,0.0,1.79,1.41,0.02,1.54,1.07,0.0
Job Complexity,SD,(0.57),(1.34),,(0.90),(1.37),,(0.40),(1.04),,(0.37),(0.92),


In [8]:
# Write the summary table to an Excel workbook in the current working directory.
# NOTE(review): the variable is called `table2` but the file is 'Table3.xlsx' -
# confirm which table number is intended.
table2.to_excel('Table3.xlsx')