<h1>Import Packages</h1>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
import os

# Global plotting defaults for the figures produced by this notebook.
plt.style.use('fivethirtyeight')
# NOTE(review): sns.set() runs after plt.style.use(); presumably seaborn's default
# theme then replaces much of the 'fivethirtyeight' style -- confirm the order is intended.
sns.set()
# Default matplotlib figure size, given as (12, 12).
plt.rcParams['figure.figsize'] = (12,12)

In [2]:
# Directory constants, relative to the notebook's working directory.
# TABLE receives the CSV summary tables written below and DATA holds the input dataset.
# GRAPH is presumably the figure output directory -- it is not used in the cells shown here.
GRAPH = '../result/Graphs'
TABLE = '../result/Tables'
DATA = '../data'
# Version suffix of the dataset file to load (Dataset_v<VERSION>.csv).
# NOTE(review): the inline legend only describes versions 1 and 2; what version 10
# contains is not documented here -- TODO confirm and update the legend.
VERSION = 10 #1: Merged All 2:Cleaned

<h1>Load Pre-processed Data</h1>

In [3]:
# Load the pre-processed dataset for the configured VERSION, then show how many
# distinct job titles it contains.
df = pd.read_csv(
    os.path.join(DATA, 'Dataset_v{}.csv'.format(VERSION))
)
df['JobTitle'].nunique()

14307

<h1>Employment Type</h1>



In [7]:
# Summary table of vacancies by employment type (JobType).
#   mean : NOTE -- despite the name, this is the group's share of ALL vacancies
#          (group sum / overall sum), not an arithmetic mean. The column name is
#          kept because it is what gets written to emp_type.csv.
#   std  : standard deviation of TotalVacancy across the postings in the group.
#   N    : number of postings in the group with a non-null TotalVacancy.
# All three statistics come from a single groupby pass instead of three.
vacancy_by_job_type = df.groupby('JobType')['TotalVacancy']
emp_type_df = (
    vacancy_by_job_type
    .agg(['sum', 'std', 'count'])
    .rename(columns={'sum': 'mean', 'count': 'N'})
)
emp_type_df['mean'] = emp_type_df['mean'] / df['TotalVacancy'].sum()
emp_type_df.to_csv(os.path.join(TABLE, 'emp_type.csv'), float_format='%.3f')
emp_type_df

Unnamed: 0_level_0,mean,std,N
JobType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Contract,0.042896,10.944079,1003
Freelance,0.008163,132.125481,14
FullTime,0.927318,7.126507,30173
Intern,0.011,16.221511,161
PartTime,0.010609,11.517815,174


<h1>Position Level</h1>

In [20]:
# Summary table of vacancies by position level (JobLevel).
#   mean : NOTE -- despite the name, this is the group's share of ALL vacancies
#          (group sum / overall sum), not an arithmetic mean. The column name is
#          kept because it is what gets written to pos_level.csv.
#   std  : standard deviation of TotalVacancy across the postings in the group.
#   N    : number of postings in the group with a non-null TotalVacancy.
# All three statistics come from a single groupby pass instead of three.
vacancy_by_job_level = df.groupby('JobLevel')['TotalVacancy']
pos_level_df = (
    vacancy_by_job_level
    .agg(['sum', 'std', 'count'])
    .rename(columns={'sum': 'mean', 'count': 'N'})
)
pos_level_df['mean'] = pos_level_df['mean'] / df['TotalVacancy'].sum()
pos_level_df.to_csv(os.path.join(TABLE, 'pos_level.csv'), float_format='%.3f')
pos_level_df

Unnamed: 0_level_0,mean,std,N
JobLevel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Entry,0.431505,12.079188,11185
"Entry, Mid",0.104703,6.595816,2711
"Entry, Mid, Top",0.021357,19.338263,230
"Entry, Top",0.000224,3.962323,5
Mid,0.364666,2.669774,14832
"Mid, Top",0.049633,2.38842,1398
Top,0.027913,2.554865,1165


<h1>Gender</h1>

In [21]:
# Collapse the MaleGender / FemaleGender columns into one categorical 'Gender' label.
# Presumably both columns are 0/1 indicators (the code only tests sums of 0, 1 and 2)
# -- TODO confirm against the pre-processing step.
#
# The labelling is vectorized: the previous row-by-row loop over range(len(df)) with
# df.loc[i, ...] was slow on ~30k rows and only worked with a default 0..n-1 index.
male_flag = df['MaleGender']
gender_sum = male_flag + df['FemaleGender']
exactly_one = gender_sum == 1

df['Gender'] = (
    gender_sum.astype(object)  # any unexpected sum (e.g. NaN) is left untouched
    .mask(gender_sum == 2, 'Both male and female')
    .mask(gender_sum == 0, 'Not Specified')
    .mask(exactly_one & (male_flag == 1), 'Male only')
    .mask(exactly_one & (male_flag != 1), 'Female only')
)

df['Gender'].value_counts()

Both male and female    18116
Not Specified            8608
Male only                4562
Female only               240
Name: Gender, dtype: int64

In [22]:
# Vacancy share and posting count per gender label, written to gender.csv.
# The share column keeps the name 'TotalVacancy' (it is not renamed here).
vacancy_by_gender = df.groupby(['Gender'])['TotalVacancy']
overall_vacancies = df['TotalVacancy'].sum()

gender_df = (vacancy_by_gender.sum() / overall_vacancies).to_frame()
gender_df['N'] = vacancy_by_gender.count()
gender_df.to_csv(os.path.join(TABLE, 'gender.csv'), float_format='%.3f')

<h1>Industry</h1>

In [23]:
# Keep the nine most frequent industry names and bucket every other value
# (including missing ones) under 'Others'.
top_industry = df['IndustryName'].value_counts().head(9).index.tolist()
is_top_industry = df['IndustryName'].isin(top_industry)
df['Industry'] = df['IndustryName'].where(is_top_industry, 'Others')

In [25]:
# Summary table of vacancies by (bucketed) industry.
#   mean : NOTE -- despite the name, this is the group's share of ALL vacancies
#          (group sum / overall sum), not an arithmetic mean. The column name is
#          kept because it is what gets written to industry.csv.
#   N    : number of postings in the group with a non-null TotalVacancy.
# Both statistics come from one groupby pass; the stray no-op expression and the
# commented-out experiments from the earlier version are removed.
vacancy_by_industry = df.groupby('Industry')['TotalVacancy']
industry_df = (
    vacancy_by_industry
    .agg(['sum', 'count'])
    .rename(columns={'sum': 'mean', 'count': 'N'})
)
industry_df['mean'] = industry_df['mean'] / df['TotalVacancy'].sum()
industry_df.to_csv(os.path.join(TABLE, 'industry.csv'), float_format='%.3f')
industry_df

Unnamed: 0_level_0,mean,N
Industry,Unnamed: 1_level_1,Unnamed: 2_level_1
Bank/ Non-Bank Fin. Institution,0.00759,827
Education,0.025341,1089
Garments/ Textile,0.025844,1549
Govt./ Semi-Govt./ Autonomous,0.001286,731
Information Technology (IT),0.706241,17989
Manufacturing (Heavy Industry),0.011643,864
NGO/Development,0.014998,1060
Others,0.147907,5895
Telecommunication,0.059152,1522


<h1>Location</h1>

In [26]:
# Location_dhaka: 'Dhaka' where City is exactly 'Dhaka', otherwise None.
in_dhaka = df['City'] == 'Dhaka'
df['Location_dhaka'] = np.where(in_dhaka, 'Dhaka', None)

# Location_bd: two-way split on whether Country is exactly 'Bangladesh'.
in_bangladesh = df['Country'] == 'Bangladesh'
df['Location_bd'] = np.where(
    in_bangladesh,
    'Dhaka or anywhere in Bangladesh',
    'Outside Bangladesh',
)

In [27]:
# Summary table of vacancies by location bucket, written to location.csv.
#   Mean : NOTE -- despite the name, this is the bucket's share of ALL vacancies
#          (bucket sum / overall sum). The name is kept as it is the CSV header.
#   N    : number of postings in the bucket with a non-null TotalVacancy.
# 'Dhaka City' is reported alongside the Bangladesh bucket, so the rows are not
# mutually exclusive.
#
# The frame is built once from records rather than grown row by row from an empty
# frame: each filter is evaluated a single time, and the numeric columns get real
# numeric dtypes. Enlarging an empty frame can leave them as object dtype on some
# pandas versions, in which case to_csv's float_format would not be applied.
total_vacancies = df['TotalVacancy'].sum()
location_filters = [
    ('Dhaka City', df['Location_dhaka'] == 'Dhaka'),
    ('Dhaka City or anywhere in Bangladesh',
     df['Location_bd'] == 'Dhaka or anywhere in Bangladesh'),
    ('Outside Bangladesh', df['Location_bd'] == 'Outside Bangladesh'),
]

location_rows = []
for label, row_filter in location_filters:
    bucket_vacancies = df.loc[row_filter, 'TotalVacancy']
    location_rows.append(
        (label, bucket_vacancies.sum() / total_vacancies, bucket_vacancies.count())
    )

location_df = pd.DataFrame(location_rows, columns=['Location', 'Mean', 'N'])
location_df.to_csv(os.path.join(TABLE, 'location.csv'), float_format='%.3f')
location_df

Unnamed: 0,Location,Mean,N
0,Dhaka City,0.461318,16351
1,Dhaka City or anywhere in Bangladesh,0.976434,31174
2,Outside Bangladesh,0.023566,352
