In [2]:
from IPython.display import HTML, display

# Inject a CSS rule so the notebook's ".container" element uses 85% of the
# page width.
display(
    HTML("<style>.container {width:85%;}</style>")
)

In [5]:
import numpy as np
import pandas as pd
import warnings
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.discrete.discrete_model import MultinomialModel

# Notebook-wide pandas display settings: show up to 100 rows and 100 columns,
# and render every float with four decimal places.
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.4f}".format)

# NOTE(review): this silences *all* warnings for the rest of the session,
# including any emitted by the model-fitting cells further down.
warnings.filterwarnings(action="ignore")

## Data

In [21]:
# NOTE(review): absolute, machine-specific path. The notebook only runs where
# this exact file exists; consider reading the location from a configurable
# data directory instead.
path = "/Users/geofrey.wanyama/Downloads/Microeconometrics1/Data sets/piaac9Feb2017Stata12.dta"

# Long education labels -> short codes used in the dummy column names.
# The 'â\x80\x93' sequences are the three UTF-8 bytes of an en dash read as
# Latin-1. Presumably the keys have to match the labels exactly as
# `pd.read_stata` returns them, so do not "clean" them without checking the
# raw data first.
edcat7 = {
    'Tertiaryâ\x80\x93master/research degree': 'TertMA',
    'Upper secondary': 'UpSec',
    'Post-secondary, non-tertiary': 'PostSec',
    'Lower secondary or less': 'LowSec',
    'Tertiary-professional degree': 'TertProf',
    'Tertiary â\x80\x93 bachelor degree': 'TertBA',
}

# Long labels of the outcome variable -> identifier-friendly names.
workreq = {
    'This level necessary': 'level_necessary',
    'Lower level sufficient': 'lower_sufficient',
    'Higher level needed': 'Higher_needed',
}

df = (
    pd.read_stata(path)
    # Keep rows whose country is 'fin' or 'est' and drop the '16-19' age group.
    .query("ageg5lfs != '16-19' and country in ['fin', 'est']")
    # Keep only the listed columns (names absent from the file are ignored by
    # `filter`), then drop rows with any missing value among them.
    .filter(["chatatwork", "ageg5lfs", "male", "edcat7", "immig", "indep", "workreq", "geo"])
    .dropna()
    .assign(
        # Remove category levels that no longer occur after the row filter so
        # that no all-zero dummy columns are generated for them later.
        ageg5lfs = lambda X: pd.Categorical(X["ageg5lfs"]).remove_unused_categories(),
        geo = lambda X: pd.Categorical(X["geo"]).remove_unused_categories(),
        # NOTE(review): labels missing from the dictionaries become NaN here,
        # i.e. *after* `dropna()` has already run.
        edcat7 = lambda X: X["edcat7"].map(edcat7),
        workreq = lambda X: X["workreq"].map(workreq),
        male = lambda X: X["male"].astype(int).astype("category"),
        immig = lambda X: X["immig"].astype(int).astype("category")
    )
    # '20-24' -> '20_24', so the resulting dummy column names contain no hyphen.
    .assign(ageg5lfs = lambda X: X["ageg5lfs"].apply(lambda x: x.replace("-", "_")))
)

# Design table: outcome, the two binary indicators, and one dummy column per
# non-reference level of age group, education and geo.
# `dtype=int` is required for current pandas: since pandas 2.0 `get_dummies`
# returns bool columns by default, and bool mixed with the integer/float
# columns turns the regressor matrix into an object array that statsmodels
# refuses to fit. Integer 0/1 dummies behave the same on old and new pandas.
data = pd.concat(
    [
        df[["workreq", "male", "immig"]],
        pd.get_dummies(df[["ageg5lfs", "edcat7", "geo"]], drop_first=True, dtype=int),
    ],
    axis=1,
)

data.head(3)

Unnamed: 0,workreq,male,immig,ageg5lfs_25_29,ageg5lfs_30_34,ageg5lfs_35_39,ageg5lfs_40_44,ageg5lfs_45_49,ageg5lfs_50_54,ageg5lfs_55_59,ageg5lfs_60_65,edcat7_UpSec,edcat7_PostSec,edcat7_TertProf,edcat7_TertBA,edcat7_TertMA,geo_fin
57418,level_necessary,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0
57422,level_necessary,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0
57423,lower_sufficient,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0


## Multinomial logit model

In [23]:
# Outcome: the recoded `workreq` column.
y = data["workreq"]

# Regressors: every remaining column of `data`, plus a constant column added
# by statsmodels so the model includes an intercept.
X = sm.add_constant(data.drop(columns="workreq"))

In [24]:
# Multinomial logit of `workreq` on the regressors in `X`.
model = sm.MNLogit(endog=y, exog=X)

# Fit with the statsmodels defaults and print the plain-text coefficient table.
result = model.fit()
print(result.summary())

Optimization terminated successfully.
         Current function value: 0.782541
         Iterations 6
                          MNLogit Regression Results                          
Dep. Variable:                workreq   No. Observations:                 5204
Model:                        MNLogit   Df Residuals:                     5170
Method:                           MLE   Df Model:                           32
Date:                Wed, 06 Sep 2023   Pseudo R-squ.:                 0.02583
Time:                        22:38:01   Log-Likelihood:                -4072.3
converged:                       True   LL-Null:                       -4180.3
Covariance Type:            nonrobust   LLR p-value:                 3.517e-29
workreq=lower_sufficient       coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------
const                       -1.1396      0.203     -5.616      0.000      -1.537