Author: Jianji Chen

email: jianjichen001@gmail.com

Please feel free to reach out with any questions or comments. Thank you!

Note: If you would like to replicate the results using the code below, please make sure that you have access to the SHARE datasets.

The dataset used here is publicly accessible by application through the official SHARE website: https://share-eric.eu/data/data-access

The dataset used here is:

SHARE-ERIC (2024). SHARE Job Episodes Panel. Release version: 9.0.0. SHARE-ERIC. Data set. DOI: 10.6103/SHARE.jep.900

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Load the SHARE Job Episodes Panel (release 9.0.0) from its Stata file.
# The path is relative to the notebook's working directory.
jep_stata_path = 'sharewX_rel9-0-0_gv_job_episodes_panel_stata/sharewX_rel9-0-0_gv_job_episodes_panel.dta'
job_his = pd.read_stata(jep_stata_path)

In [3]:
# Columns kept from the raw panel. The grouping comments are inferred from
# the column names only; the order of the list is unchanged.
job_history_vars = [
    # identifiers, demographics and timing
    "mergeid", "jep_w", "gender", "yrbirth", "age", "year", "country",
    # activity status indicators
    "situation", "working", "in_education", "retired",
    # job characteristics
    "industry", "isco", "job_title", "working_hours",
    # household and residence
    "nchildren", "withpartner", "married", "country_res_",
]

In [4]:
# Keep only the selected columns (all rows) from the raw panel.
job_history = job_his.loc[:, job_history_vars]

In [5]:
# Give the abbreviated raw column names more readable labels.
readable_names = {
    "jep_w": "jobinfo_wave",
    "yrbirth": "birth_year",
    "nchildren": "number_children",
    "withpartner": "live_with_partner",
    "country_res_": "country_residence",
}
job_history = job_history.rename(columns = readable_names)

In [6]:
# Collapse the raw status indicators into a single general job-situation label.
# np.select uses the FIRST condition that is true for a row, so the order of
# the list below is the priority order:
#   Retired > Working > In education > Military services > Not working.
is_retired = (job_history["retired"] == 1) | (job_history["situation"] == "Retired from work")
is_working = (job_history["working"] == 1) | (job_history["situation"] == "Employee or self-employed")
is_in_education = (job_history["in_education"] == 1) | (job_history["situation"] == "In education")
is_military = job_history["situation"] == "Military services, war prisoner or equivalent"
is_not_working = job_history["working"] == 0

job_history["job_situation_general"] = np.select(
    condlist = [
        is_retired,
        is_working,
        is_in_education,
        is_military,
        is_not_working
    ],

    choicelist = [
        "Retired",
        "Working",
        "In education",
        "Military services",
        "Not working"
    ],

    # Placeholder for rows that match none of the conditions; the choices are
    # strings, so a string placeholder is used and converted to NaN below.
    default = ""
)

# Convert the placeholder to a real missing value. This is a literal match on
# the empty string: pandas does not apply regex matching to an empty pattern,
# so a regex flag here would only be misleading.
job_history["job_situation_general"] = job_history["job_situation_general"].replace("", np.nan)

In [7]:
# Tabulate the general job situation, keeping missing values visible.
job_history.job_situation_general.value_counts(dropna=False)

job_situation_general
Working              2971683
Not working          1246000
In education         1147739
Retired               784505
Military services      14765
Name: count, dtype: int64

In [8]:
# Detailed job situation: like the general label, but splits working rows by
# the reported working-hours pattern.
# np.select uses the FIRST condition that is true for a row, so the list order
# below is the priority order.
is_retired = (job_history["retired"] == 1) | (job_history["situation"] == "Retired from work")

# A row counts as "has a job" if either indicator says so.
has_job = (job_history["working"] == 1) | (job_history["situation"] == "Employee or self-employed")

# Every working-hours pattern other than "Always full-time".
hours_not_always_full_time = job_history["working_hours"].isin([
    "Always part-time",
    "Changed once from full-time to part-time",
    "Changed multiple times",
    "Changed once from part-time to full-time"
])
is_short_term_job = job_history["situation"] == "Short term job (less than 6 months)"

is_part_time_or_unstable = (has_job & hours_not_always_full_time) | is_short_term_job
is_stable_full_time = has_job & (job_history["working_hours"] == "Always full-time")

is_in_education = (job_history["in_education"] == 1) | (job_history["situation"] == "In education")
is_military = job_history["situation"] == "Military services, war prisoner or equivalent"
is_not_working = job_history["working"] == 0

job_history["job_situation"] = np.select(
    condlist = [
        is_retired,
        is_part_time_or_unstable,
        is_stable_full_time,
        is_in_education,
        is_military,
        is_not_working
    ],

    choicelist = [
        "Retired",
        "Part-time/unstable work",
        "Stable full-time work",
        "In education",
        "Military services",
        "Not working"
    ],

    # Placeholder for rows that match none of the conditions (for example a
    # working row whose working_hours matches none of the listed patterns and
    # which satisfies no later condition); converted to NaN below.
    default = ""
)

# Convert the placeholder to a real missing value. This is a literal match on
# the empty string: pandas does not apply regex matching to an empty pattern,
# so a regex flag here would only be misleading.
job_history["job_situation"] = job_history["job_situation"].replace("", np.nan)

In [9]:
# Tabulate the detailed job situation, keeping missing values visible.
job_history.job_situation.value_counts(dropna=False)

job_situation
Stable full-time work      2663054
Not working                1254447
In education               1148037
Retired                     784505
Part-time/unstable work     244201
NaN                          55683
Military services            14765
Name: count, dtype: int64

In [10]:
## List the distinct country values; there are no missing values here.
country_values = job_history["country"].unique()
print(list(country_values))

['Austria', 'Bulgaria', 'Belgium', 'Cyprus', 'Czech Republic', 'Switzerland', 'Germany', 'Denmark', 'Estonia', 'Spain', 'France', 'Finland', 'Greece', 'Croatia', 'Hungary', 'Ireland', 'Italy', 'Israel', 'Lithuania', 'Luxembourg', 'Latvia', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Sweden', 'Slovenia', 'Slovakia']


In [11]:
# Tabulate the raw ISCO occupation codes, keeping missing values visible.
job_history.isco.value_counts(dropna=False)

isco
NaN              3301575
Not yet coded     393929
4110.0             97331
5223.0             64387
2341.0             44645
                  ...   
3130.0                 1
8130.0                 1
3520.0                 1
8412.0                 1
9133.0                 1
Name: count, Length: 653, dtype: int64

In [12]:
# Occupation level: non-working situations keep their general label, and
# working rows are classified by the ISCO major group (leading digit of the
# 4-digit code).
#
# The isco column holds numeric codes that render as floats (e.g. 4110.0)
# mixed with text such as "Not yet coded". A numeric code has lost any
# leading zero, so an armed-forces code like 0110 appears as 110.0. Testing
# the string form for a leading "0" therefore never matches such codes and
# would file them under major group 1, 2 or 3 instead. The major group is
# derived numerically here to avoid that.
# NOTE(review): this assumes every numeric code is a 4-digit ISCO code, so
# that values below 1000 belong to major group 0 - confirm against the
# SHARE documentation.
general_situation = job_history["job_situation_general"]

# Non-numeric entries ("Not yet coded", missing) become NaN and match none of
# the ISCO conditions below.
isco_code = pd.to_numeric(job_history["isco"].astype(object), errors = "coerce")
isco_major_group = isco_code // 1000

# np.select uses the FIRST condition that is true for a row, so the list order
# below is the priority order.
job_history["occupation_level"] = np.select(
    condlist = [
        general_situation == "Retired",
        general_situation == "In education",
        # Military either by reported situation or by ISCO major group 0.
        (general_situation == "Military services") | isco_code.between(0, 999),
        general_situation == "Not working",
        isco_major_group == 1,
        isco_major_group == 2,
        isco_major_group.between(3, 9)
    ],
    choicelist = ["Retired", "In education", "Military services", "Not working",
                  "Manager", "Professional", "Other levels"],
    # Placeholder for rows that match none of the conditions (e.g. working
    # rows without a usable ISCO code); converted to NaN below.
    default = ""
)

# Convert the placeholder to a real missing value. This is a literal match on
# the empty string: pandas does not apply regex matching to an empty pattern,
# so a regex flag here would only be misleading.
job_history["occupation_level"] = job_history["occupation_level"].replace("", np.nan)

In [13]:
# Tabulate the occupation level, keeping missing values visible.
job_history.occupation_level.value_counts(dropna=False)

occupation_level
Other levels         1844969
Not working          1246000
In education         1147739
Retired               784505
NaN                   591380
Professional          412309
Manager               123025
Military services      14765
Name: count, dtype: int64

In [13]:
# Coarser occupation variable: managers and professionals share one label,
# and every other value is carried over from occupation_level unchanged.
is_manager_or_professional = job_history["occupation_level"].isin(["Manager", "Professional"])
job_history["occupation_level_merge"] = job_history["occupation_level"].case_when(
    caselist = [(is_manager_or_professional, "Manager/professional")]
)

In [14]:
# Keep only the columns that go into the saved output, in this order.
output_columns = [
    "mergeid", "jobinfo_wave", "year", "age",
    "gender", "birth_year", "country_residence",
    "job_situation_general", "job_situation",
    "occupation_level", "occupation_level_merge",
]
job_history = job_history[output_columns]

In [15]:
# Restrict to rows with age strictly above 20 and strictly below 61.
job_history_f21t60 = job_history[job_history["age"].gt(20) & job_history["age"].lt(61)]

In [16]:
# Tabulate the occupation level within the age-restricted sample.
job_history_f21t60["occupation_level"].value_counts(dropna=False)

occupation_level
Other levels         1622659
Not working           644751
NaN                   519070
Professional          388287
Retired               201009
Manager               114668
In education           57432
Military services       8498
Name: count, dtype: int64

In [17]:
# Count the non-missing age rows per respondent in the age-restricted sample,
# then show how many respondents share each count.
rows_per_person = job_history_f21t60[["mergeid", "age"]].groupby("mergeid").count()
rows_per_person.value_counts()

age
40     68217
39      3350
38      3348
37      3289
36      2941
35      2711
34      2192
33      2072
32      1062
31       839
30       412
29       314
28       228
27       161
26       130
25        99
24        68
23        63
22        39
21        32
20        22
19        21
18        14
17        13
15        13
16         6
14         5
13         4
10         3
11         2
2          1
6          1
5          1
7          1
9          1
Name: count, dtype: int64

In [18]:
## Save the data.
# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing.
output_dir = Path("data_out")
output_dir.mkdir(parents = True, exist_ok = True)

# Age-restricted sample first, then the full panel.
job_history_f21t60.to_csv(output_dir / "job_history_f21t60.csv", index = False)
job_history.to_csv(output_dir / "job_history.csv", index = False)