# This notebook takes the processed data and runs the automation shock simulations

In [34]:
# Required packages (check which are required)
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import networkx as nx
import pandas as pd
import numpy as np
import scipy as sp
import datetime as dt
import community
from shapely.geometry import Polygon


import importlib
import ddom

import random
import math

import cmocean as cmo

In [32]:
# Load the inputs written by data_processing.ipynb (comma-separated, pandas' default)
sa_calibration_data = pd.read_csv('../Data_Labour/calibration_data.csv')
employment_SSYK = pd.read_csv('../Data_Labour/occupational_employment.csv')
SSYK_shock = pd.read_csv('../Data_Labour/occupation_shock.csv', index_col=0)
hours_data = pd.read_csv('../Data_Labour/hours_data.csv', index_col=0)

# Network the simulations run on (presumably the Swedish occupational
# mobility network, judging by the file name)
G = nx.read_graphml('../Data_Labour/Occ_mob_sweden.graphml')

In [39]:
importlib.reload(ddom)  # re-import ddom so edits to ddom.py are picked up

# 2018 employment per occupation, keyed by the SSYK code as a string
employment = {
    str(code): count
    for code, count in zip(employment_SSYK['SSYK'].to_numpy(), employment_SSYK['2018'].to_numpy())
}
node_names = G.nodes()

# Initial node state: employment at its 2018 level (only for occupations that
# are nodes of G), no unemployed workers and no open vacancies
employed = {code: count for code, count in employment.items() if code in node_names}
unemployed = dict.fromkeys(node_names, 0)
vacancies = dict.fromkeys(node_names, 0)

# Target demand starts equal to employment; computerisation probabilities are
# re-keyed by string so they match the node names
target_demand = dict(employed)
of_data = {
    str(code): prob
    for code, prob in SSYK_shock.to_dict()['Computerisation Probability'].items()
}

attributes = {
    'employed': employed,
    'unemployed': unemployed,
    'vacancies': vacancies,
    'target_demand': target_demand,
    'comp_prob': of_data,
}

# Parameters
# (a: 0.026 delta_u: 0.011 delta_nu: 0.00042 gamma_u: 0.174 timestep: 16.6)
a = 0.026  # not passed to the simulation call in this cell
delta_u = 0.011
delta_nu = 0.00042
gamma_u = 0.174
gamma_nu = gamma_u
timestep = 16.6
period = 10.25
shock_period = 30
k = 0.79
avg_hours_0 = hours_data.loc[2018, 'average_hours/year']
t_0 = 15

# NOTE(review): the recorded output of this cell is
# "EEROR: Empirical data must be input for calibration run".
# sa_calibration_data is loaded earlier but is not passed here -- confirm how
# ddom.deterministic_simulation expects the empirical data to be supplied.
output = ddom.deterministic_simulation(
    G, delta_u, delta_nu, gamma_u, gamma_nu, timestep, period, shock_period,
    k, avg_hours_0, t_0, attributes, True,
    calibration = True,
)

EEROR: Empirical data must be input for calibration run


<module 'ddom' from '/Users/lh/MSc_Thesis/Code Labour/ddom.py'>

In [27]:
# 2018 employment per occupation, keyed by the SSYK code as a string
employment = {
    str(code): count
    for code, count in zip(employment_SSYK['SSYK'].to_numpy(), employment_SSYK['2018'].to_numpy())
}
node_names = G.nodes()

importlib.reload(ddom)  # re-import ddom so edits to ddom.py are picked up


# Initial node state for the agent-based run. Vacancies start as an empty list
# per node (built with a comprehension so that every node gets its own list).
employed = {code: count for code, count in employment.items() if code in node_names}
unemployed = dict.fromkeys(node_names, 0)
vacancies = {name: [] for name in node_names}

# Target demand starts equal to employment; computerisation probabilities are
# re-keyed by string so they match the node names
target_demand = dict(employed)
of_data = {
    str(code): prob
    for code, prob in SSYK_shock.to_dict()['Computerisation Probability'].items()
}

attributes = {
    'employed': employed,
    'unemployed': unemployed,
    'vacancies': vacancies,
    'target_demand': target_demand,
    'comp_prob': of_data,
}

# Parameters
# (reference values: 0.026 delta_u: 0.011 delta_nu: 0.00042 gamma_u: 0.174
#  timestep: 16.6 -- the leading 0.026 is not assigned in this cell)
delta_u = 0.011
delta_nu = 0.00042
gamma_u = 0.174
gamma_nu = gamma_u
timestep = 16.6
period = 10.25
shock_period = 30
k = 0.79
avg_hours_0 = hours_data.loc[2018, 'average_hours/year']
t_0 = 15

# One DataFrame per run is collected for each output series
vac_datalist = []
u_datalist = []
e_datalist = []
td_datalist = []

# NOTE(review): G and attributes are shared between the runs; presumably
# ddom.simulation resets the node state from `attributes` -- confirm that the
# per-node vacancy lists are not mutated in place between runs.
for _ in range(2):
    output_abm = ddom.simulation(
        G, delta_u, delta_nu, gamma_u, gamma_nu, timestep, period, shock_period,
        k, avg_hours_0, t_0, attributes, False,
    )
    vac_datalist.append(pd.DataFrame(output_abm['vacancy_data']))
    u_datalist.append(pd.DataFrame(output_abm['unemployment_data']))
    e_datalist.append(pd.DataFrame(output_abm['employment_data']))
    td_datalist.append(pd.DataFrame(output_abm['target_demand_data']))

Simulation started at:  2020-05-13 11:13:44.335373
Simulation took:  1:51:06.130154


In [30]:
# Turn the series of the most recent ABM run (output_abm) into DataFrames
vac_data, u_data, e_data, td_data = (
    pd.DataFrame(output_abm[series])
    for series in ('vacancy_data', 'unemployment_data', 'employment_data', 'target_demand_data')
)


# Write each series to its own comma-separated file, without the row index
td_data.to_csv('../Data_Labour/abm_td_simulation.csv', index=False)
vac_data.to_csv('../Data_Labour/abm_vac_simulation.csv', index=False)
e_data.to_csv('../Data_Labour/abm_emp_simulation.csv', index=False)
u_data.to_csv('../Data_Labour/abm_unemp_simulation.csv', index=False)


In [107]:
def long_term_u(u, f_i, omega, k, t, i):
    '''
    Function that calculates the long term unemployment of each occupation
    u and f_i should be timeseries

    Starting from omega, the value is multiplied once per time step
    s = 1..t by the share of unemployed workers of occupation i that was
    not hired in that step, (1 - f_i[s][i]/u[s][i]).

    Parameters
    ----------
    u : sequence indexed by time step; u[s][i] is read for s = 1..t
    f_i : sequence indexed by time step; f_i[s][i] is read for s = 1..t
    omega : starting value, returned unchanged when t == 0
    k : unemployment duration in time steps.
        NOTE(review): as written (and as in the earlier recursive version)
        k does not influence the result, because the product always runs
        over all steps 1..t -- confirm this is the intended model.
    t : last time step included in the product (non-negative integer)
    i : occupation key used to index u[s] and f_i[s]

    Returns
    -------
    omega multiplied by the not-hired shares of steps 1..t.

    Raises
    ------
    ValueError if t is negative.
    There is no guard against u[s][i] being zero.
    '''
    if t < 0:
        raise ValueError('t must be non-negative')

    # Iterative instead of recursive: the recursion needed one stack frame per
    # time step and hit Python's recursion limit for long simulations. The
    # factors are applied in the same order (step 1 first), so the
    # floating-point result is unchanged.
    remaining = omega
    for step in range(1, t + 1):
        remaining = remaining*(1 - f_i[step][i]/u[step][i])
    return remaining


def deterministic_simulation_shock(G, delta_u, delta_nu, gamma_u, gamma_nu, timestep, period, shock_period, k, avg_hours_0, t_0, attributes, long_term = False, long_term_weeks = 27):
    '''
    Run the deterministic labour market model on G with an automation shock.

    The node attributes in `attributes` are written onto G, after which the
    model is stepped through a pre-shock phase, the shock phase and a
    post-shock phase. During the shock phase ddom.shock is called after
    every step (it is handed G, the initial and the post-shock demand, the
    current time and the shock midpoint in years, and k).

    Parameters
    ----------
    G : directed networkx graph with a 'weight' attribute on every edge
        (predecessors/successors are used, so an undirected graph fails).
        G is modified in place.
    delta_u : share of employed workers that is separated every step
    delta_nu : vacancies opened every step, as a share of employment
    gamma_u : adjustment rate applied to demand above target (separations)
    gamma_nu : adjustment rate applied to demand below target (vacancies)
    timestep : length of one step in weeks (steps are derived as years*52/timestep)
    period : length in years of both the pre-shock and the post-shock phase
    shock_period : length in years of the shock phase
    k : passed on unchanged to ddom.shock
    avg_hours_0 : average hours worked per worker before the shock
    t_0 : time in years after the start of the shock phase that is handed
        to ddom.shock as the shock midpoint
    attributes : dict {attribute name: {node: value}}; 'employed',
        'unemployed', 'vacancies', 'target_demand' and 'comp_prob' are read
    long_term : if true, also track long term unemployment (time-consuming)
    long_term_weeks : minimum unemployment duration in weeks that counts as
        long term (default 27)

    Returns
    -------
    dict with the keys 'vacancy_data', 'unemployment_data',
    'employment_data' and 'target_demand_data', each a list holding one
    {node: value} dict per step; 'lt_unemp_data' is added when long_term is
    true (its first entry is empty, no value is computed at step 0).
    '''
    for key, value in attributes.items():
        nx.set_node_attributes(G, value, str(key))

    pre_steps = int(period*52/timestep) # Steps before the automation shock
    shock_steps = int(shock_period*52/timestep) # Steps during the automation shock
    post_steps = int(period*52/timestep) # Steps after the automation shock
    timesteps = pre_steps + shock_steps + post_steps

    shock_mid = int(t_0*52/timestep) + pre_steps

    vacancies = nx.get_node_attributes(G, 'vacancies')
    employed = nx.get_node_attributes(G, 'employed')

    # Employment at the start, used for the long term unemployment inflow
    e_start = dict(employed)

    demand_0 = {occ: vacancies[occ] + employed[occ] for occ in vacancies}

    vac_data = []
    emp_data = []
    unemp_data = []
    td_data = []
    lt_unemp_data = []
    f_i_data = []

    # Variables to calculate the post shock demand
    comp_prob = nx.get_node_attributes(G, 'comp_prob')

    # Hours that remain in every occupation once the computerisable share is
    # removed. Dividing by the new average hours per worker (remaining hours
    # spread over the initial total demand L) gives the post shock demand, so
    # total demand stays at L up to rounding.
    L = sum(demand_0.values())
    final_hours_worked = {occ: avg_hours_0*employed[occ]*(1 - prob) for occ, prob in comp_prob.items()}
    final_average_hours_worked = sum(final_hours_worked.values())/L

    # Post shock demand
    final_demand = {occ: round(hours/final_average_hours_worked) for occ, hours in final_hours_worked.items()}

    occupations = G.nodes()
    min_duration = int(long_term_weeks/timestep) # shortest duration (in steps) counted as long term
    start_time = dt.datetime.now()
    print('Simulation started at: ', start_time)
    for t in range(timesteps):
        nu = nx.get_node_attributes(G, 'vacancies')
        u = nx.get_node_attributes(G, 'unemployed')
        e = nx.get_node_attributes(G, 'employed')
        A = nx.get_edge_attributes(G, 'weight')
        target_demand = nx.get_node_attributes(G, 'target_demand')

        # Weighted vacancies reachable from every occupation. This only depends
        # on the source occupation, so it is computed once per step instead of
        # once (twice, even) per edge.
        nu_A_sum = {i: np.sum([nu[m]*A[(i, m)] for m in G.neighbors(i)]) for i in occupations}

        s = {} # applications received by every occupation
        f = {} # flow of hired workers along every edge (i, j)
        for j in occupations:
            sources = list(G.predecessors(j))
            s[j] = sum(
                0 if nu_A_sum[i] == 0 else u[i]*nu[j]*A[(i, j)]/nu_A_sum[i]
                for i in sources
            )
            for i in sources:
                # s[j] is zero whenever nu[j] is zero, so this guard also
                # protects the division by nu[j] below
                if s[j]*nu_A_sum[i] == 0:
                    f[(i, j)] = 0
                else:
                    f[(i, j)] = u[i]*(nu[j]**(2))*A[(i, j)]*(1 - math.exp(-s[j]/nu[j]))/(s[j]*nu_A_sum[i])

        new_e = {}
        new_u = {}
        new_nu = {}
        new_f_i = {}
        new_lt_u = {}

        for i in occupations:
            # Current demand of the occupation and its distance to the target
            current_demand = nu[i] + e[i]
            excess_demand = round(np.max([0, current_demand - target_demand[i]]))
            demand_gap = round(np.max([0, target_demand[i] - current_demand]))

            # Inflow of employees to the occupation
            # saved since timeseries is required to calculate long term unemployed
            f_i = round(np.sum([f[(j, i)] for j in G.predecessors(i)]))
            if long_term:
                new_f_i[i] = f_i

            # Outflow of unemployed workers
            f_j = round(np.sum([f[(i, j)] for j in G.successors(i)]))

            # New amount of employees, unemployed workers and vacancies
            new_e[i] = round(e[i] - delta_u*e[i] - (1 - delta_u)*gamma_u*excess_demand + f_i)
            new_u[i] = round(u[i] + delta_u*e[i] + (1 - delta_u)*gamma_u*excess_demand - f_j)
            new_nu[i] = round(nu[i] + delta_nu*e[i] + (1 - delta_nu)*gamma_nu*demand_gap - f_i)

            # Long term unemployment (time-consuming). Nothing is computed at
            # t == 0 because there is no history yet; unlike before, this no
            # longer skips the vacancy update of the first step.
            if long_term and t > 0:
                omega = e_start[i]*delta_u
                new_lt_u[i] = round(sum(
                    long_term_u(unemp_data, f_i_data, omega, duration, t - 1, i)
                    for duration in range(min_duration, t + 1)
                ))

        nx.set_node_attributes(G, new_nu, 'vacancies')
        nx.set_node_attributes(G, new_e, 'employed')
        nx.set_node_attributes(G, new_u, 'unemployed')
        if long_term:
            nx.set_node_attributes(G, new_lt_u, 'lt_unemployed')
            nx.set_node_attributes(G, new_f_i, 'f_i')

        vac_data.append(new_nu)
        unemp_data.append(new_u)
        emp_data.append(new_e)
        td_data.append(target_demand)
        if long_term:
            lt_unemp_data.append(new_lt_u)
            f_i_data.append(new_f_i)

        # Apply the shock during the shock phase (the module is imported as
        # `ddom`; the former `DDOM.shock` raised a NameError)
        if pre_steps < t < shock_steps + pre_steps:
            ddom.shock(G, demand_0, final_demand, t*timestep/52, shock_mid*timestep/52, k)

    print('Simulation took: ', dt.datetime.now() - start_time)

    results = {'vacancy_data': vac_data, 'unemployment_data': unemp_data, 'employment_data': emp_data, 'target_demand_data': td_data}
    if long_term:
        results['lt_unemp_data'] = lt_unemp_data
    return results

In [5]:
# Long term unemployment series taken from `output`
# (the 'lt_unemp_data' key must be present in it), stored without the row index
lt_u_data = pd.DataFrame(output['lt_unemp_data'])
lt_u_data.to_csv('../Data_Labour/lt_unemp_simulation.csv', index=False)

In [6]:
# Display the long term unemployment table (pandas truncates large frames)
lt_u_data

Unnamed: 0,111,112,121,122,123,124,125,129,131,132,...,911,912,921,931,932,933,941,952,961,962
0,,,,,,,,,,,...,,,,,,,,,,
1,0.011121,0.011122,0.011122,0.011123,0.011123,0.011123,0.011123,0.011122,0.011123,0.011122,...,0.011122,0.011122,0.011124,0.011122,0.011122,0.011122,0.011122,0.011117,0.011122,0.011122
2,0.022490,0.022492,0.022492,0.022494,0.022493,0.022494,0.022493,0.022492,0.022492,0.022493,...,0.022492,0.022490,0.022494,0.022491,0.022492,0.022491,0.022492,0.022473,0.022492,0.022492
3,0.033953,0.033950,0.033948,0.033948,0.033953,0.033961,0.033952,0.033949,0.033949,0.033948,...,0.033949,0.033955,0.033983,0.033967,0.033950,0.033951,0.033950,0.034076,0.033950,0.033949
4,0.045393,0.045412,0.045404,0.045417,0.045417,0.045439,0.045411,0.045409,0.045414,0.045411,...,0.045408,0.045444,0.045520,0.045473,0.045424,0.045421,0.045408,0.045934,0.045423,0.045408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,0.161460,0.138026,0.132339,0.254814,0.349003,0.158682,0.146869,0.232855,0.123887,0.493578,...,0.401223,0.624461,2.437533,1.427892,0.848633,0.486960,2.362928,75.047284,0.653948,1.525008
153,0.159897,0.137396,0.131733,0.253610,0.347432,0.157864,0.146204,0.231796,0.123316,0.491371,...,0.399422,0.621605,2.436493,1.423004,0.845161,0.484883,2.352601,75.541016,0.651061,1.518463
154,0.158350,0.136772,0.131134,0.252402,0.345786,0.157044,0.145542,0.230747,0.122741,0.489153,...,0.397613,0.618736,2.435321,1.416785,0.841359,0.482671,2.342218,76.034748,0.648394,1.511610
155,0.156817,0.136137,0.130522,0.251189,0.344133,0.156273,0.144878,0.229694,0.122178,0.486925,...,0.395807,0.616127,2.434016,1.411809,0.837845,0.480571,2.331780,76.528480,0.645475,1.504993


In [87]:
# Display the employment table (pandas truncates large frames)
e_data

Unnamed: 0,111,112,121,122,123,124,125,129,131,132,...,911,912,921,931,932,933,941,952,961,962
0,2995.0,21654.0,18101.0,8778.0,10744.0,4369.0,31965.0,23738.0,11395.0,11328.0,...,77711.0,7032.0,3109.0,7294.0,11167.0,9924.0,78169.0,188.0,8901.0,39282.0
1,2962.0,21416.0,17902.0,8681.0,10626.0,4321.0,31613.0,23477.0,11270.0,11203.0,...,76856.0,6955.0,3075.0,7214.0,11044.0,9815.0,77309.0,186.0,8803.0,38850.0
2,2936.0,21231.0,17748.0,8607.0,10534.0,4283.0,31340.0,23275.0,11173.0,11107.0,...,76194.0,6894.0,3047.0,7150.0,10949.0,9730.0,76643.0,184.0,8727.0,38516.0
3,2916.0,21080.0,17623.0,8545.0,10459.0,4252.0,31118.0,23110.0,11093.0,11028.0,...,75655.0,6843.0,3023.0,7096.0,10870.0,9660.0,76101.0,182.0,8664.0,38243.0
4,2898.0,20958.0,17521.0,8494.0,10399.0,4225.0,30938.0,22975.0,11028.0,10963.0,...,75217.0,6802.0,3004.0,7051.0,10806.0,9603.0,75659.0,180.0,8613.0,38021.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,4201.0,32886.0,28053.0,9795.0,10265.0,6174.0,47041.0,27762.0,18252.0,8836.0,...,69220.0,4513.0,576.0,2215.0,5473.0,7799.0,14802.0,28.0,5493.0,11205.0
153,4212.0,32886.0,28053.0,9796.0,10266.0,6176.0,47041.0,27762.0,18253.0,8836.0,...,69220.0,4513.0,575.0,2215.0,5473.0,7799.0,14801.0,28.0,5492.0,11205.0
154,4223.0,32887.0,28054.0,9797.0,10267.0,6177.0,47041.0,27762.0,18253.0,8836.0,...,69219.0,4512.0,574.0,2214.0,5472.0,7798.0,14800.0,28.0,5492.0,11204.0
155,4233.0,32887.0,28054.0,9798.0,10267.0,6179.0,47041.0,27762.0,18254.0,8836.0,...,69219.0,4512.0,573.0,2214.0,5471.0,7798.0,14800.0,28.0,5491.0,11203.0
