# This notebook takes the data and conducts the Automation Shock

In [23]:
# Required packages (check which are required)
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
import networkx as nx
import pandas as pd
import numpy as np
import scipy as sp
import datetime as dt
import community
from shapely.geometry import Polygon

import DDOM

import random
import math

import cmocean as cmo

In [24]:
# Read the data from data_processing.ipynb
sa_calibration_data = pd.read_csv('../Data_Labour/calibration_data.csv')
employment_SSYK = pd.read_csv('../Data_Labour/occupational_employment.csv', sep = ',')
SSYK_shock = pd.read_csv('../Data_Labour/occupation_shock.csv', sep = ',', index_col = 0)
hours_data = pd.read_csv('../Data_Labour/hours_data.csv', sep = ',', index_col = 0)

G = nx.read_graphml('../Data_Labour/Occ_mob_sweden.graphml')

In [47]:
employment = employment_SSYK[['SSYK', '2018']]
employment = {str(employment['SSYK'].iloc[i]):employment['2018'].iloc[i] for i in range(len(employment))}
node_names = G.nodes()

# setup network
employed = {str(name):e for name,e in employment.items() if str(name) in node_names}
unemployed = {name:0 for name in node_names}
vacancies = {name:0 for name in node_names}
applications = {name:[] for name in node_names}

target_demand = {str(name):e for name,e in employment.items() if str(name) in node_names}
of_data = SSYK_shock.to_dict()['Computerisation Probability']
of_data = {str(code):prob for code, prob in of_data.items()}

attributes = {'employed':employed, 'unemployed':unemployed, 'vacancies':vacancies, 'applications':applications,
                'target_demand':target_demand, 'comp_prob':of_data}

# 0.026 delta_u: 0.011 delta_nu: 0.00042 gamma_u: 0.174 timestep: 16.6

# Parameters
delta_u = 0.011
delta_nu = 0.00042
gamma_u = 0.174
gamma_nu = gamma_u
timestep = 16.6
period = 10.25
shock_period = 30
k = 0.79
avg_hours_0 = hours_data.loc[2018,'average_hours/year']
t_0 = 15


output = deterministic_simulation_shock(G, delta_u, delta_nu, gamma_u, gamma_nu, timestep, period, shock_period, k, avg_hours_0, t_0, attributes)

Simulation started at:  2020-05-12 22:15:00.469563


RecursionError: maximum recursion depth exceeded in comparison

In [46]:
def long_term_u(u, f_i, omega, k, t):
    '''
    Function that calculates the long term unemployment of each occupation
    u and f_i should be timeseries
    '''

    if k == 0 and t == 0:
        return omega
    elif k == 0:
        return long_term_u(u, f_i, omega, 0, t - 1)*(1 - f_i[t][i]/u[t][i])
    elif t == 0:
        return long_term_u(u, f_i, omega, k - 1, 0)*(1 - f_i[t][i]/u[t][i])
    else:
        return long_term_u(u, f_i, omega, k - 1, t - 1)*(1 - f_i[t][i]/u[t][i])


def deterministic_simulation_shock(G, delta_u, delta_nu, gamma_u, gamma_nu, timestep, period, shock_period, k, avg_hours_0, t_0, attributes):
    for key, value in attributes.items():
        nx.set_node_attributes(G, value, str(key))
    
    pre_steps = int(period*52/timestep) # Steps before the automation shock
    shock_steps = int(shock_period*52/timestep) # Steps during the automation shock
    post_steps = int(period*52/timestep) # Steps after the automation shock
    timesteps = pre_steps + shock_steps + post_steps

    shock_mid = int(t_0*52/timestep) + pre_steps

    vacancies = nx.get_node_attributes(G, 'vacancies')
    employed = nx.get_node_attributes(G, 'employed')

    e_0 = {key:val for key, val in employed.items()}

    demand_0 = {occ:vacancies[occ] + employed[occ] for occ in vacancies.keys()} 

    vac_data = []
    emp_data = []
    unemp_data = []
    lt_unemp_data = []
    td_data = []
    f_i_data = []

    # Variables to calculate the post shock demand
    comp_prob = nx.get_node_attributes(G, 'comp_prob')

    average_hours_worked_0 = avg_hours_0

    L = sum(demand_0.values())
    final_hours_worked = {occ : average_hours_worked_0*employed[occ]*(1-prob) for occ, prob in comp_prob.items()}

    final_average_hours_worked = sum(final_hours_worked.values())/L

    # Post shock demand
    final_demand = {occupation:round(hours/final_average_hours_worked) for occupation, hours in final_hours_worked.items()}

    occupations = G.nodes()
    time = dt.datetime.now()
    print('Simulation started at: ', time)
    for t in range(timesteps):
        nu = nx.get_node_attributes(G, 'vacancies')
        u = nx.get_node_attributes(G, 'unemployed')
        lt_u = nx.get_node_attributes(G, 'lt_unemployed')
        e = nx.get_node_attributes(G, 'employed')
        A = nx.get_edge_attributes(G, 'weight')

        s = {}
        f = {}
        for j in occupations:
            s[j] = []
            for i in G.predecessors(j):
                nu_A_sum = np.sum([nu[k]*A[(i,k)] for k in G.neighbors(i)])
                if nu_A_sum == 0:
                    s[j].append(0)
                else:
                    s[j].append(u[i]*nu[j]*A[(i,j)]/nu_A_sum)

            s[j] = sum(s[j])
            for i in G.predecessors(j):
                nu_A_sum = np.sum([nu[k]*A[(i,k)] for k in G.neighbors(i)])
                if s[j]*nu_A_sum == 0:
                    f[(i,j)] = 0
                else:
                    f[(i,j)] = u[i]*(nu[j]**(2))*A[(i,j)]*(1 - math.exp(-s[j]/nu[j]))/(s[j]*nu_A_sum)

        new_e = {}
        new_u = {}
        new_nu = {}
        new_f_i = {}
        new_lt_u = {}
        
        target_demand = nx.get_node_attributes(G, 'target_demand')
        current_demand = {}
        for i in occupations:
            # Set the current demand of the occupation
            current_demand[i] = nu[i] + e[i]
            demand_diff = round(np.max([0, current_demand[i] - target_demand[i]]))

            # Calculate the inflow of employees to the occupation 
            # saved since timeseries is required to calculate long term unemployed
            new_f_i[i] = round(np.sum([f[(j,i)] for j in G.predecessors(i)]))

            # Calculate new amount of employees
            new_e[i] = round(e[i] - delta_u*e[i] - (1 - delta_u)*gamma_u*demand_diff + new_f_i[i])

            # Calculate outflow of unemployed workers
            f_j = round(np.sum([f[(i,j)] for j in G.successors(i)]))

            # Calculate new amount of unemployed workers
            new_u[i] = round(u[i] + delta_u*e[i] + (1 - delta_u)*gamma_u*demand_diff - f_j)

            # Used to calculate long term unemployment
            omega = e_0[i] * (delta_u + (1 - delta_u)*gamma_u*demand_diff)/e[i]

            # Calculate new vacancies
            demand_diff = round(np.max([0, target_demand[i]-current_demand[i]]))
            new_nu[i] = round(nu[i] + delta_nu*e[i] + (1-delta_nu)*gamma_nu*demand_diff - new_f_i[i])

            # Calculate long term unemployment
            if t == 0:
                continue
            k = int(27/timestep) # maybe modify to Sweden's definition
            status = True
            temp_sum = 0
            while status == True:
                lt_u_k = long_term_u(unemp_data, f_i_data, omega, k, t-1)
                if lt_u_k <= 1:
                    status = False
                k +=1
                temp_sum += lt_u_k
            new_lt_u[i] = temp_sum

        nx.set_node_attributes(G, new_nu, 'vacancies')
        nx.set_node_attributes(G, new_e, 'employed')
        nx.set_node_attributes(G, new_u, 'unemployed')
        nx.set_node_attributes(G, new_lt_u, 'lt_unemployed')
        nx.set_node_attributes(G, new_f_i, 'f_i')

        vac_data.append(new_nu)
        unemp_data.append(new_e)
        emp_data.append(new_u)
        lt_unemp_data.append(new_lt_u)
        f_i_data.append(new_f_i)

        td_data.append(target_demand)
        
        if pre_steps < t and t < shock_steps + pre_steps:
            DDOM.shock(G, demand_0, final_demand, t*timestep/52, shock_mid*timestep/52, k)

   
    
    time = dt.datetime.now()- time
    print('Simulation took: ', time)
    
    return {'vacancy_data': vac_data, 'unemployment_data': unemp_data, 'employment_data': emp_data, 'target_demand_data': td_data, 'lt_unemp_data': lt_unemp_data}

In [8]:
vac_data = pd.DataFrame(output['vacancy_data'])
u_data = pd.DataFrame(output['unemployment_data'])
e_data = pd.DataFrame(output['employment_data'])
td_data = pd.DataFrame(output['target_demand_data'])

td_data.to_csv('../Data_Labour/td_simulation.csv', sep = ',', index = False)
vac_data.to_csv('../Data_Labour/vac_simulation.csv', sep = ',', index = False)
e_data.to_csv('../Data_Labour/emp_simulation.csv', sep = ',', index = False)
u_data.to_csv('../Data_Labour/unemp_simulation.csv', sep = ',', index = False)