# This notebook takes the data and conducts the Automation Shock

In [None]:
# Inputs written by data_processing.ipynb.
# calibration_data: the 'sa_vac_rate' and 'u_trend' columns are read during calibration.
sa_calibration_data = pd.read_csv('../Data_Labour/calibration_data.csv')
# occupational_employment: the 'SSYK' and '2018' columns are read (comma-separated, the read_csv default).
employment_SSYK = pd.read_csv('../Data_Labour/occupational_employment.csv')
# occupation_shock: the 'ssyk3' and 'Computerisation Probability' columns are read.
SSYK_shock = pd.read_csv('../Data_Labour/occupation_shock.csv')

# Network whose nodes are matched against SSYK codes and whose edges carry a 'weight' attribute.
G = nx.read_graphml('../Data_Labour/Occ_mob_sweden.graphml')

In [None]:
def eval_func1(individual):
    """Evaluate one candidate parameter set and return its fitness as a 1-tuple.

    Builds the initial node attributes from the module-level inputs
    (``employment_SSYK``, ``SSYK_shock``, ``G``), runs
    ``deterministic_simulation`` in ``'evo'`` mode against rows 18..58 of
    ``sa_calibration_data`` and returns the resulting fitness.

    Args:
        individual: Mutable sequence laid out as
            ``[a, delta_u, delta_nu, gamma_u, timestep]``. It is modified in
            place: ``delta_u`` is raised in steps of 0.001 until it is no
            longer smaller than ``delta_nu``.

    Returns:
        A one-element tuple ``(fitness,)``.
    """
    # Repair the candidate so that delta_u >= delta_nu. The caller sees the
    # repaired value because the sequence is mutated in place.
    while individual[1] < individual[2]:
        individual[1] += 0.001

    node_names = G.nodes()

    # 2018 employment per occupation, keyed by the SSYK code as a string and
    # restricted to codes that are nodes of the network. to_numpy() keeps the
    # values as NumPy scalars, exactly as positional .iloc access would.
    employed = {
        str(code): count
        for code, count in zip(employment_SSYK['SSYK'],
                               employment_SSYK['2018'].to_numpy())
        if str(code) in node_names
    }

    # Mean computerisation probability per 3-digit SSYK group. The column is
    # selected before aggregating so unrelated (possibly non-numeric) columns
    # are never averaged, and the deprecated ``axis`` keyword is not needed.
    # NOTE(review): these keys keep the dtype of the 'ssyk3' column, while node
    # names are matched as strings above - confirm they line up with G's nodes.
    comp_prob = (
        SSYK_shock.groupby('ssyk3')['Computerisation Probability']
        .mean()
        .to_dict()
    )

    # Initial state of the network: nobody unemployed, no open vacancies, and
    # target demand equal to current employment.
    attributes = {
        'employed': employed,
        'unemployed': {name: 0 for name in node_names},
        'vacancies': {name: 0 for name in node_names},
        'applications': {name: [] for name in node_names},
        'target_demand': dict(employed),
        'comp_prob': comp_prob,
    }

    # Calibration window (rows 18..58 of the calibration data).
    start = 18
    end = 59
    empirical_data = sa_calibration_data.iloc[start:end]

    # Simulation settings. The shock-related values are passed through but the
    # shock itself is not applied by the simulation yet.
    parameters = {
        'T': 10.25,
        'k': 1,
        'avg_hours_0': 1,
        'shock_start': 1,
    }
    calibration_output = 'evo'

    # Run simulation
    fitness = deterministic_simulation(
        individual, G, parameters['T'], empirical_data,
        parameters['k'], parameters['avg_hours_0'], parameters['shock_start'],
        attributes, calibration_output, False)
    print('a:', individual[0], 'delta_u:', individual[1], 'delta_nu:', individual[2], 'gamma_u:', individual[3], 'timestep:', individual[4], 'fitness:', fitness)

    return fitness,


In [None]:
def _matching_flows(G, u, ny, A):
    """Return the worker flow ``f[(i, j)]`` for every edge of ``G`` in one step.

    ``u`` and ``ny`` map each node to its unemployed workers and vacancies,
    ``A`` maps each edge ``(i, j)`` to its weight.
    """
    # Weighted vacancies reachable from each origin i. This depends on i only,
    # so it is computed once per node instead of twice per edge.
    reachable = {
        i: sum([ny[m]*A[(i, m)] for m in G.neighbors(i)])
        for i in G.nodes()
    }

    f = {}
    for j in G.nodes():
        origins = list(G.predecessors(j))

        # s_j: total applications sent to occupation j from all its origins.
        s_j = sum([
            0 if reachable[i] == 0 else u[i]*ny[j]*A[(i, j)]/reachable[i]
            for i in origins
        ])

        for i in origins:
            # A zero product means there is nothing to match on this edge; it
            # also protects the divisions below (s_j is zero when ny[j] is).
            if s_j*reachable[i] == 0:
                f[(i, j)] = 0
            else:
                f[(i, j)] = (u[i]*(ny[j]**(2))*A[(i, j)]
                             * (1 - math.exp(-s_j/ny[j]))/(s_j*reachable[i]))
    return f


def deterministic_simulation(individual, G, period, empirical_data, k, avg_hours_0, shock_start, attributes,
                             calibration_output = False, steady_state = False):
    """Run the deterministic labour-flow simulation on ``G`` and score it.

    The node attributes in ``attributes`` are written onto ``G`` (``G`` is
    modified in place), the employment / unemployment / vacancy stocks are
    iterated forward, and the simulated series are compared with
    ``empirical_data`` through ``DDOM.calibration_calculation``.

    Args:
        individual: Sequence ``[a, delta_u, delta_nu, gamma_u, timestep]``.
        G: Directed network with a ``'weight'`` attribute on its edges.
        period: Length of the simulated cycle; the number of steps is
            ``period*52/timestep`` (presumably years and weeks - confirm).
        empirical_data: DataFrame with ``'sa_vac_rate'`` and ``'u_trend'``
            columns.
        k: Shock parameter; currently unused because the shock is disabled.
        avg_hours_0: Initial average hours worked, used for the post-shock
            demand (which is computed but not applied yet).
        shock_start: Shock parameter; currently unused.
        attributes: Mapping ``attribute name -> {node: value}`` used as the
            initial state. Must provide ``'vacancies'``, ``'employed'``,
            ``'unemployed'``, ``'target_demand'`` and ``'comp_prob'``.
        calibration_output: ``'evo'`` returns the fitness only, the string
            ``'True'`` returns the cost dictionary, anything else returns the
            simulated series together with the cost dictionary.
        steady_state: When false, three cycles are simulated and the fitness
            comes from ``cost['cost']`` (or from the min/max deviations if
            that entry is a string). When true, a single cycle is simulated
            and the fitness is a weighted deviation of the steady-state means.

    Returns:
        Depending on ``calibration_output``: the fitness, the cost dictionary
        (with an added ``'time'`` entry), or a dictionary with the keys
        ``'vacancy_data'``, ``'unemployment_data'``, ``'employment_data'``
        (DataFrames) and ``'cost'``.
    """
    a = individual[0]
    delta_u = individual[1]
    delta_nu = individual[2]
    gamma_u = individual[3]
    timestep = individual[4]

    # Reset the network to the supplied initial state.
    for name, values in attributes.items():
        nx.set_node_attributes(G, values, str(name))

    T_steps = int(period*52/timestep)  # Steps during simulated business cycle
    if not steady_state:
        timesteps = T_steps*3  # Total amount of steps
        print(timesteps)
    else:
        timesteps = round(period*52/timestep)

    vacancies = nx.get_node_attributes(G, 'vacancies')
    employed = nx.get_node_attributes(G, 'employed')

    # Initial demand per occupation: open vacancies plus filled positions.
    demand_0 = {occ: vacancies[occ] + employed[occ] for occ in vacancies}

    # Post-shock demand. It is prepared here for the automation shock, but the
    # shock call in the loop below is still disabled, so it is not consumed.
    risk_factor = nx.get_node_attributes(G, 'comp_prob')
    L = sum(demand_0.values())
    final_hours_worked = {
        occ: avg_hours_0*employed[occ]*(1 - risk)
        for occ, risk in risk_factor.items()
    }
    final_average_hours_worked = sum(final_hours_worked.values())/L
    final_demand = {occ: hours/final_average_hours_worked
                    for occ, hours in final_hours_worked.items()}

    vac_data = []
    emp_data = []
    unemp_data = []

    occupations = G.nodes()
    started_at = dt.datetime.now()

    print('Simulation started at: ', started_at)
    for t in range(timesteps):
        ny = nx.get_node_attributes(G, 'vacancies')
        u = nx.get_node_attributes(G, 'unemployed')
        e = nx.get_node_attributes(G, 'employed')
        A = nx.get_edge_attributes(G, 'weight')
        target_demand = nx.get_node_attributes(G, 'target_demand')

        f = _matching_flows(G, u, ny, A)

        new_e = {}
        new_u = {}
        new_ny = {}
        for i in occupations:
            current_demand = ny[i] + e[i]
            # Only one of these two is non-zero: demand above target triggers
            # separations, demand below target opens vacancies.
            excess = max(0, current_demand - target_demand[i])
            shortfall = max(0, target_demand[i] - current_demand)

            f_i = sum([f[(j, i)] for j in G.predecessors(i)])  # hires into i
            f_j = sum([f[(i, j)] for j in G.successors(i)])    # unemployed of i hired elsewhere

            # Separated workers leave employment and enter unemployment, so
            # both separation terms are subtracted here and added to new_u.
            # (Previously the demand-driven term was added to employment as
            # well, which counted every laid-off worker twice.)
            new_e[i] = e[i] - delta_u*e[i] - (1 - delta_u)*gamma_u*excess + f_i
            new_u[i] = u[i] + delta_u*e[i] + (1 - delta_u)*gamma_u*excess - f_j
            # The same gamma_u adjustment speed is used for vacancy openings.
            new_ny[i] = ny[i] + delta_nu*e[i] + (1-delta_nu)*gamma_u*shortfall - f_i

        nx.set_node_attributes(G, new_ny, 'vacancies')
        nx.set_node_attributes(G, new_e, 'employed')
        nx.set_node_attributes(G, new_u, 'unemployed')

        # The new_* dictionaries cover every node, so they are exactly the
        # state that was just written to the graph.
        vac_data.append(new_ny)
        unemp_data.append(new_u)
        emp_data.append(new_e)

        #if shock_start < t:
        #    DDOM.shock(G, demand_0, final_demand, t, t_0, k)

        # After the first cycle the target demand starts to move.
        if T_steps < t:
            DDOM.update_target_demand(G, demand_0, t, T_steps, a)

    model_data = {'vacancies': vac_data, 'unemployment': unemp_data, 'employment':emp_data}

    # Empirical data
    e_vac_rate = empirical_data['sa_vac_rate']
    e_unemployed = empirical_data['u_trend']

    e_u_max = max(e_unemployed)
    e_u_min = min(e_unemployed)
    e_vac_max = max(e_vac_rate)
    e_vac_min = min(e_vac_rate)

    e_u_mean = np.mean(e_unemployed)
    e_vac_mean = np.mean(e_vac_rate)

    # Polygon traced by the empirical (unemployment, vacancy rate) points;
    # buffer(0) is applied before the area is taken.
    A_e = Polygon(list(zip(e_unemployed, e_vac_rate))).buffer(0)

    # Cost is a vector of deviations from goal. Should be 0
    cost = DDOM.calibration_calculation(empirical_data, model_data, A_e, T_steps)
    cost['A_e'] = A_e.area

    if not steady_state:
        if not isinstance(cost['cost'], str):
            fitness = cost['cost']
        else:
            # cost['cost'] is a string here, so fall back to the distance
            # between the empirical and simulated extremes.
            fitness = np.linalg.norm([
                abs(e_u_max - cost['m_u_max']),
                abs(e_u_min - cost['m_u_min']),
                abs(e_vac_max - cost['m_vac_max']),
                abs(e_vac_min - cost['m_vac_min']),
            ])
    else:
        u_gap = abs(e_u_mean - cost['m_u_ss'])
        vac_gap = abs(e_vac_mean - cost['m_vac_ss'])
        fitness = u_gap*0.3 + vac_gap*0.7

    elapsed = dt.datetime.now() - started_at
    print('Simulation took: ', elapsed)

    if calibration_output == 'evo':
        return fitness
    # NOTE(review): this compares against the string 'True'; a boolean True
    # falls through to the DataFrame branch below - confirm this is intended.
    elif calibration_output == 'True':
        cost['time'] = elapsed
        return cost
    else:
        vac_data = pd.DataFrame(vac_data)
        unemp_data = pd.DataFrame(unemp_data)
        emp_data = pd.DataFrame(emp_data)
        return {'vacancy_data': vac_data, 'unemployment_data': unemp_data, 'employment_data': emp_data, 'cost': cost}