In [81]:
import pandas as pd
import numpy as np
import requests
import demjson

# Add Socioeconomic Indicators

From stat Finland
https://pxnet2.stat.fi/PXWeb/pxweb/en/Kuntien_avainluvut/Kuntien_avainluvut__2021/kuntien_avainluvut_2021_aikasarja.px/table/tableViewLayout1/

In [None]:
#M140 is the employment rate
#"M297" is the intermunicipal migration rate
#"M499"  Annual contribution margin, EUR per capita
#"M478" Share of persons aged over 64 of the population
#"M152" "Number of workplaces in the area"

In [154]:
url = 'https://pxnet2.stat.fi:443/PXWeb/api/v1/en/Kuntien_avainluvut/2021/kuntien_avainluvut_2021_aikasarja.px'
#url_all = 'https://pxnet2.stat.fi:443/PXWeb/api/v1/en/Kuntien_avainluvut/2021/kuntien_avainluvut_2021_aikasarja.px'
#Employment rate
json_query_employment = '''{
  "query": [
    {
      "code": "Alue 2021",
      "selection": {
        "filter": "item",
        "values": [
          "MK01",
          "MK02",
          "MK04",
          "MK05",
          "MK06",
          "MK07",
          "MK08",
          "MK09",
          "MK10",
          "MK11",
          "MK12",
          "MK13",
          "MK14",
          "MK15",
          "MK16",
          "MK17",
          "MK18",
          "MK19"
        ]
      }
    },
    {
      "code": "Tiedot",
      "selection": {
        "filter": "item",
        "values": ["M140"]
      }
    },
    {
      "code": "Vuosi",
      "selection": {
        "filter": "item",
        "values": [
          "2019",
          "2020"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
} '''

In [155]:
json_query_inter_migration_rate = '''{
  "query": [
    {
      "code": "Alue 2021",
      "selection": {
        "filter": "item",
        "values": [
          "MK01",
          "MK02",
          "MK04",
          "MK05",
          "MK06",
          "MK07",
          "MK08",
          "MK09",
          "MK10",
          "MK11",
          "MK12",
          "MK13",
          "MK14",
          "MK15",
          "MK16",
          "MK17",
          "MK18",
          "MK19"
        ]
      }
    },
    {
      "code": "Tiedot",
      "selection": {
        "filter": "item",
        "values": ["M297"]
      }
    },
    {
      "code": "Vuosi",
      "selection": {
        "filter": "item",
        "values": [
          "2019",
          "2020"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
} '''

In [156]:
json_ann_contrib = '''{
  "query": [
    {
      "code": "Alue 2021",
      "selection": {
        "filter": "item",
        "values": [
          "MK01",
          "MK02",
          "MK04",
          "MK05",
          "MK06",
          "MK07",
          "MK08",
          "MK09",
          "MK10",
          "MK11",
          "MK12",
          "MK13",
          "MK14",
          "MK15",
          "MK16",
          "MK17",
          "MK18",
          "MK19"
        ]
      }
    },
    {
      "code": "Tiedot",
      "selection": {
        "filter": "item",
        "values": ["M499"]
      }
    },
    {
      "code": "Vuosi",
      "selection": {
        "filter": "item",
        "values": [
          "2019",
          "2020"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
} '''

In [157]:
json_age_64 = '''{
  "query": [
    {
      "code": "Alue 2021",
      "selection": {
        "filter": "item",
        "values": [
          "MK01",
          "MK02",
          "MK04",
          "MK05",
          "MK06",
          "MK07",
          "MK08",
          "MK09",
          "MK10",
          "MK11",
          "MK12",
          "MK13",
          "MK14",
          "MK15",
          "MK16",
          "MK17",
          "MK18",
          "MK19"
        ]
      }
    },
    {
      "code": "Tiedot",
      "selection": {
        "filter": "item",
        "values": ["M478"]
      }
    },
    {
      "code": "Vuosi",
      "selection": {
        "filter": "item",
        "values": [
          "2019",
          "2020"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
} '''

In [158]:
json_n_workplaces = '''{
  "query": [
    {
      "code": "Alue 2021",
      "selection": {
        "filter": "item",
        "values": [
          "MK01",
          "MK02",
          "MK04",
          "MK05",
          "MK06",
          "MK07",
          "MK08",
          "MK09",
          "MK10",
          "MK11",
          "MK12",
          "MK13",
          "MK14",
          "MK15",
          "MK16",
          "MK17",
          "MK18",
          "MK19"
        ]
      }
    },
    {
      "code": "Tiedot",
      "selection": {
        "filter": "item",
        "values": ["M152"]
      }
    },
    {
      "code": "Vuosi",
      "selection": {
        "filter": "item",
        "values": [
          "2019",
          "2020"
        ]
      }
    }
  ],
  "response": {
    "format": "json-stat2"
  }
} '''

In [159]:
indicator_codes = ["M140", "M297", "M499", "M478", "M152"]
indicator_labels = ['employment_rate', 'intermunicipal_migration_rate', 'annual_contrib_margin', 'share_age_64', 'number_workplaces']
json_queries = [json_query_employment, json_query_inter_migration_rate, json_ann_contrib, json_age_64, json_n_workplaces]


In [169]:
def get_dataframe(url, json_query, indicator_name):
    #print(json_query)
    x = requests.post(url, data = json_query)
    data = demjson.decode(x.text)
    #print(len(data['value']))
    names_dict = data['dimension']['Alue 2021']['category']['label']
    values = data['value']
    names = []
    for key, value in names_dict.items():
        names.append(value)
    data_dict = {'names' : names,}
    for point in range(len(values)):
        if point % 2 !=0:
            if not data_dict.get('2020' + str(indicator_name)):
                print('2020' + indicator_name)
                data_dict['2020' + str(indicator_name)] = []
                data_dict['2020' + indicator_name].append(values[point]) 
            else:
                data_dict['2020' + indicator_name].append(values[point]) 
        else:
            if not data_dict.get('2019' + str(indicator_name)):
                print('2019' + indicator_name)
                data_dict['2019' + str(indicator_name)] = []
                print(values[point])
                data_dict['2019' + str(indicator_name)].append(values[point]) 
            else:
                data_dict['2019' + indicator_name].append(values[point]) 
    df = pd.DataFrame.from_dict(data_dict)
    df = df.set_index('names')
    return df



In [170]:
df_employment_rate = get_dataframe(url, json_query_employment, 'employment_rate')

2019employment_rate
75.2
2020employment_rate


In [174]:
df_employment_rate = get_dataframe(url, json_query_employment, 'employment_rate')
for i in range(1, len(json_queries)):
    temp_df = get_dataframe(url, json_queries[i], indicator_labels[i])
    df_employment_rate = df_employment_rate.join(temp_df)

    

2019employment_rate
75.2
2020employment_rate
2019intermunicipal_migration_rate
8144
2020intermunicipal_migration_rate
2019annual_contrib_margin
601
2020annual_contrib_margin
2019share_age_64
17.6
2020share_age_64
2019number_workplaces
836622
2020number_workplaces


In [180]:
df_employment_rate

Unnamed: 0_level_0,2019employment_rate,2020employment_rate,2019intermunicipal_migration_rate,2020intermunicipal_migration_rate,2019annual_contrib_margin,2020annual_contrib_margin,2019share_age_64,2020share_age_64,2019number_workplaces,2020number_workplaces
names,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Uusimaa,75.2,,8144,1950,601.0,988.5,17.6,17.9,836622,
Southwest Finland,72.4,,887,1380,74.1,532.6,23.3,23.7,202061,
Satakunta,71.2,,-1141,-719,63.9,596.9,27.2,27.7,86401,
Kanta-Häme,73.1,,-69,137,197.5,609.6,25.3,25.8,64199,
Pirkanmaa,71.4,,1886,2517,305.6,706.8,21.8,22.1,217811,
Päijät-Häme,69.5,,-591,-5,222.7,866.6,26.7,27.3,77081,
Kymenlaakso,67.9,,-1286,-848,175.6,763.5,28.0,28.6,61440,
South Karelia,67.4,,-488,-305,387.3,484.5,27.4,28.0,47995,
South Savo,68.9,,-1228,-688,143.5,770.0,30.9,31.7,50145,
North Savo,69.5,,-543,-129,162.0,551.8,25.3,25.9,98975,


## Mapping to area names that the team is using

In [200]:
# Creating a map between the ISO-codes of the different provinces and their ENG/FI/SWE 
# names, for legibility later on.
df_codemap = pd.read_csv('mob_map_nuts3.txt',sep=',', header=None)
df_codemap.columns = ['Code', 'Subdivision name (fi)','Subdivision name (sv)'	,'Subdivision Name (en)',	'nuts_code',	'statfinland_names',	'thl_namings_eng']
iso_name_map = dict(zip(df_codemap.iloc[:,0],df_codemap.iloc[:,3]))

In [201]:
df_codemap = df_codemap.iloc[1: , :]

In [202]:
df_codemap

Unnamed: 0,Code,Subdivision name (fi),Subdivision name (sv),Subdivision Name (en),nuts_code,statfinland_names,thl_namings_eng
1,FI-02,Etelä-Karjala,Södra Karelen,South Karelia,FI1C5,South Karelia,South Karelia
2,FI-03,Etelä-Pohjanmaa,Södra Österbotten,Southern Ostrobothnia,FI194,South Ostrobothnia,Southern Ostrobothnia
3,FI-04,Etelä-Savo,Södra Savolax,Southern Savonia,FI1D1,South Savo,Southern Savonia
4,FI-05,Kainuu,Kajanaland,Kainuu,FI1D8,Kainuu,Kainuu
5,FI-06,Kanta-Häme,Egentliga Tavastland,Tavastia Proper,FI1C2,Kanta-Häme,Tavastia Proper
6,FI-07,Keski-Pohjanmaa,Mellersta Österbotten,Central Ostrobothnia,FI1D5,Central Ostrobothnia,Central Ostrobothnia
7,FI-08,Keski-Suomi,Mellersta Finland,Central Finland,FI193,Central Finland,Central Finland
8,FI-09,Kymenlaakso,Kymmenedalen,Kymenlaakso,FI1C4,Kymenlaakso,Kymenlaakso
9,FI-10,Lappi,Lappland,Lapland,FI1D7,Lapland,Lapland
10,FI-11,Pirkanmaa,Birkaland,Pirkanmaa,FI197,Pirkanmaa,Pirkanmaa


In [203]:
socioeconomic_df = pd.merge(df_employment_rate, df_codemap, left_index=True, right_on='statfinland_names')

In [204]:
socioeconomic_df

Unnamed: 0,2019employment_rate,2020employment_rate,2019intermunicipal_migration_rate,2020intermunicipal_migration_rate,2019annual_contrib_margin,2020annual_contrib_margin,2019share_age_64,2020share_age_64,2019number_workplaces,2020number_workplaces,Code,Subdivision name (fi),Subdivision name (sv),Subdivision Name (en),nuts_code,statfinland_names,thl_namings_eng
17,75.2,,8144,1950,601.0,988.5,17.6,17.9,836622,,FI-18,Uusimaa,Nyland,Uusimaa,FI1B1,Uusimaa,Uusimaa
18,72.4,,887,1380,74.1,532.6,23.3,23.7,202061,,FI-19,Varsinais-Suomi,Egentliga Finland,Southwest Finland,FI1C1,Southwest Finland,Southwest Finland
16,71.2,,-1141,-719,63.9,596.9,27.2,27.7,86401,,FI-17,Satakunta,Satakunda,Satakunta,FI196,Satakunta,Satakunta
5,73.1,,-69,137,197.5,609.6,25.3,25.8,64199,,FI-06,Kanta-Häme,Egentliga Tavastland,Tavastia Proper,FI1C2,Kanta-Häme,Tavastia Proper
10,71.4,,1886,2517,305.6,706.8,21.8,22.1,217811,,FI-11,Pirkanmaa,Birkaland,Pirkanmaa,FI197,Pirkanmaa,Pirkanmaa
15,69.5,,-591,-5,222.7,866.6,26.7,27.3,77081,,FI-16,Päijät-Häme,Päijänne-Tavastland,Päijänne Tavastia,FI1C3,Päijät-Häme,Päijänne Tavastia
8,67.9,,-1286,-848,175.6,763.5,28.0,28.6,61440,,FI-09,Kymenlaakso,Kymmenedalen,Kymenlaakso,FI1C4,Kymenlaakso,Kymenlaakso
1,67.4,,-488,-305,387.3,484.5,27.4,28.0,47995,,FI-02,Etelä-Karjala,Södra Karelen,South Karelia,FI1C5,South Karelia,South Karelia
3,68.9,,-1228,-688,143.5,770.0,30.9,31.7,50145,,FI-04,Etelä-Savo,Södra Savolax,Southern Savonia,FI1D1,South Savo,Southern Savonia
14,69.5,,-543,-129,162.0,551.8,25.3,25.9,98975,,FI-15,Pohjois-Savo,Norra Savolax,Northern Savonia,FI1D2,North Savo,Northern Savonia


In [206]:
socioeconomic_df.shape

(18, 17)

In [207]:
socioeconomic_df.to_csv('../data/processed/socioeconomic_variables.csv')