In [1]:
# import libraries
# for processing data tables
import numpy as np
import pandas as pd

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# for sending requests to url and parsing
import requests
import json

import sys
sys.path.append('../')

# import useful custom functions and variables from my helper.py
from helper import request_data_kosis
from helper import dict_hl_name
from helper import map_hl

import configparser
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns only the paths it
# actually parsed, so an empty list means the settings were NOT loaded.
# Fail here with a clear message instead of a later KeyError on config['hr'].
config_files_read = config.read('../../config.ini', encoding='utf8')
if not config_files_read:
    raise FileNotFoundError(
        "could not read '../../config.ini'; check the path relative to this notebook"
    )
# keep the list of parsed files as the cell's displayed output
config_files_read

['../../config.ini']

## Life Expectancy at Birth

## Homicide Rate

In [2]:
# Homicide figures for 2020. The request is indexed on the C2/C2_NM columns,
# which are then renamed to C1/C1_NM so the frame lines up with the other tables.
df_hr_hl = request_data_kosis(
    url=config['hr']['hl_url'],
    index=['C2', 'C2_NM'],
    target_period='2020',
    variable_name='hr',
)
df_hr_hl = df_hr_hl.rename(columns={'C2': 'C1', 'C2_NM': 'C1_NM'})
# presumably map_hl normalises region names so both tables share the same
# join key -- confirm against helper.py
df_hr_hl['C1_NM'] = df_hr_hl['C1_NM'].apply(lambda name: map_hl(name, dict_hl_name))

# 'pop' figures for the same period, with region names passed through map_hl too
df_pop_hl = request_data_kosis(
    url=config['pop']['hl_url'],
    target_period='2020',
    variable_name='pop',
)
df_pop_hl['C1_NM'] = df_pop_hl['C1_NM'].apply(lambda name: map_hl(name, dict_hl_name))

# attach 'pop' by region name, rescale 'hr' to a value per 100,000 of 'pop',
# then discard the helper column again
df_hr_hl = (
    df_hr_hl
    .merge(df_pop_hl[['C1_NM', 'pop']], on='C1_NM')
    .assign(hr=lambda d: d['hr'] / d['pop'] * 100000)
    .drop(columns='pop')
)

In [3]:
# preview the first five rows of the homicide-rate table
df_hr_hl.head(n=5)

Unnamed: 0,C1,C1_NM,hr,period
0,B002,서울특별시,1.695071,2020
1,B003,부산광역시,1.74484,2020
2,B004,대구광역시,1.308181,2020
3,B005,인천광역시,1.195824,2020
4,B006,광주광역시,1.019448,2020


## Theft Rate

In [4]:
# Theft figures for 2020. The request is indexed on the C2/C2_NM columns,
# which are then renamed to C1/C1_NM so the frame lines up with the other tables.
df_tr_hl = request_data_kosis(
    url=config['tr']['hl_url'],
    index=['C2', 'C2_NM'],
    target_period='2020',
    variable_name='tr',
)
df_tr_hl = df_tr_hl.rename(columns={'C2': 'C1', 'C2_NM': 'C1_NM'})
# presumably map_hl normalises region names so both tables share the same
# join key -- confirm against helper.py
df_tr_hl['C1_NM'] = df_tr_hl['C1_NM'].apply(lambda name: map_hl(name, dict_hl_name))

# NOTE(review): this repeats the exact 'pop' request made in the Homicide Rate
# cell; kept so this section still runs on its own.
df_pop_hl = request_data_kosis(
    url=config['pop']['hl_url'],
    target_period='2020',
    variable_name='pop',
)
df_pop_hl['C1_NM'] = df_pop_hl['C1_NM'].apply(lambda name: map_hl(name, dict_hl_name))

# attach 'pop' by region name, rescale 'tr' to a value per 100,000 of 'pop',
# then discard the helper column again
df_tr_hl = (
    df_tr_hl
    .merge(df_pop_hl[['C1_NM', 'pop']], on='C1_NM')
    .assign(tr=lambda d: d['tr'] / d['pop'] * 100000)
    .drop(columns='pop')
)

In [5]:
# preview the first five rows of the theft-rate table
df_tr_hl.head(n=5)

Unnamed: 0,C1,C1_NM,tr,period
0,B002,서울특별시,387.586106,2020
1,B003,부산광역시,415.272006,2020
2,B004,대구광역시,355.661809,2020
3,B005,인천광역시,298.657089,2020
4,B006,광주광역시,341.243114,2020


## Green Area per Capita


In [6]:
# 'gapc' values for 2020; the response is filtered below, so it also
# contains rows that are not high-level regions
df_gapc_hl = request_data_kosis(
    url=config['gapc']['hl_url'],
    target_period='2020',
    variable_name='gapc',
)

# keep only rows whose region name is a key of dict_hl_name (this drops the
# low-level regions), then remove repeated (name, value) pairs
hl_name_mask = df_gapc_hl['C1_NM'].isin(dict_hl_name.keys())
df_gapc_hl = (
    df_gapc_hl[hl_name_mask]
    .copy()
    .drop_duplicates(subset=['C1_NM', 'gapc'])
)

In [7]:
# preview the first five high-level green-area rows
df_gapc_hl.head(n=5)

Unnamed: 0,C1,C1_NM,gapc,period
1,15315HJG001,서울특별시,24.24,2020
27,15315HJG002,부산광역시,160.59,2020
44,15315HJG003,대구광역시,256.12,2020
53,15315HJG004,인천광역시,102.23,2020
64,15315HJG005,광주광역시,246.67,2020


In [8]:
# Same request as for df_gapc_hl (config['gapc']['hl_url']); this time the
# rows that are NOT high-level regions are the ones kept.
df_gapc_ll = request_data_kosis(
    url=config['gapc']['hl_url'],
    target_period='2020',
    variable_name='gapc',
)

# NOTE(review): de-duplicating on (name, value) could also drop two different
# districts that share a name and happen to have the same gapc -- confirm
# against the raw data.
df_gapc_ll = (
    # drop every row whose name appears among the high-level regions
    df_gapc_ll[~df_gapc_ll['C1_NM'].isin(df_gapc_hl['C1_NM'])]
    .copy()
    .drop_duplicates(subset=['C1_NM', 'gapc'])
    # the low-level code/name become C2/C2_NM
    .rename(columns={'C1': 'C2', 'C1_NM': 'C2_NM'})
    # the first 11 characters of a low-level code are used as the parent code
    .assign(C1=lambda d: d['C2'].str.slice(stop=11))
    # bring in the high-level region name via the parent code
    .merge(df_gapc_hl[['C1', 'C1_NM']], on='C1')
)

In [9]:
# preview the first five low-level green-area rows
df_gapc_ll.head(n=5)

Unnamed: 0,C2,C2_NM,gapc,period,C1,C1_NM
0,15315HJG001001,종로구,75.0,2020,15315HJG001,서울특별시
1,15315HJG001002,중구,0.2,2020,15315HJG001,서울특별시
2,15315HJG001003,용산구,39.02,2020,15315HJG001,서울특별시
3,15315HJG001004,성동구,14.76,2020,15315HJG001,서울특별시
4,15315HJG001005,광진구,15.22,2020,15315HJG001,서울특별시
