In [1]:
# import libraries
# for processing data tables
import numpy as np
import pandas as pd

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# for sending requests to url and parsing
import requests
import json

import sys
sys.path.append('../')

# import useful custom functions and variables from my helper.py
from helper import request_data_kosis
from helper import dict_hl_name
from helper import map_hl
from helper import request_data_libsta

import configparser

# Relative path of the INI file whose sections (e.g. 'aiw', 'ais', 'pop') hold
# the request URLs used by the cells below.
CONFIG_PATH = '../../config.ini'

config = configparser.ConfigParser()

# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with a clear message instead of with a
# KeyError on the first config[...] lookup further down.
config_files_read = config.read(CONFIG_PATH, encoding='utf8')
if not config_files_read:
    raise FileNotFoundError(f'Could not read configuration file: {CONFIG_PATH}')

# Keep the list of parsed files as the cell output.
config_files_read

['../../config.ini']

## Access to Improved Water

In [2]:
# Request the 2020 'aiw' series twice: once with the high-level URL (hl_url)
# and once with the lower-level URL (ll_url) from the 'aiw' config section.
df_aiw_hl, df_aiw_ll = (
    request_data_kosis(
        url=config['aiw'][url_key],
        target_period='2020',
        variable_name='aiw',
    )
    for url_key in ('hl_url', 'll_url')
)

In [3]:
# Preview the first rows of the high-level 'aiw' table.
df_aiw_hl.head()

Unnamed: 0,C1,C1_NM,aiw,period
0,11,서울특별시,100.0,2020
1,21,부산광역시,99.7,2020
2,22,대구광역시,100.0,2020
3,23,인천광역시,99.1,2020
4,24,광주광역시,99.9,2020


In [4]:
# Preview the first rows of the lower-level 'aiw' table.
# NOTE(review): the rows shown below are the same as the df_aiw_hl preview
# (two-digit region codes only) — confirm that config['aiw']['ll_url'] really
# returns lower-level regions.
df_aiw_ll.head()

Unnamed: 0,C1,C1_NM,aiw,period
0,11,서울특별시,100.0,2020
1,21,부산광역시,99.7,2020
2,22,대구광역시,100.0,2020
3,23,인천광역시,99.1,2020
4,24,광주광역시,99.9,2020


## Access to Improved Sanitation

In [5]:
# Request the 2020 'ais' series twice: once with the high-level URL (hl_url)
# and once with the lower-level URL (ll_url) from the 'ais' config section.
df_ais_hl, df_ais_ll = (
    request_data_kosis(
        url=config['ais'][url_key],
        target_period='2020',
        variable_name='ais',
    )
    for url_key in ('hl_url', 'll_url')
)

In [6]:
# Preview the first rows of the high-level 'ais' table.
df_ais_hl.head()

Unnamed: 0,C1,C1_NM,ais,period
0,11,서울특별시,100.0,2020
1,21,부산광역시,99.4,2020
2,22,대구광역시,98.8,2020
3,23,인천광역시,97.2,2020
4,24,광주광역시,98.9,2020


In [7]:
# Preview the first rows of the lower-level 'ais' table; the output below mixes
# a two-digit code row (11) with five-digit codes (11010, ...).
df_ais_ll.head()

Unnamed: 0,C1,C1_NM,ais,period
0,11,서울특별시,100.0,2020
1,11010,종로구,100.0,2020
2,11020,중구,100.0,2020
3,11030,용산구,100.0,2020
4,11040,성동구,100.0,2020


## Population Density

In [8]:
# High-level population for 2020, stored in a 'pop' column.
df_pop_hl = request_data_kosis(
    url=config['pop']['hl_url'],
    target_period='2020',
    variable_name='pop',
)

# High-level urban area for 2020, stored in an 'urban_area' column.
df_ua_hl = request_data_kosis(
    url=config['ua']['hl_url'],
    target_period='2020',
    variable_name='urban_area',
)

In [9]:
# Unify the names of the high-level regions so that both tables can be joined
# on C1_NM: every name is passed through map_hl together with dict_hl_name.
df_pop_hl['C1_NM'] = df_pop_hl['C1_NM'].apply(map_hl, args=(dict_hl_name,))
df_ua_hl['C1_NM'] = df_ua_hl['C1_NM'].apply(map_hl, args=(dict_hl_name,))

In [10]:
# Attach the urban area to the population table and derive population density.
#
# The columns this cell produces are dropped first (errors='ignore' makes that a
# no-op on the first run) so the cell can be re-run safely: without it, a second
# run would merge 'urban_area' again, yield 'urban_area_x' / 'urban_area_y', and
# the density line would fail with a KeyError.
df_pop_hl = (
    df_pop_hl
    .drop(columns=['urban_area', 'pd'], errors='ignore')
    .merge(df_ua_hl.loc[:, ['C1_NM', 'urban_area']], on='C1_NM')
)

# pd = population per unit of urban area.
# NOTE(review): urban_area values look like square metres (about 6.06e8 for
# Seoul in the preview), which would make pd persons per m^2 — confirm the unit.
df_pop_hl['pd'] = df_pop_hl['pop'] / df_pop_hl['urban_area']

In [11]:
# Preview the population table with the merged 'urban_area' and derived 'pd' columns.
df_pop_hl.head()

Unnamed: 0,C1,C1_NM,pop,period,urban_area,pd
0,1520213102303342A.01,서울특별시,9911088.0,2020,605680193.0,0.016364
1,1520213102303342A.02,부산광역시,3438710.0,2020,940825055.0,0.003655
2,1520213102303342A.03,대구광역시,2446144.0,2020,799299015.0,0.00306
3,1520213102303342A.04,인천광역시,3010476.0,2020,529650659.0,0.005684
4,1520213102303342A.05,광주광역시,1471385.0,2020,480081726.0,0.003065


## Physician Density

In [12]:
# Request the 2021 'phd' series twice: once with the high-level URL (hl_url)
# and once with the lower-level URL (ll_url) from the 'phd' config section.
df_phd_hl, df_phd_ll = (
    request_data_kosis(
        url=config['phd'][url_key],
        target_period='2021',
        variable_name='phd',
    )
    for url_key in ('hl_url', 'll_url')
)

In [13]:
# Preview the first rows of the high-level 'phd' table.
df_phd_hl.head()

Unnamed: 0,C1,C1_NM,phd,period
0,11,서울특별시,4.7,2021
1,21,부산광역시,3.5,2021
2,22,대구광역시,3.6,2021
3,23,인천광역시,2.6,2021
4,24,광주광역시,3.7,2021


In [14]:
# Tag every lower-level row with its high-level region.
#
# C1_hl is the first two characters of the lower-level code C1; it is matched
# against the high-level table's C1 to look up the high-level name, stored as
# 'hl'. The left join keeps lower-level rows that have no match ('hl' is NaN).
#
# Previously derived 'C1_hl' / 'hl' columns are dropped first (a no-op on the
# first run) so that re-running the cell does not produce 'hl_x' / 'hl_y'.
df_phd_ll = (
    df_phd_ll
    .drop(columns=['C1_hl', 'hl'], errors='ignore')
    .assign(C1_hl=lambda d: d['C1'].str[:2])
    .merge(
        df_phd_hl.loc[:, ['C1', 'C1_NM']].rename(columns={'C1': 'C1_hl', 'C1_NM': 'hl'}),
        on='C1_hl',
        how='left',
    )
)

In [15]:
# Preview the lower-level 'phd' table with the added 'C1_hl' and 'hl' columns.
df_phd_ll.head()

Unnamed: 0,C1,C1_NM,phd,period,C1_hl,hl
0,11010,종로구,21.1,2021,11,서울특별시
1,11020,중구,10.2,2021,11,서울특별시
2,11030,용산구,3.6,2021,11,서울특별시
3,11040,성동구,3.6,2021,11,서울특별시
4,11050,광진구,3.8,2021,11,서울특별시


## Number of Public Libraries

In [16]:
# Public-library data for the high-level regions; map_hl and dict_hl_name are
# handed to the helper (presumably to unify region names — confirm in helper.py).
df_npl_hl = request_data_libsta(config['npl']['hl_url'], map_hl, dict_hl_name)

# Request the 2020 population again and unify its region names, so that
# df_pop_hl is rebound to a fresh request result rather than the frame that was
# extended with 'urban_area' and 'pd' in the population-density section.
df_pop_hl = request_data_kosis(
    url=config['pop']['hl_url'],
    target_period='2020',
    variable_name='pop',
)
df_pop_hl['C1_NM'] = df_pop_hl['C1_NM'].apply(map_hl, args=(dict_hl_name,))

In [17]:
# Rescale 'npl' by population: join 'pop' on C1_NM, replace 'npl' with
# npl / pop * 100000, then discard the helper 'pop' column.
# NOTE: 'npl' is overwritten, so re-running this cell without re-running the
# request cell above rescales the already-rescaled values again.
df_npl_hl = (
    df_npl_hl
    .merge(df_pop_hl.loc[:, ['C1_NM', 'pop']], on='C1_NM')
    .assign(npl=lambda d: d['npl'] / d['pop'] * 100000)
    .drop('pop', axis=1)
)

In [18]:
# Preview the rescaled 'npl' values per high-level region.
df_npl_hl.head()

Unnamed: 0,C1_NM,npl
0,강원도,16.408447
1,경기도,13.29745
2,경상남도,14.849793
3,경상북도,13.744984
4,광주광역시,33.030104


## 

## Internet Access

In [19]:
# Internet-access table for 2021 at the high-level scale.
# NOTE(review): index/columns presumably ask the helper to spread the C2_NM
# categories into columns (the preview shows 가능 / 불가능) — confirm in helper.py.
df_ia_hl = request_data_kosis(
    url=config['ia']['hl_url'],
    target_period='2021',
    index=['C1', 'C1_NM'],
    columns='C2_NM',
    scale='hl',
)

In [20]:
# Preview the first rows of the internet-access table.
df_ia_hl.head()

Unnamed: 0,C1,C1_NM,가능,불가능,period
0,A0501,서울,99.9,0.1,2021
1,A0502,부산,99.9,0.1,2021
2,A0503,대구,99.9,0.1,2021
3,A0504,인천,99.9,0.1,2021
4,A0505,광주,100.0,0.0,2021


## Home Computer Access

In [21]:
# Home-computer-access table for 2021 at the high-level scale.
# NOTE(review): index/columns presumably ask the helper to spread the C2_NM
# categories into columns (the preview shows 미보유 / 보유) — confirm in helper.py.
df_hca_hl = request_data_kosis(
    url=config['hca']['hl_url'],
    target_period='2021',
    index=['C1', 'C1_NM'],
    columns='C2_NM',
    scale='hl',
)

In [22]:
# Preview the first rows of the home-computer-access table.
df_hca_hl.head()

Unnamed: 0,C1,C1_NM,미보유,보유,period
0,A0401,서울,19.3,80.7,2021
1,A0402,부산,24.5,75.5,2021
2,A0403,대구,27.2,72.8,2021
3,A0404,인천,20.7,79.3,2021
4,A0405,광주,29.1,70.9,2021


## Traffic Fatalities

In [60]:
# Load the 2020 traffic-fatalities CSV; the file is decoded as cp949.
df_tf = pd.read_csv(
    '../../data/ID/traffic_fatalities_2020.csv',
    encoding='cp949',
)

In [61]:
# Build the high-level traffic-fatality rate (fatalities per 100,000 people).
#
# Steps:
#   1. keep only the rows whose 시군구 is '합계' and the two needed columns
#   2. rename them to the shared schema (C1_NM, tf)
#   3. map region names with map_hl so they match the population table
#   4. cast the number of traffic fatalities to int
#   5. merge with the population table (without its C1 column) on C1_NM
#
# The cast is done through assign() rather than `df.loc[:, col] = ...astype(int)`:
# with pandas >= 2.0 a full-column .loc assignment writes into the existing
# column and can keep its previous dtype, so the conversion might not stick.
df_tf_hl = (
    df_tf.loc[df_tf['시군구'] == '합계', ['시도', '사망자수(명)']]
    .rename(columns={'시도': 'C1_NM', '사망자수(명)': 'tf'})
    .assign(
        C1_NM=lambda d: d['C1_NM'].apply(lambda x: map_hl(x, dict_hl_name)),
        tf=lambda d: d['tf'].astype(int),
    )
    .merge(df_pop_hl.drop('C1', axis=1), on='C1_NM')
)

# convert the fatality count into fatalities per 100,000 people
df_tf_hl['tf'] = (df_tf_hl['tf'] / df_tf_hl['pop']) * 100000

# drop the population column, which was only needed for the rate
df_tf_hl = df_tf_hl.drop('pop', axis=1)



In [64]:
# Preview the traffic-fatality rate per high-level region.
df_tf_hl.head()

Unnamed: 0,C1_NM,tf,period
0,서울특별시,2.209646,2020
1,부산광역시,2.908067,2020
2,경기도,4.432483,2020
3,강원도,8.524701,2020
4,충청북도,10.440217,2020


## 

## Street Density

## land_allocated_to_streets