In [1]:
# import libraries

# standard library
import json
import os

# for processing data tables
import numpy as np
import pandas as pd

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns

# for sending requests to url and parsing
import requests
import xmltodict

In [2]:
# first and last year to request from the API (both ends are included)
start_year, end_year = 2015, 2020

# Green Area per Capita


In [4]:
def get_green_area_per_capita(start_year, end_year, base_url, timeout=30):
    '''
    Get 'green area per capita' data by metropolitan and basic local governments from the KOSIS API.

    One request is sent per year from start_year to end_year. The yearly tables are
    stacked and split into two tables by the length of the geographical code ('C1').

    Parameters
    ----------
    start_year : int
        First year to request (inclusive).
    end_year : int
        Last year to request (inclusive).
    base_url : str
        Request URL template. It is filled in with ``base_url.format(year, year)``,
        so every positional ``{}`` placeholder in it receives the requested year.
        The response must be JSON containing the fields 'C1', 'C1_NM' and 'DT'.
    timeout : float, optional
        Seconds to wait for each HTTP response (default 30).

    Returns
    -------
    df_green_metro : pandas.DataFrame
        Rows whose code is exactly 11 characters long, with columns
        metro_code, metro_name, green_area_per_capita, year.
    df_green_basic : pandas.DataFrame
        Rows whose code is longer than 11 characters, with columns
        basic_code, basic_name, green_area_per_capita, year, metro_name.
        Rows whose 11-character code prefix matches no metropolitan row are
        dropped (inner merge).

    In both tables green_area_per_capita holds the 'DT' values exactly as they
    were delivered in the response; no type conversion is done here.

    Raises
    ------
    ValueError
        If base_url is empty or start_year is greater than end_year.
    requests.HTTPError
        If a request comes back with an HTTP error status.
    '''

    # the URL must come from the caller; an empty template can never produce a valid request
    if not base_url:
        raise ValueError('base_url must be a non-empty request URL template')
    if start_year > end_year:
        raise ValueError('start_year must not be greater than end_year')

    # number of characters in a metropolitan government code
    metro_code_len = 11

    list_metro = []
    list_basic = []

    for year in range(start_year, end_year + 1):
        response = requests.get(base_url.format(year, year), timeout=timeout)
        # stop on HTTP errors instead of trying to parse an error page as data
        response.raise_for_status()

        # convert a json formatted response to a Pandas Dataframe
        df_green_tmp = pd.DataFrame(json.loads(response.text))[['C1', 'C1_NM', 'DT']]
        df_green_tmp = df_green_tmp.rename(columns={'DT': 'green_area_per_capita'})
        df_green_tmp['year'] = year

        # divide the dataframe based on geographical codes ('C1')
        # if len(C1) == 11, it is for a metropolitan government
        # if len(C1) > 11, it is for a basic local government
        code_len = df_green_tmp['C1'].str.len()

        # metropolitan government
        df_green_metro_tmp = df_green_tmp.loc[code_len == metro_code_len]
        df_green_metro_tmp = df_green_metro_tmp.rename(
            columns={'C1': 'metro_code', 'C1_NM': 'metro_name'})

        # basic local government
        df_green_basic_tmp = df_green_tmp.loc[code_len > metro_code_len]
        df_green_basic_tmp = df_green_basic_tmp.rename(
            columns={'C1': 'basic_code', 'C1_NM': 'basic_name'})

        # add dataframes into lists
        list_metro.append(df_green_metro_tmp)
        list_basic.append(df_green_basic_tmp)

    # concatenate each list into a dataframe
    df_green_metro = pd.concat(list_metro, ignore_index=True)
    df_green_basic = pd.concat(list_basic, ignore_index=True)

    # one name per metropolitan code: if a name differs between years, keep the
    # latest one so that the merge below can never multiply basic rows
    df_metro_code = (df_green_metro[['metro_code', 'metro_name']]
                     .drop_duplicates(subset='metro_code', keep='last'))

    # add the metropolitan name to df_green_basic via the first 11 characters of its code
    df_green_basic['metro_code'] = df_green_basic['basic_code'].str.slice(0, metro_code_len)
    df_green_basic = df_green_basic.merge(df_metro_code, on='metro_code')
    df_green_basic = df_green_basic.drop('metro_code', axis=1)

    return df_green_metro, df_green_basic

# The request URL template is read from the environment so that nothing
# sensitive embedded in it has to be stored in the notebook.
# Set KOSIS_GREEN_AREA_URL before running this cell.
green_area_base_url = os.environ.get('KOSIS_GREEN_AREA_URL', '')

df_green_metropolitan, df_green_basic = get_green_area_per_capita(
    start_year, end_year, green_area_base_url
)

In [5]:
# show the metropolitan-level table returned by get_green_area_per_capita
df_green_metropolitan

Unnamed: 0,metro_code,metro_name,green_area_per_capita,year
0,15315HJG000,전국,267.39,2015
1,15315HJG001,서울특별시,24.32,2015
2,15315HJG002,부산광역시,155.71,2015
3,15315HJG003,대구광역시,248.39,2015
4,15315HJG004,인천광역시,100.62,2015
...,...,...,...,...
103,15315HJG013,전라남도,888.95,2020
104,15315HJG014,경상북도,668.05,2020
105,15315HJG015,경상남도,509.78,2020
106,15315HJG016,제주특별자치도,634.77,2020


In [6]:
# show the basic-local-government table returned by get_green_area_per_capita
df_green_basic

Unnamed: 0,basic_code,basic_name,green_area_per_capita,year,metro_name
0,15315HJG001001,종로구,72.24,2015,서울특별시
1,15315HJG001002,중구,0.2,2015,서울특별시
2,15315HJG001003,용산구,38.46,2015,서울특별시
3,15315HJG001004,성동구,14.63,2015,서울특별시
4,15315HJG001005,광진구,15.21,2015,서울특별시
...,...,...,...,...,...
1369,15315HJG0171,세종특별자치시,475.05,2016,세종특별자치시
1370,15315HJG0171,세종특별자치시,400.8,2017,세종특별자치시
1371,15315HJG0171,세종특별자치시,353.63,2018,세종특별자치시
1372,15315HJG0171,세종특별자치시,318.83,2019,세종특별자치시
