### Code Hist.

 - CODE : Crawling - 한국환경공단 대기오염 (KECO Airkorea) 관측정보

 - DESC  
    &ensp; 1) Open API 사용 불가.  
    &emsp;&ensp;&ensp; 특히, 시간별 통계자료는 Air Korea에서 직접 다운로드  
    &emsp;&ensp;&ensp; (https://www.airkorea.or.kr/web/last_amb_hour_data)  
    &ensp; 2) 결측치가 너무 많아서 KECO_AIR Korea 데이터 사용 고려중  
    
  - DATE  
    &ensp; 2023-05-26 Created  
    &ensp; 2023-11-27 Lab Updated  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; 1) 기존 Template에 맞게 통일  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; 2) 공통코드 사용 (.ipynb)  

# 01. Code

## 01-01. Init

### 01-01-01. Init_Module Import

In [6]:
#region Basic_Import
## Basic
import os, sys, warnings
os.path.dirname(os.path.abspath('./__file__'))
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('./__file__'))))
warnings.filterwarnings('ignore')

import numpy as np, pandas as pd
from pandas import DataFrame, Series
pd.options.display.float_format = '{:.10f}'.format

## Datetime
import time, datetime as dt
from datetime import datetime, date, timedelta

import glob, requests, json
from glob import glob

from scipy import stats

## Excel/CSV
import openpyxl, xlrd

import urllib
from urllib.request import urlopen
from urllib.parse import urlencode, unquote, quote_plus

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

## 시각화
import seaborn as sns, matplotlib.pyplot as plt
# %matplotlib inline
plt.rcParams['figure.figsize'] = [10, 8]

#endregion Basic_Import

In [7]:
## Import_DL
str_tar = "tf"
## For Torch
if str_tar == "torch":
    import torch
    import torch.nn as nn
    from torch.nn.utils import weight_norm
    print("Torch Imported")
## For TF
elif str_tar == "tf":
    import tensorflow as tf
    import tensorflow_addons as tfa
    print("Tensorflow Imported")
else:
    print("Error : Cannot be used except for Keywords")
    print(" : torch / tf")

Tensorflow Imported


In [11]:
## Import_Local
from Dev_Common import Data_Datetime as com_date, KMA_Weather as com_KMA, KASI_Holiday as com_Holi

### 01-01-02. Config (Directory, Params)

In [12]:
## Init_config
SEED = 42

np.random.seed(SEED)
tf.random.set_seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = "1"
# random.seed(SEED)

In [13]:
## Define Todate str
str_now_ymd = pd.datetime.now().date()
str_now_y, str_now_m, str_now_d = pd.datetime.now().year, pd.datetime.now().month, pd.datetime.now().day
str_now_hr, str_now_min = pd.datetime.now().hour, pd.datetime.now().minute

print(pd.datetime.now())
print(str(str_now_y) + " / " + str(str_now_m)  + " / " + str(str_now_d))
print(str(str_now_hr) + " : " + str(str_now_min))

2024-07-19 13:52:28.795273
2024 / 7 / 19
13 : 52


In [14]:
## Weather_ASOS Parameter
## 기간 설정
year_Start, year_End = 2020, 2024
str_dt_start, str_dt_end = str(year_Start)+'-01-01', str(year_End)+'-12-31'

# str_Interval = 'DAY' ## 일
str_Interval = 'HR' ## 시간

## 지점 설정
# Observatory = "118" ## 서울
Observatory = "고색동" ## 수원 권선구 고색동
# Observatory = "인계동" ## 수원 권선구 고색동
# Observatory = "송정동(봉명동)" ## 청주 기상대와 가장 인접함

In [15]:
# Define data root directory
str_dir_kecoAirKor = "../data_KECO_Airkor_" + str_Interval + "/"
print(os.listdir(str_dir_kecoAirKor))

# try: os.mkdir('../data_KMA_ASOS/' + str(Observatory) + '_' + str(dict_Observe[Observatory]) + '/')
# except FileExistsError: print("폴더가 이미 존재함.")
# print(str_dir_kmaAsos)

['KECO_AIRKOR_2010-01.csv', 'KECO_AIRKOR_2010-02.csv', 'KECO_AIRKOR_2010-03.csv', 'KECO_AIRKOR_2010-04.csv', 'KECO_AIRKOR_2010-05.csv', 'KECO_AIRKOR_2010-06.csv', 'KECO_AIRKOR_2010-07.csv', 'KECO_AIRKOR_2010-08.csv', 'KECO_AIRKOR_2010-09.csv', 'KECO_AIRKOR_2010-10.csv', 'KECO_AIRKOR_2010-11.csv', 'KECO_AIRKOR_2010-12.csv', 'KECO_AIRKOR_2011-01.csv', 'KECO_AIRKOR_2011-02.csv', 'KECO_AIRKOR_2011-03.csv', 'KECO_AIRKOR_2011-04.csv', 'KECO_AIRKOR_2011-05.csv', 'KECO_AIRKOR_2011-06.csv', 'KECO_AIRKOR_2011-07.csv', 'KECO_AIRKOR_2011-08.csv', 'KECO_AIRKOR_2011-09.csv', 'KECO_AIRKOR_2011-10.csv', 'KECO_AIRKOR_2011-11.csv', 'KECO_AIRKOR_2011-12.csv', 'KECO_AIRKOR_2012-01.csv', 'KECO_AIRKOR_2012-02.csv', 'KECO_AIRKOR_2012-03.csv', 'KECO_AIRKOR_2012-04.csv', 'KECO_AIRKOR_2012-05.csv', 'KECO_AIRKOR_2012-06.csv', 'KECO_AIRKOR_2012-07.csv', 'KECO_AIRKOR_2012-08.csv', 'KECO_AIRKOR_2012-09.csv', 'KECO_AIRKOR_2012-10.csv', 'KECO_AIRKOR_2012-11.csv', 'KECO_AIRKOR_2012-12.csv', 'KECO_AIRKOR_2013-01.csv', 

## 01-02. Data Load (df_raw)

In [26]:
Data_KECO_AIR_tmp = pd.read_csv(str_dir_kecoAirKor + 'KECO_AIRKOR_2014-12.csv') # , sheet_name="Sheet1", engine='openpyxl')
df_observatory = Data_KECO_AIR_tmp[['REGION', 'CD_OBSERVATORY']].drop_duplicates()
df_observatory[(df_observatory['REGION'] == '수원')]

Unnamed: 0,REGION,CD_OBSERVATORY


In [8]:
Data_KECO_AIR = pd.DataFrame()
List_Month = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']
for year in range(year_Start, year_End + 1):
    for month in List_Month:
        print(str(year) + '-' + str(month))
        try:
            ## xls Load
            Data_KECO_AIR_tmp = pd.read_excel(str_dir_kecoAirKor
                                            + 'KECO_AIRKOR_' + str(year) + '-' + str(month) + '.xlsx') # , sheet_name="Sheet1", engine='openpyxl')

            Data_AIRKOR_tmp = Data_KECO_AIR_tmp[Data_KECO_AIR_tmp['측정소명'] == Observatory]
            print(Data_KECO_AIR.shape)
            Data_KECO_AIR = pd.concat([Data_KECO_AIR, pd.DataFrame(Data_AIRKOR_tmp)])

        except Exception as e:
            print(e)
            break

Data_KECO_AIR.to_csv(srt_dir_kecoAirKor + 'KECO_AIRKOR_' + str(Observatory) + "_" + str(year_Start) + '-' + str(year_End) + '.csv')
print(Data_KECO_AIR.shape, " /// ", Data_KECO_AIR.columns)
Data_KECO_AIR

2020-01
(0, 0)
2020-02
(744, 12)
2020-03
(1440, 12)
2020-04
(2184, 12)
2020-05
(2904, 12)
2020-06
(3648, 12)
2020-07
(4368, 12)
2020-08
(5112, 12)
2020-09
(5856, 12)
2020-10
(6576, 12)
2020-11
(7320, 12)
2020-12
(8040, 12)
2021-01
(8784, 12)
2021-02
(9528, 12)
2021-03
(10200, 12)
2021-04
(10944, 12)
2021-05
(11664, 12)
2021-06
(12408, 12)
2021-07


KeyboardInterrupt: 

In [None]:
Data_KECO_AIRKOR = pd.read_csv(srt_dir_kecoAirKor + 'KECO_AIRKOR_' 
                            + str(Observatory) + "_" + str(year_Start) + '-' + str(year_End) + '.csv', index_col = 0)

Data_KECO_AIRKOR = Data_KECO_AIRKOR[['측정일시', 'SO2', 'CO', 'O3', 'NO2', 'PM10', 'PM25']]
# Data_KECO_AIRKOR = Data_KECO_AIRKOR.rename(columns = {'측정일시' : "METER_DATE"})
Data_KECO_AIRKOR

In [None]:
Data_KECO_AIRKOR = com_date.conv_midnight_24to00(Data_KECO_AIRKOR, 'METER_DATE', '측정일시', '%Y%m%d%H')
Data_KECO_AIRKOR = Data_KECO_AIRKOR[['METER_DATE', 'SO2', 'CO', 'O3', 'NO2', 'PM10', 'PM25']]
Data_KECO_AIRKOR['METER_DATE'] = pd.to_datetime(Data_KECO_AIRKOR['METER_DATE'])
Data_KECO_AIRKOR

In [None]:
date = pd.to_datetime(Data_KECO_AIRKOR["METER_DATE"])
dt_start = date.min()
dt_end = date.max()

df_dt = pd.DataFrame()
df_dt = com_date.create_df_dt(df_dt, "METER_DATE", dt_start, dt_end, "1H")
df_dt

In [None]:
df_merged = pd.DataFrame(df_dt)
df_merged = pd.merge(df_merged, Data_KECO_AIRKOR
                     , how = 'left', on = "METER_DATE")
print(df_merged.info())
df_merged

In [None]:
df_merged['SO2'] = df_merged['SO2'].interpolate()
df_merged['CO'] = df_merged['CO'].interpolate()
df_merged['O3'] = df_merged['O3'].interpolate()
df_merged['NO2'] = df_merged['NO2'].interpolate()
df_merged['PM10'] = df_merged['PM10'].interpolate()
df_merged['PM25'] = df_merged['PM25'].interpolate()
print(df_merged.info())

In [None]:
# Data_ASOS.columns
df_merged.to_csv(srt_dir_kecoAirKor
                  + 'KECO_AIRKOR_' + str(Observatory) + "_" + str(year_Start) + '-' + str(year_End) + '_interpolated_hr.csv')