### Code Hist.

 - CODE : Crawling - 한국환경공단 대기오염 (KECO Airkorea) 관측정보

 - DESC  
    &ensp; 1) Open API 사용 불가.  
    &emsp;&ensp;&ensp; 특히, 시간별 통계자료는 Air Korea에서 직접 다운로드  
    &emsp;&ensp;&ensp; (https://www.airkorea.or.kr/web/last_amb_hour_data)  
    &ensp; 2) 결측치가 너무 많아서 KECO_AIR Korea 데이터 사용 고려중  
    
  - DATE  
    &ensp; 2023-05-26 Created  
    &ensp; 2023-11-27 Lab Updated  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; 1) 기존 Template에 맞게 통일  
    &emsp;&emsp;&emsp;&emsp;&emsp;&emsp; 2) 공통코드 사용 (.ipynb)  

# 01. Code

## 01-01. Init

### 01-01-01. Init_Module Import

In [1]:
#region Basic_Import
## Basic
import os, sys, warnings
os.path.dirname(os.path.abspath('./__file__'))
sys.path.append(os.path.dirname(os.path.abspath(os.path.dirname('./__file__'))))
warnings.filterwarnings('ignore')

import numpy as np, pandas as pd
from pandas import DataFrame, Series
pd.options.display.float_format = '{:.10f}'.format

## Datetime
import time, datetime as dt
from datetime import datetime, date, timedelta

## glob : 
import glob, requests, json
from glob import glob

from scipy import stats

## Excel/CSV
import openpyxl, xlrd

import urllib
from urllib.request import urlopen
from urllib.parse import urlencode, unquote, quote_plus

from selenium import webdriver
from selenium.webdriver.chrome.service import Service

from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup

## 시각화
import seaborn as sns, matplotlib.pyplot as plt
# %matplotlib inline
plt.rcParams['figure.figsize'] = [10, 8]

#endregion Basic_Import

In [2]:
## Import_DL
str_tar = "tf"
## For Torch
if str_tar == "torch":
    import torch, torch.nn as nn
    from torch.nn.utils import weight_norm
    print("Torch Imported")
## For TF
elif str_tar == "tf":
    import tensorflow as tf, tensorflow_addons as tfa
    print("Tensorflow Imported")
else:
    print("Error : Cannot be used except for Keywords")
    print(" : torch / tf")

Tensorflow Imported


In [3]:
## Import_Local
from Dev_Common import Data_Datetime as com_date, KMA_Weather as com_KMA, KECO_AirKor as com_KECO, KASI_Holiday as com_Holi

### 01-01-02. Config (Directory, Params)

In [4]:
## Init_config
SEED = 42

np.random.seed(SEED)
tf.random.set_seed(SEED)
os.environ["PYTHONHASHSEED"] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = "1"
# random.seed(SEED)

In [5]:
## Define Todate str
str_now_ymd = pd.datetime.now().date()
str_now_y, str_now_m, str_now_d = pd.datetime.now().year, pd.datetime.now().month, pd.datetime.now().day
str_now_hr, str_now_min = pd.datetime.now().hour, pd.datetime.now().minute

print(pd.datetime.now())
print(str(str_now_y) + " / " + str(str_now_m)  + " / " + str(str_now_d))
print(str(str_now_hr) + " : " + str(str_now_min))

2024-07-19 17:36:19.742585
2024 / 7 / 19
17 : 36


In [6]:
# Define data root directory
str_dir_Airkor_HR = "../data_KECO_Airkor_HR/"
str_dir_xlsx = "../data_KECO_Airkor_HR/file_xlsx/"
print(os.listdir(str_dir_xlsx))

list_file_quarter = glob(str_dir_xlsx + "*.xlsx")
print(len(list_file_quarter))
list_file_quarter

['KECO_AIRKOR_2017-01.xlsx', 'KECO_AIRKOR_2017-02.xlsx', 'KECO_AIRKOR_2017-03.xlsx', 'KECO_AIRKOR_2017-04.xlsx', 'KECO_AIRKOR_2017-05.xlsx', 'KECO_AIRKOR_2017-06.xlsx', 'KECO_AIRKOR_2017-07.xlsx', 'KECO_AIRKOR_2017-08.xlsx', 'KECO_AIRKOR_2017-09.xlsx', 'KECO_AIRKOR_2017-10.xlsx', 'KECO_AIRKOR_2017-11.xlsx', 'KECO_AIRKOR_2017-12.xlsx', 'KECO_AIRKOR_2019-01.xlsx', 'KECO_AIRKOR_2019-02.xlsx', 'KECO_AIRKOR_2019-03.xlsx', 'KECO_AIRKOR_2019-04.xlsx', 'KECO_AIRKOR_2019-05.xlsx', 'KECO_AIRKOR_2019-06.xlsx', 'KECO_AIRKOR_2019-07.xlsx', 'KECO_AIRKOR_2019-08.xlsx', 'KECO_AIRKOR_2019-09.xlsx', 'KECO_AIRKOR_2019-10.xlsx', 'KECO_AIRKOR_2019-11.xlsx', 'KECO_AIRKOR_2019-12.xlsx', 'KECO_AIRKOR_2020-01.xlsx', 'KECO_AIRKOR_2020-02.xlsx', 'KECO_AIRKOR_2020-03.xlsx', 'KECO_AIRKOR_2020-04.xlsx', 'KECO_AIRKOR_2020-05.xlsx', 'KECO_AIRKOR_2020-06.xlsx', 'KECO_AIRKOR_2020-07.xlsx', 'KECO_AIRKOR_2020-08.xlsx', 'KECO_AIRKOR_2020-09.xlsx', 'KECO_AIRKOR_2020-10.xlsx', 'KECO_AIRKOR_2020-11.xlsx', 'KECO_AIRKOR_2020-1

['../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-01.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-02.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-03.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-04.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-05.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-06.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-07.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-08.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-09.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-10.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-11.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2017-12.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2019-01.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2019-02.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2019-03.xlsx',
 '../data_KECO_Airkor_HR/file_xlsx\\KECO_AIRKOR_2019-04

## 01-02. Data Load (df_raw)

In [7]:
## 단순 Xlsx -> CSV 변환
for i in range(0, len(list_file_quarter)):
    str_file = str(list_file_quarter[i])[len(str_dir_xlsx):-5]
    print(str_file)

    try : 
        print(True)
        Data_KECO_AIR_tmp = pd.read_excel(str_dir_xlsx + str_file + '.xlsx', engine='openpyxl')
        Data_KECO_AIR_tmp.to_csv(str_dir_Airkor_HR + str_file + '.csv')
    except Exception as e : ## BadZipFile: File is not a zip file
        ## 통상 Xlsx 파일이 잘못되었을 때의 문제이나, 
        ## 현재 케이스에서는 CSV임에도 xlsx 확장자명으로 잘못 업로드된 파일인 것 같다.
        if e == 'File is not a zip file':
            print(e)
            Data_KECO_AIR_tmp = pd.read_csv(str_dir_xlsx + str_file + '.xlsx', index_col=0)
            Data_KECO_AIR_tmp.to_csv(str_dir_Airkor_HR + str_file + '.csv')

KECO_AIRKOR_2017-01
True
KECO_AIRKOR_2017-02
True
KECO_AIRKOR_2017-03
True
KECO_AIRKOR_2017-04
True
KECO_AIRKOR_2017-05
True
KECO_AIRKOR_2017-06
True
KECO_AIRKOR_2017-07
True
KECO_AIRKOR_2017-08
True
KECO_AIRKOR_2017-09
True
KECO_AIRKOR_2017-10
True
KECO_AIRKOR_2017-11
True
KECO_AIRKOR_2017-12
True
KECO_AIRKOR_2019-01
True
KECO_AIRKOR_2019-02
True
KECO_AIRKOR_2019-03
True
KECO_AIRKOR_2019-04
True
KECO_AIRKOR_2019-05
True
KECO_AIRKOR_2019-06
True
KECO_AIRKOR_2019-07
True
KECO_AIRKOR_2019-08
True
KECO_AIRKOR_2019-09
True
KECO_AIRKOR_2019-10
True
KECO_AIRKOR_2019-11
True
KECO_AIRKOR_2019-12
True
KECO_AIRKOR_2020-01
True
KECO_AIRKOR_2020-02
True
KECO_AIRKOR_2020-03
True
KECO_AIRKOR_2020-04
True
KECO_AIRKOR_2020-05
True
KECO_AIRKOR_2020-06
True
KECO_AIRKOR_2020-07
True
KECO_AIRKOR_2020-08
True
KECO_AIRKOR_2020-09
True
KECO_AIRKOR_2020-10
True
KECO_AIRKOR_2020-11
True
KECO_AIRKOR_2020-12
True
KECO_AIRKOR_2021-01
True
KECO_AIRKOR_2021-02
True
KECO_AIRKOR_2021-03
True
KECO_AIRKOR_2021-04
True


In [8]:
Data_KECO_AIR_tmp = pd.read_csv(str_dir_kecoAirKor + str_file, index_col=0)
Data_KECO_AIR_tmp

NameError: name 'str_dir_kecoAirKor' is not defined