In [2]:
import pandas as pd
import numpy as np
import os

# 데이터 출처

* [기상청 기상자료개방포털 단기예보](https://data.kma.go.kr/data/rmt/rmtList.do?code=420&pgmNo=572)
    * 울산 -> 울산시 남구 선암동
    * 당진 -> 충남 당진시 석문면

* 2020-12 ~ 2021-06 까지 가져옴

In [3]:
def get_csv(region, year_month, path_fcst='./fcst_data'):
    '''
    Load the raw monthly forecast CSVs for one region.

    One file is read per weather element. Each file is expected at
    ``<path_fcst>/<district>_<element>_<year_month>_<year_month>.csv``,
    e.g. ``석문면_3시간기온_202012_202012.csv``.

    Parameters
    ----------
    region : str
        'ulsan' or 'dangjin'.
    year_month : str or int
        Month to load, formatted YYYYMM (e.g. 202012 ~ 202106).
    path_fcst : str, optional
        Directory that holds the CSV files. Defaults to './fcst_data'.

    Returns
    -------
    dict
        Maps each element name ('3시간기온', '습도', '풍속', '풍향', '하늘상태')
        to the DataFrame that ``pd.read_csv`` returned for that file.

    Raises
    ------
    KeyError
        If ``region`` is not one of the known regions.
    FileNotFoundError
        If an expected CSV file does not exist (raised by ``pd.read_csv``).
    '''
    region_dict = {'dangjin': '석문면', 'ulsan': '선암동'}
    info_list = ['3시간기온', '습도', '풍속', '풍향', '하늘상태']

    # Fail with a readable message instead of a bare KeyError('<region>').
    if region not in region_dict:
        raise KeyError(
            f'unknown region {region!r}; expected one of {sorted(region_dict)}'
        )
    district = region_dict[region]

    raw_dict = {}
    for info in info_list:
        # The month appears twice because each file covers a single month
        # (start month == end month).
        file_name = f'{district}_{info}_{year_month}_{year_month}.csv'
        file_path = os.path.join(path_fcst, file_name)
        raw_dict[info] = pd.read_csv(file_path)

    return raw_dict

def get_fcst(region, year_month):
    '''
    Build one forecast table for a region and month from the raw CSVs.

    Each raw frame returned by ``get_csv`` is read as: first column = day of
    month, ``hour`` = issue time as HHMM, ``forecast`` = forecast offset,
    last column = the element's value. The five element frames are joined on
    (day, hour, forecast), so a row missing from one file leaves a NaN in
    that element's column instead of shifting later values onto the wrong
    forecast.

    Parameters
    ----------
    region : str
        'ulsan' or 'dangjin'.
    year_month : str or int
        Month to load, formatted YYYYMM (e.g. 202012 ~ 202106).

    Returns
    -------
    pandas.DataFrame
        Columns: 'Forecast time' (text, 'YYYY-MM-DD HH:MM:SS'), 'forecast',
        'Temperature', 'Humidity', 'WindSpeed', 'WindDirection', 'Cloud'.
        Rows follow the order of the first element file.
    '''
    # Accept 202012 as well as '202012'; the value is concatenated as text below.
    year_month = str(year_month)
    raw_dict = get_csv(region, year_month)

    col_df = ['Forecast time', 'forecast', 'Temperature', 'Humidity', 'WindSpeed', 'WindDirection', 'Cloud']
    col_dict = {'3시간기온': 'Temperature', '습도': 'Humidity', '풍속': 'WindSpeed', '풍향': 'WindDirection', '하늘상태': 'Cloud'}
    # Underscore-prefixed helper keys cannot collide with an output column.
    key_cols = ['_day', '_hour', 'forecast']

    df_merged = None
    for category, df in raw_dict.items():
        # Normalise every raw frame to the same key names plus one value column.
        part = pd.DataFrame({
            '_day': df.iloc[:, 0],
            '_hour': df['hour'],
            'forecast': df['forecast'],  # df.iloc[:, 2]
            col_dict[category]: df.iloc[:, -1],
        })
        if df_merged is None:
            # The first element file defines which rows exist and their order.
            df_merged = part
        else:
            # De-duplicate the right side so the left join can never multiply rows.
            part = part.drop_duplicates(subset=key_cols)
            df_merged = df_merged.merge(part, on=key_cols, how='left')

    if df_merged is None:
        # No raw frames at all: return an empty table with the expected columns.
        return pd.DataFrame(columns=col_df)

    # Zero-pad to DD and HHMM, then append '00' seconds -> 'YYYYMMDD HHMMSS'.
    day = df_merged['_day'].astype(str).str.zfill(2)
    hour = df_merged['_hour'].astype(str).str.zfill(4)
    forecast_time = pd.to_datetime(
        year_month + day + ' ' + hour + '00',
        format='%Y%m%d %H%M%S',  # explicit format: no reliance on inference
    )
    # Stored as text, as before, so the exported CSV carries plain strings.
    df_merged['Forecast time'] = forecast_time.astype(str)

    # Drop the helper keys and fix the column order.
    return df_merged.reindex(columns=col_df)


In [4]:
# Months to load, formatted YYYYMM.
year_months = ['202012', '202101', '202102', '202103', '202104', '202105', '202106']

# Collect the monthly frames and concatenate once per region. Calling
# pd.concat inside the loop re-copies every previously loaded row on each
# iteration and starts from an empty placeholder frame.
ulsan_frames = []
dangjin_frames = []
for year_month in year_months:
    ulsan = get_fcst('ulsan', year_month)
    dangjin = get_fcst('dangjin', year_month)
    ulsan_frames.append(ulsan)
    dangjin_frames.append(dangjin)

# Row labels are kept exactly as get_fcst returned them (no ignore_index).
data_ulsan_fcst = pd.concat(ulsan_frames)
data_dangjin_fcst = pd.concat(dangjin_frames)


In [5]:
# Display the combined Ulsan forecast table as the cell output.
data_ulsan_fcst

Unnamed: 0,Forecast time,forecast,Temperature,Humidity,WindSpeed,WindDirection,Cloud
0,2020-12-01 02:00:00,4,10.0,35.0,3.0,356.0,1.0
1,2020-12-01 02:00:00,7,6.0,55.0,2.6,335.0,1.0
2,2020-12-01 02:00:00,10,4.0,45.0,3.1,328.0,3.0
3,2020-12-01 02:00:00,13,3.0,50.0,2.9,326.0,3.0
4,2020-12-01 02:00:00,16,3.0,55.0,3.1,329.0,3.0
...,...,...,...,...,...,...,...
4435,2021-06-30 23:00:00,52,26.0,70.0,2.3,57.0,4.0
4436,2021-06-30 23:00:00,55,24.0,80.0,2.6,52.0,4.0
4437,2021-06-30 23:00:00,58,23.0,85.0,3.5,45.0,4.0
4438,2021-06-30 23:00:00,61,24.0,85.0,5.2,185.0,4.0


In [6]:
# Display the combined Dangjin forecast table as the cell output.
data_dangjin_fcst

Unnamed: 0,Forecast time,forecast,Temperature,Humidity,WindSpeed,WindDirection,Cloud
0,2020-12-01 02:00:00,4,4.0,40.0,4.3,352.0,3.0
1,2020-12-01 02:00:00,7,3.0,50.0,3.0,12.0,3.0
2,2020-12-01 02:00:00,10,2.0,50.0,2.7,27.0,3.0
3,2020-12-01 02:00:00,13,1.0,55.0,3.2,22.0,3.0
4,2020-12-01 02:00:00,16,1.0,55.0,3.0,24.0,4.0
...,...,...,...,...,...,...,...
4435,2021-06-30 23:00:00,52,28.0,55.0,6.6,117.0,4.0
4436,2021-06-30 23:00:00,55,28.0,55.0,6.6,117.0,4.0
4437,2021-06-30 23:00:00,58,25.0,75.0,5.3,122.0,4.0
4438,2021-06-30 23:00:00,61,24.0,80.0,3.9,123.0,4.0


In [7]:
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('./new_dataset', exist_ok=True)

# index=False omits the row labels from the files; `index` is documented as a bool.
data_ulsan_fcst.to_csv('./new_dataset/new_fcst_ulsan.csv', index=False)
data_dangjin_fcst.to_csv('./new_dataset/new_fcst_dangjin.csv', index=False)