# 예보 데이터 불러오기
[API 출처 : 공공데이터포털, 기상예보 2.0버전](https://www.data.go.kr/data/15084084/openapi.do)

In [None]:
# ! pip install xmltodict

In [151]:
import pandas as pd
from urllib.parse import urlencode, quote_plus
from urllib.request import urlopen
import json
import xmltodict
import os

In [1]:
# Service key for the data.go.kr API. The key is inserted into the request
# URL verbatim, so supply the already URL-encoded form of the key.
# key = input('Enter your encoded API key > ')
key = 'your_key'

In [129]:
def get_api_data(base_date, base_time, region, key, n_row, n_page):
    """Fetch one page of the getVilageFcst forecast endpoint as a DataFrame.

    Args:
        base_date: Forecast issue date, sent as the ``base_date`` parameter.
        base_time: Forecast issue time, sent as the ``base_time`` parameter.
        region: Region name; must be ``'dangjin'`` or ``'ulsan'``.
        key: Service key. It is appended to the URL verbatim, so it must
            already be URL-encoded.
        n_row: Number of rows per page (``numOfRows``).
        n_page: Page number to request (``pageNo``).

    Returns:
        A DataFrame with one row per ``<item>`` element of the response, or
        an empty DataFrame when the response carries no items.

    Raises:
        KeyError: If ``region`` is not a known region name.
        urllib.error.URLError: If the HTTP request fails.
    """
    # Grid coordinates (nx, ny) for each supported region.
    dict_region = {'dangjin': (53, 144), 'ulsan': (102, 83)}
    x, y = dict_region[region]

    url_base = 'http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getVilageFcst'
    # urlencode already escapes keys and values, so no manual quoting is needed.
    query_params = urlencode({
        'numOfRows': n_row,
        'pageNo': n_page,
        'dataType': 'XML',
        'base_date': base_date,
        'base_time': base_time,
        'nx': x,
        'ny': y,
    })

    # The service key is pre-encoded; running it through urlencode would
    # escape it a second time.
    url = f'{url_base}?serviceKey={key}&{query_params}'
    with urlopen(url) as response:  # close the connection deterministically
        payload = response.read().decode('utf-8')
    parsed = xmltodict.parse(payload)

    try:
        items = parsed['response']['body']['items']['item']
    except (KeyError, TypeError):
        # Error and no-data responses lack the body/items/item path.
        return pd.DataFrame()

    if not items:
        return pd.DataFrame()
    if not isinstance(items, list):
        # xmltodict yields a bare mapping (not a list) when there is exactly
        # one <item>; wrap it so the record becomes a single row instead of
        # being lost.
        items = [items]

    return pd.DataFrame(items)

def get_data(base_date, base_time, region, key, n_row=100, n_page=1):
    """Download a forecast and reshape it to one row per forecast time.

    Pages ``1..n_page`` are fetched with ``get_api_data`` and combined. The
    long-format records (one row per forecast time and category) are then
    turned into a wide table with one column per weather variable.

    Args:
        base_date: Forecast issue date as ``YYYYMMDD``.
        base_time: Forecast issue time as ``HHMM``.
        region: Region name understood by ``get_api_data``.
        key: URL-encoded service key.
        n_row: Rows requested per page.
        n_page: Number of pages to request.

    Returns:
        A DataFrame with columns ``Forecast time`` (issue time as a string),
        ``forecast`` (whole hours between issue time and forecast time),
        ``Temperature``, ``Humidity``, ``WindSpeed``, ``WindDirection`` and
        ``Cloud``. Variables missing for a forecast time are NaN. When the
        API returned no usable records, the (empty) raw DataFrame is
        returned instead.
    """
    # get raw data: gather every page first and concatenate once
    pages = []
    for n in range(1, n_page + 1):
        page = get_api_data(base_date, base_time, region, key, n_row, n)
        if not page.empty:
            pages.append(page)
    df_raw = pd.concat(pages, ignore_index=True) if pages else pd.DataFrame()

    if not {'fcstDate', 'fcstTime'}.issubset(df_raw.columns):
        print(base_date, base_time, f'has no data... ({region})')
        return df_raw

    df_raw['fcst'] = df_raw['fcstDate'] + ' ' + df_raw['fcstTime']
    print(base_date, base_time, f'has data! ({region})')

    # API category code -> output column name; other categories are ignored
    category = {'REH': 'Humidity',
                'SKY': 'Cloud',
                'TMP': 'Temperature',
                'VEC': 'WindDirection',
                'WSD': 'WindSpeed'}
    col_df = ['Forecast time', 'forecast', 'Temperature', 'Humidity',
              'WindSpeed', 'WindDirection', 'Cloud']

    time_format = '%Y%m%d %H%M'
    base_t = pd.to_datetime(f'{base_date} {base_time}', format=time_format)
    one_hour = pd.Timedelta(hours=1)

    # Build one plain dict per forecast time in a single pass. Writing cells
    # through df.iloc[i][col] is chained assignment, which modifies a
    # temporary under pandas Copy-on-Write and would leave the result empty.
    rows = {}
    records = zip(df_raw['fcst'], df_raw['category'], df_raw['fcstValue'])
    for fcst, cat, value in records:
        row = rows.get(fcst)
        if row is None:
            fcst_t = pd.to_datetime(fcst, format=time_format)
            row = {'Forecast time': base_t,
                   'forecast': int((fcst_t - base_t) / one_hour)}
            rows[fcst] = row  # dict order keeps first-appearance order
        if cat in category:
            row[category[cat]] = value  # a later duplicate overrides

    # dtype=object keeps the API's string values and the integer lead times
    # as they are, without dtype inference.
    df = pd.DataFrame(list(rows.values()), columns=col_df, dtype=object)
    df['Forecast time'] = df['Forecast time'].astype(str)

    return df


In [130]:
# Request the forecast issued at each of the last 24 hourly marks for both
# regions and stack the per-issue tables into one table per region.
# NOTE(review): the logged output shows data only for some issue hours
# (e.g. 1700, 2000, 2300, 0200, 0500, 0800); the remaining requests come
# back empty. Confirm the publication schedule before narrowing the loop.
frames_ulsan = []
frames_dangjin = []

now = pd.Timestamp.now()
# 'h' is the hourly offset alias; the upper-case 'H' is deprecated in pandas 2.2+.
list_date = pd.date_range(end=now, periods=24, freq='h')
for date in list_date:
    base_date = date.strftime('%Y%m%d')
    base_hour = date.strftime('%H00')  # zero-padded hour followed by "00"

    new_df_ulsan = get_data(base_date, base_hour, 'ulsan', key, n_page=10)
    new_df_dangjin = get_data(base_date, base_hour, 'dangjin', key, n_page=10)

    # Keep only non-empty results and concatenate once after the loop,
    # instead of re-copying the accumulated table on every iteration.
    if not new_df_ulsan.empty:
        frames_ulsan.append(new_df_ulsan)
    if not new_df_dangjin.empty:
        frames_dangjin.append(new_df_dangjin)

df_ulsan = pd.concat(frames_ulsan) if frames_ulsan else pd.DataFrame()
df_dangjin = pd.concat(frames_dangjin) if frames_dangjin else pd.DataFrame()


20210701 1500 has no data...
20210701 1500 has no data...
20210701 1600 has no data...
20210701 1600 has no data...
20210701 1700 has data!
20210701 1700 has data!
20210701 1800 has no data...
20210701 1800 has no data...
20210701 1900 has no data...
20210701 1900 has no data...
20210701 2000 has data!
20210701 2000 has data!
20210701 2100 has no data...
20210701 2100 has no data...
20210701 2200 has no data...
20210701 2200 has no data...
20210701 2300 has data!
20210701 2300 has data!
20210702 0000 has no data...
20210702 0000 has no data...
20210702 0100 has no data...
20210702 0100 has no data...
20210702 0200 has data!
20210702 0200 has data!
20210702 0300 has no data...
20210702 0300 has no data...
20210702 0400 has no data...
20210702 0400 has no data...
20210702 0500 has data!
20210702 0500 has data!
20210702 0600 has no data...
20210702 0600 has no data...
20210702 0700 has no data...
20210702 0700 has no data...
20210702 0800 has data!
20210702 0800 has data!
20210702 0900 ha

In [138]:
df_ulsan.iloc[10]

Forecast time    2021-07-01 17:00:00
forecast                          11
Temperature                       22
Humidity                          90
WindSpeed                        2.9
WindDirection                    360
Cloud                              1
Name: 10, dtype: object

In [133]:
df_dangjin.head(24)

Unnamed: 0,Forecast time,forecast,Temperature,Humidity,WindSpeed,WindDirection,Cloud
0,2021-07-01 17:00:00,1,28,60,1.4,295,1
1,2021-07-01 17:00:00,2,27,70,0.3,315,1
2,2021-07-01 17:00:00,3,25,70,0.2,333,3
3,2021-07-01 17:00:00,4,24,70,0.6,18,3
4,2021-07-01 17:00:00,5,23,80,1.5,23,1
5,2021-07-01 17:00:00,6,22,80,1.2,42,1
6,2021-07-01 17:00:00,7,22,85,1.4,39,1
7,2021-07-01 17:00:00,8,22,85,1.8,63,1
8,2021-07-01 17:00:00,9,21,85,1.7,66,1
9,2021-07-01 17:00:00,10,21,85,1.9,62,1


In [152]:
# Write each region's table to a CSV named after the collection timestamp.
info = now.strftime('%Y%m%d_%H%M')
out_dir = './fcst_api_data'
os.makedirs(out_dir, exist_ok=True)  # to_csv does not create missing directories
df_ulsan.to_csv(f'{out_dir}/fcst_ulsan_{info}.csv', index=False)
df_dangjin.to_csv(f'{out_dir}/fcst_dangjin_{info}.csv', index=False)