# 예보 데이터 불러오기
[API 출처 : 공공데이터포털, 기상예보 2.0버전](https://www.data.go.kr/data/15084084/openapi.do)

In [1]:
! pip install xmltodict



In [9]:
import pandas as pd
from urllib.parse import urlencode, quote_plus
from urllib.request import urlopen
import json
import xmltodict
import os

In [10]:
# API service key. It is inserted into the request URL as-is, so supply the
# already URL-encoded form of the key.
# To prompt for the key instead of hard-coding it:
# key = input('Enter the encoded API key > ')
key = 'your_key'

In [11]:
def get_api_data(base_date, base_time, region, key, n_row, n_page):
    """Request one page of the village forecast and return it as a DataFrame.

    Args:
        base_date: Forecast issue date, sent as the ``base_date`` parameter.
        base_time: Forecast issue time, sent as the ``base_time`` parameter.
        region: Either ``'dangjin'`` or ``'ulsan'``; selects the grid point.
        key: Service key. It is placed in the URL verbatim, so it must
            already be URL-encoded.
        n_row: Number of rows per page (``numOfRows``).
        n_page: Page number to request (``pageNo``).

    Returns:
        A DataFrame with one row per ``<item>`` element of the response, or
        an empty DataFrame when the response has no
        ``response/body/items/item`` path (for example, no forecast was
        issued at that base time).

    Raises:
        KeyError: If ``region`` is not one of the known regions.
    """
    dict_region = {'dangjin': (53, 144), 'ulsan': (102, 83)}
    x, y = dict_region[region]

    url_base = 'http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getVilageFcst'
    # urlencode already escapes keys and values, so the keys are passed plain.
    queryParams = urlencode({
        'numOfRows': n_row,      # how many rows to fetch per page
        'pageNo': n_page,
        'dataType': 'XML',
        'base_date': base_date,  # forecast issue date
        'base_time': base_time,  # forecast issue time
        'nx': str(x),
        'ny': str(y),
    })

    url = f'{url_base}?serviceKey={key}&{queryParams}'
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        results = response.read().decode('utf-8')
    parsed = xmltodict.parse(results)

    try:
        logs = parsed['response']['body']['items']['item']
    except (KeyError, TypeError):
        # Missing key, or an intermediate element that is empty (None):
        # the response carries no forecast items.
        return pd.DataFrame()

    # xmltodict yields a single mapping (not a list) when there is exactly
    # one <item>; wrap it so the lone row is kept instead of being lost.
    if isinstance(logs, dict):
        logs = [logs]
    if not isinstance(logs, list):
        return pd.DataFrame()

    return pd.DataFrame(logs)

def get_data(base_date, base_time, region, key, n_row=100, n_page=1):
    """Fetch one forecast issue and reshape it to one row per forecast hour.

    Pages ``1..n_page`` are requested through ``get_api_data`` and stacked.
    The long-format result (one row per category per forecast time) is then
    turned into a wide frame.

    Args:
        base_date: Forecast issue date, passed through to ``get_api_data``.
        base_time: Forecast issue time, passed through to ``get_api_data``.
        region: Region name, passed through to ``get_api_data``.
        key: Service key, passed through to ``get_api_data``.
        n_row: Rows requested per page.
        n_page: Number of pages to request, starting at page 1.

    Returns:
        When data is available, an object-dtype DataFrame with columns
        ``'Forecast time'`` (the issue time as a string), ``'forecast'``
        (whole hours between the issue time and the forecast time),
        ``'Temperature'``, ``'Humidity'``, ``'WindSpeed'``,
        ``'WindDirection'`` and ``'Cloud'``; categories absent for a
        forecast time are left as NaN. When the fetched pages contain no
        ``fcstDate``/``fcstTime`` columns, the raw (empty) frame is
        returned unchanged.
    """
    # Fetch every page first and concatenate once rather than growing a
    # frame inside the loop.
    pages = [get_api_data(base_date, base_time, region, key, n_row, n)
             for n in range(1, n_page + 1)]
    df_raw = pd.concat(pages) if pages else pd.DataFrame()

    # An empty result has no forecast columns at all; test for that directly
    # instead of relying on a bare ``except`` around the column access.
    if 'fcstDate' not in df_raw.columns or 'fcstTime' not in df_raw.columns:
        print(base_date, base_time, f'has no data... ({region})')
        return df_raw

    df_raw['fcst'] = df_raw['fcstDate'] + ' ' + df_raw['fcstTime']
    print(base_date, base_time, f'has data! ({region})')

    # API category code -> output column name
    category = {'REH': 'Humidity',
                'SKY': 'Cloud',
                'TMP': 'Temperature',
                'VEC': 'WindDirection',
                'WSD': 'WindSpeed'}
    col_df = ['Forecast time', 'forecast', 'Temperature', 'Humidity', 'WindSpeed', 'WindDirection', 'Cloud']

    # Loop-invariant values, computed once.
    base_t = pd.to_datetime(f'{base_date} {base_time}')
    one_hour = pd.Timedelta(hours=1)

    # Rows are built as plain dicts and converted in a single step. Writing
    # through ``df.iloc[i][col] = value`` assigns to a temporary Series and
    # is not guaranteed to reach the frame (it never does under pandas
    # Copy-on-Write).
    rows = []
    # sort=False keeps forecast times in order of first appearance.
    for fcst, df_day in df_raw.groupby('fcst', sort=False):
        row = {
            'Forecast time': str(base_t),
            'forecast': int((pd.to_datetime(fcst) - base_t) / one_hour),
        }
        for cat, value in zip(df_day['category'], df_day['fcstValue']):
            if cat in category:
                row[category[cat]] = value
        rows.append(row)

    # dtype=object keeps the raw string values and the NaN placeholders in
    # the same column types as an initially empty frame filled cell by cell.
    return pd.DataFrame(rows, columns=col_df, dtype=object)


In [13]:
# Collect the forecasts issued at each of the 24 hourly slots ending now,
# for both regions. Slots without an issued forecast contribute an empty frame.
df_ulsan, df_dangjin = pd.DataFrame(), pd.DataFrame()

now = pd.Timestamp.now()
list_date = pd.date_range(end=now, periods=24, freq='H')
for date in list_date:
    base_date = date.strftime('%Y%m%d')    # e.g. 20210703
    base_hour = date.strftime('%H') + '00'  # on-the-hour issue time, e.g. 0200

    new_df_ulsan = get_data(base_date, base_hour, 'ulsan', key, n_page=10)
    new_df_dangjin = get_data(base_date, base_hour, 'dangjin', key, n_page=10)

    # Append this slot's rows to the running per-region frames.
    df_ulsan = pd.concat([df_ulsan, new_df_ulsan])
    df_dangjin = pd.concat([df_dangjin, new_df_dangjin])


20210702 1500 has no data... (ulsan)
20210702 1500 has no data... (dangjin)
20210702 1600 has no data... (ulsan)
20210702 1600 has no data... (dangjin)
20210702 1700 has data! (ulsan)
20210702 1700 has data! (dangjin)
20210702 1800 has no data... (ulsan)
20210702 1800 has no data... (dangjin)
20210702 1900 has no data... (ulsan)
20210702 1900 has no data... (dangjin)
20210702 2000 has data! (ulsan)
20210702 2000 has data! (dangjin)
20210702 2100 has no data... (ulsan)
20210702 2100 has no data... (dangjin)
20210702 2200 has no data... (ulsan)
20210702 2200 has no data... (dangjin)
20210702 2300 has data! (ulsan)
20210702 2300 has data! (dangjin)
20210703 0000 has no data... (ulsan)
20210703 0000 has no data... (dangjin)
20210703 0100 has no data... (ulsan)
20210703 0100 has no data... (dangjin)
20210703 0200 has data! (ulsan)
20210703 0200 has data! (dangjin)
20210703 0300 has no data... (ulsan)
20210703 0300 has no data... (dangjin)
20210703 0400 has no data... (ulsan)
20210703 0400 h

In [None]:
# Display the row at position 10 of the accumulated Ulsan frame.
df_ulsan.iloc[10]

In [None]:
# Display the first 24 rows of the accumulated Dangjin frame.
df_dangjin.head(24)

In [14]:
# Write both regions' forecasts to CSV, tagging the file names with the
# timestamp held in `now` (YYYYMMDD_HHMM).
info = now.strftime('%Y%m%d_%H%M')
# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('./fcst_api_data', exist_ok=True)
df_ulsan.to_csv(f'./fcst_api_data/fcst_ulsan_{info}.csv', index=False)
df_dangjin.to_csv(f'./fcst_api_data/fcst_dangjin_{info}.csv', index=False)

In [12]:
# Fetch page 1 (100 rows) of the raw, unpivoted response for base date
# 20210703 / base time 0200 in the 'ulsan' region, to inspect its columns.
ex = get_api_data('20210703', '0200', 'ulsan', key, 100, 1)
ex

Unnamed: 0,baseDate,baseTime,category,fcstDate,fcstTime,fcstValue,nx,ny
0,20210703,0200,TMP,20210703,0300,23,102,83
1,20210703,0200,UUU,20210703,0300,-2.2,102,83
2,20210703,0200,VVV,20210703,0300,-2.9,102,83
3,20210703,0200,VEC,20210703,0300,38,102,83
4,20210703,0200,WSD,20210703,0300,3.7,102,83
...,...,...,...,...,...,...,...,...
95,20210703,0200,PTY,20210703,1100,0,102,83
96,20210703,0200,POP,20210703,1100,30,102,83
97,20210703,0200,PCP,20210703,1100,1mm 미만,102,83
98,20210703,0200,REH,20210703,1100,80,102,83
