# 관측(ASOS) 데이터 불러오기
[한국동서발전 태양광 발전량 현황 인증키 확인](https://data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15057210)
[인증키 주소 (로그인 필요)](https://data.go.kr/iim/api/selectAPIAcountView.do)

In [1]:
! pip install xmltodict



In [None]:
import pandas as pd
from urllib.parse import urlencode, quote_plus
from urllib.request import urlopen
import json
import xmltodict

In [1]:
# API service key. The prompt asks for the already-encoded key; it is read
# with getpass so the secret is not echoed into the saved cell output.
from getpass import getpass

ServiceKey = getpass('encoded api key를 입력해주세요 > ')

encoded api key를 입력해주세요 > ㅁ


In [4]:
# Endpoint queried by both observation helpers below.
_ASOS_HOURLY_URL = (
    'http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList'
)

# Station ids sent as ``stnIds``.
_STN_ULSAN = 152     # Ulsan
_STN_SEOSAN = 129    # Seosan (next to Dangjin)

# API field name -> output column label, listed in output column order.
_OBS_COLUMNS = {
    'tm': '일시',
    'stnId': '지점',
    'stnNm': '지점명',
    'ta': '기온(°C)',
    'ws': '풍속(m/s)',
    'wd': '풍향(16방위)',
    'hm': '습도(%)',
    'dc10Tca': '전운량(10분위)',
    'rn': '강수량(mm)',
    'dsnw': '적설(cm)',
    'pv': '증기압(hPa)',
    'ps': '해면기압(hPa)',
    'ss': '일조(hr)',
    'dc10LmcsCa': '중하층운량(10분위)',
}


def _request_window(start, end):
    """Return ``(first_day, last_day, num_hours)`` for a request.

    Only the calendar dates of ``start`` and ``end`` are used, because the
    request always runs from hour 00 of the first day to hour 23 of the last
    day. ``num_hours`` therefore counts 24 rows for every day, both ends
    included.
    """
    first_day = pd.to_datetime(start).normalize()
    last_day = pd.to_datetime(end).normalize()
    if last_day < first_day:
        raise ValueError(
            f'end ({end!r}) must not be earlier than start ({start!r})'
        )
    num_hours = ((last_day - first_day).days + 1) * 24
    return first_day, last_day, num_hours


def _parse_items(payload):
    """Parse the XML payload and return the observation records as a list."""
    data = xmltodict.parse(payload)
    try:
        result_msg = data['response']['header']['resultMsg']
    except (KeyError, TypeError) as err:
        print('Fail.')
        raise RuntimeError(
            'Unexpected API response: response/header/resultMsg is missing'
        ) from err

    if 'ERROR' in result_msg:
        print('Fail.')
        raise RuntimeError(f'ASOS API request failed: {result_msg}')
    print('Success!')

    items = data['response']['body']['items']['item']
    # xmltodict yields a bare mapping (not a list) when there is one <item>.
    return [items] if isinstance(items, dict) else items


def _get_obs(start, end, key, stn_id):
    """Download hourly observations for one station as a renamed DataFrame."""
    first_day, last_day, num_hours = _request_window(start, end)

    # NOTE(review): the service may cap numOfRows per page; confirm the limit
    # before requesting windows much longer than one month.
    query = urlencode({
        'pageNo': 1,
        'numOfRows': num_hours,
        'dataCd': 'ASOS',
        'dateCd': 'HR',
        'stnIds': stn_id,
        'startDt': first_day.strftime('%Y%m%d'),
        'startHh': '00',
        'endDt': last_day.strftime('%Y%m%d'),
        'endHh': '23',
    })

    # The key is appended as-is: the caller supplies it already encoded.
    url = f'{_ASOS_HOURLY_URL}?serviceKey={key}&{query}'
    with urlopen(url) as response:
        payload = response.read().decode('utf-8')

    df = pd.DataFrame(_parse_items(payload))
    return df[list(_OBS_COLUMNS)].rename(columns=_OBS_COLUMNS)


def get_obs_ulsan(start, end, key):
    '''
    Fetch hourly observation data for Ulsan (station id 152).

    Example
        start = pd.Timestamp('2018-03-01 00:00:00')
        end = pd.Timestamp('2018-03-31 00:00:00')
        key = ServiceKey

    With these arguments every hourly record of March 2018 is requested,
    from hour 00 of the start day through hour 23 of the end day.

    Parameters
        start: first day of the window (pd.Timestamp or anything
            pd.to_datetime accepts, e.g. '20180301'); the time part is ignored.
        end: last day of the window, inclusive; the time part is ignored.
        key: service key, inserted into the URL unchanged.

    Returns
        DataFrame holding the selected observation columns, renamed to
        their Korean labels.

    Raises
        ValueError: if end is earlier than start.
        RuntimeError: if the response reports an error or has no header.
    '''
    return _get_obs(start, end, key, _STN_ULSAN)


def get_obs_dangjin(start, end, key):
    '''
    Fetch hourly observation data for Dangjin, using the Seosan station
    (station id 129).

    Example
        start = pd.Timestamp('2018-03-01 00:00:00')
        end = pd.Timestamp('2018-03-31 00:00:00')
        key = ServiceKey

    With these arguments every hourly record of March 2018 is requested,
    from hour 00 of the start day through hour 23 of the end day.

    Parameters
        start: first day of the window (pd.Timestamp or anything
            pd.to_datetime accepts, e.g. '20180301'); the time part is ignored.
        end: last day of the window, inclusive; the time part is ignored.
        key: service key, inserted into the URL unchanged.

    Returns
        DataFrame holding the selected observation columns, renamed to
        their Korean labels.

    Raises
        ValueError: if end is earlier than start.
        RuntimeError: if the response reports an error or has no header.
    '''
    return _get_obs(start, end, key, _STN_SEOSAN)

In [5]:
# Inspect the first timestamp produced by a month-end ('M') date range.
pd.date_range('20150101', '20210529', freq='M')[0]

Timestamp('2015-01-31 00:00:00', freq='M')

In [6]:
# Fetch the Ulsan observations, one request per calendar month.
date_start = '20150101'
date_end = '20210601'

month_start = pd.date_range(date_start, date_end, freq='MS')
month_end = pd.date_range(date_start, date_end, freq='M')

# zip() stops at the shorter range, so a month start that has no matching
# month end inside [date_start, date_end] is not requested.
monthly_frames = []
try:
    for month, (first_day, last_day) in enumerate(zip(month_start, month_end)):
        monthly_frames.append(get_obs_ulsan(first_day, last_day, ServiceKey))
        print(f'trial {month} : {first_day} ~ {last_day}')
finally:
    # Concatenate once (instead of re-copying the growing frame on every
    # iteration) and keep whatever was downloaded if a request fails midway.
    obs_ulsan = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

print('Done!')


Success!
trial 0 : 2015-01-01 00:00:00 ~ 2015-01-31 00:00:00
Success!
trial 1 : 2015-02-01 00:00:00 ~ 2015-02-28 00:00:00
Success!
trial 2 : 2015-03-01 00:00:00 ~ 2015-03-31 00:00:00
Success!
trial 3 : 2015-04-01 00:00:00 ~ 2015-04-30 00:00:00
Success!
trial 4 : 2015-05-01 00:00:00 ~ 2015-05-31 00:00:00
Success!
trial 5 : 2015-06-01 00:00:00 ~ 2015-06-30 00:00:00
Success!
trial 6 : 2015-07-01 00:00:00 ~ 2015-07-31 00:00:00
Success!
trial 7 : 2015-08-01 00:00:00 ~ 2015-08-31 00:00:00
Success!
trial 8 : 2015-09-01 00:00:00 ~ 2015-09-30 00:00:00
Success!
trial 9 : 2015-10-01 00:00:00 ~ 2015-10-31 00:00:00
Success!
trial 10 : 2015-11-01 00:00:00 ~ 2015-11-30 00:00:00
Success!
trial 11 : 2015-12-01 00:00:00 ~ 2015-12-31 00:00:00
Success!
trial 12 : 2016-01-01 00:00:00 ~ 2016-01-31 00:00:00
Success!
trial 13 : 2016-02-01 00:00:00 ~ 2016-02-29 00:00:00
Success!
trial 14 : 2016-03-01 00:00:00 ~ 2016-03-31 00:00:00
Success!
trial 15 : 2016-04-01 00:00:00 ~ 2016-04-30 00:00:00
Success!
trial 16 

In [7]:
# Write the collected Ulsan observations to CSV without the index column.
obs_ulsan.to_csv('new_obs_ulsan.csv', index=False)

In [None]:
# Fetch the Dangjin observations, one request per calendar month.
date_start = '20150101'
date_end = '20210601'

month_start = pd.date_range(date_start, date_end, freq='MS')
month_end = pd.date_range(date_start, date_end, freq='M')

# zip() stops at the shorter range, so a month start that has no matching
# month end inside [date_start, date_end] is not requested.
monthly_frames = []
try:
    for month, (first_day, last_day) in enumerate(zip(month_start, month_end)):
        monthly_frames.append(get_obs_dangjin(first_day, last_day, ServiceKey))
        print(f'trial {month} : {first_day} ~ {last_day}')
finally:
    # Concatenate once (instead of re-copying the growing frame on every
    # iteration) and keep whatever was downloaded if a request fails midway.
    obs_dangjin = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

print('Done!')


Success!
trial 0 : 2015-01-01 00:00:00 ~ 2015-01-31 00:00:00
Success!
trial 1 : 2015-02-01 00:00:00 ~ 2015-02-28 00:00:00
Success!
trial 2 : 2015-03-01 00:00:00 ~ 2015-03-31 00:00:00
Success!
trial 3 : 2015-04-01 00:00:00 ~ 2015-04-30 00:00:00
Success!
trial 4 : 2015-05-01 00:00:00 ~ 2015-05-31 00:00:00
Success!
trial 5 : 2015-06-01 00:00:00 ~ 2015-06-30 00:00:00
Success!
trial 6 : 2015-07-01 00:00:00 ~ 2015-07-31 00:00:00
Success!
trial 7 : 2015-08-01 00:00:00 ~ 2015-08-31 00:00:00
Success!
trial 8 : 2015-09-01 00:00:00 ~ 2015-09-30 00:00:00


In [None]:
# Save: write the collected Dangjin observations to CSV without the index column.
obs_dangjin.to_csv('new_obs_dangjin.csv', index=False)

In [None]:
# Display the collected Dangjin observations as the cell result.
obs_dangjin

# 이하 무시해도 되는 코드

In [None]:
"""
start = month_start[month]
end = month_end[month]
key = ServiceKey

startDt = f'{start.year}{start.month:0>2d}{start.day:0>2d}'
startHh = '00'
endDt = f'{end.year}{end.month:0>2d}{end.day:0>2d}'
endHh = '23'

url_base = 'http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList'

date_start = pd.to_datetime(start)
date_end = pd.to_datetime(end)
num_hours = int((date_end - date_start) / pd.Timedelta(hours=1)) + 1

queryParams = urlencode({
    quote_plus('pageNo'): 1,
    quote_plus('numOfRows'): num_hours,
    quote_plus('dataCd'): 'ASOS',
    quote_plus('dateCd'): 'HR',
    quote_plus('stnIds'): 152,               # ulsan
    quote_plus('startDt'): startDt,
    quote_plus('startHh'): startHh,
    quote_plus('endDt'): endDt,
    quote_plus('endHh'): endHh,
})

url = f'{url_base}?serviceKey={key}&{queryParams}'
response = urlopen(url)
results = response.read().decode('utf-8')
results_json = xmltodict.parse(results)

data = json.loads(json.dumps(results_json))

if 'ERROR' in data['response']['header']['resultMsg']:
    print('Fail.')
else:
    print('Success!')
"""

In [None]:
"""
logs = data['response']['body']['items']['item']
df = pd.DataFrame(logs)

col_need_origin = ['tm', 'stnId', 'stnNm', 'ta', 'ws', 'wd', 'hm', 'dc10Tca']
col_need = [
    'tm', 'stnId', 'stnNm', 'ta', 'ws', 'wd', 'hm', 'dc10Tca',
    'rn', 'pv', 'ps', 'ss', 'dc10LmcsCa',
]
dict_col = {
    'stnId': '지점',
    'stnNm': '지점명',
    'tm': '일시',
    'ta': '기온(°C)',
    'ws': '풍속(m/s)',
    'wd': '풍향(16방위)',
    'hm': '습도(%)',
    'dc10Tca': '전운량(10분위)',
    'rn': '강수량(mm)',        # 추가
    'pv': '증기압(hPa)',
    'ps': '해면기압(hPa)',
    'ss': '일조(hr)',
    'dc10LmcsCa': '중하층운량(10분위)'
}

weather = df[col_need].copy()
weather = weather.rename(columns=dict_col)
weather
"""

# -------------------------

In [None]:
"""
obs_dangjin = pd.DataFrame()
tmp1 = get_obs_dangjin('20180301', '20180331', ServiceKey)
tmp2 = get_obs_dangjin('20180401', '20180430', ServiceKey)

obs_dangjin = pd.concat([obs_ulsan, tmp1])
obs_dangjin = pd.concat([obs_ulsan, tmp2])
obs_dangjin
"""

In [None]:
# tmp

In [None]:
"""
stnlds_ulsan = 152    # 울산
stnlds_dangjin = 129  # 서산(당진 옆)

url_base = 'http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList'
key = ServiceKey
queryParams = urlencode({
    quote_plus('pageNo'): 1,
    quote_plus('numOfRows'): 28 * 24,
    quote_plus('dataCd'): 'ASOS',
    quote_plus('dateCd'): 'HR',
    quote_plus('stnIds'): stnlds_ulsan,
    quote_plus('startDt'): '20200201', # 예보 발표 날짜
    quote_plus('startHh'): '00',       # 예보 발표 시간
    quote_plus('endDt'): '20200228',
    quote_plus('endHh'): '23',
})

url = f'{url_base}?serviceKey={key}&{queryParams}'
response = urlopen(url)
results = response.read().decode('utf-8')
results_json = xmltodict.parse(results)

data = json.loads(json.dumps(results_json))

if 'ERROR' in data['response']['header']['resultMsg']:
    print('Fail.')
else:
    print('Success!')
    

"""


In [None]:
# data 대강 확인하기
# data

In [None]:
# logs = data['response']['body']['items']['item']
# df = pd.DataFrame(logs)
# df

In [None]:
# df.columns