# 실시간 기상 데이터 수집

패키지 불러오기

In [2]:
import pandas as pd
import requests
import json
import datetime

## 1. 대구의 읍면동과 좌표 데이터 불러오기

In [7]:
# Load the region/grid-coordinate workbook
df_daegu = pd.read_excel('./dataset/daegu_regionXY.xlsx')

# Discard rows with any missing cell; these are the entries broader than
# eup/myeon/dong level (e.g. metropolitan city, si/gun/gu)
df_daegu = df_daegu.dropna()

# Keep only the rows whose first-level region is Daegu
is_daegu = df_daegu['1단계'] == '대구광역시'
df_daegu = df_daegu[is_daegu]

# Remove the columns that are not needed afterwards
unused_columns = ['1단계','2단계','경도(시)','경도(분)','경도(초)','위도(시)','위도(분)','위도(초)','경도(초/100)','위도(초/100)']
df_daegu = df_daegu.drop(columns=unused_columns)

df_daegu

Unnamed: 0,3단계,격자 X,격자 Y
676,동인동,89,91
677,삼덕동,89,90
678,성내1동,89,90
679,성내2동,89,90
680,성내3동,89,90
...,...,...,...
820,옥포읍,87,89
821,현풍읍,86,87
822,가창면,89,89
823,하빈면,86,91


## 2. 이전 데이터에 대한 데이터 프레임 생성

한 자리 숫자를 두 자리 문자열로 변환하는 함수

In [7]:
def str_2words(word):
    """Return ``word`` as a string, prefixed with '0' when it is one character long.

    Values whose string form is empty or longer than one character are
    returned as their plain string form.
    """
    text = str(word)
    return '0' + text if len(text) == 1 else text

데이터 프레임의 틀 생성

In [7]:
# Empty frame that the collection cells append one row per (hour, region) to.
# NOTE(review): 'humidiry' looks like a misspelling of 'humidity'; it is kept
# as-is because the saved CSV header uses this exact spelling.
df = pd.DataFrame({
    column: []
    for column in (
        'year',
        'month',
        'day',
        'hour',
        'temp',
        'rainfall',
        'humidiry',
        'wind_speed',
        'wind_direction',
        'region',
    )
})

과거 데이터 수집

In [8]:
# Collect the observations for 2022-06-10, one request per (hour, region),
# and append one row per request to df.
#
# Values the API needs:
#   ServiceKey  : authentication key
#   pageNo      : page number
#   numOfRows   : number of results per page
#   dataType    : response format (JSON, XML)
#   base_date   : publication date of the data
#   base_time   : publication time of the data
#   nx          : x coordinate of the forecast point
#   ny          : y coordinate of the forecast point
base_url = 'http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getUltraSrtNcst?serviceKey='
# NOTE(review): the service key is committed in plain text; consider loading
# it from the environment instead.
serviceKey = 'U4YvEM6l6J58M949SHQpfEwFm7ZNve4WNkKnXgdeb2mkxFMws8amIjVNvAVyhg5m%2Bht%2BsccUyMv7TnybLn5fzg%3D%3D'
base_year = 2022
base_month = 6
base_day = 10
headers = {'Content-Type': 'application/json', 'charset': 'UTF-8', 'Accept': '*/*'}

for base_time in range(14):     # data existed up to 13:00 when this code was written
    # Repeat for every region
    for index, rows in df_daegu.iterrows():
        # Use .iloc for positional access: each row is labelled by column
        # name, so integer keys such as rows[0] rely on a deprecated fallback.
        region = rows.iloc[0]
        nx = rows.iloc[1]
        ny = rows.iloc[2]

        # Read the API response as JSON
        url = (
            f'{base_url}{serviceKey}'
            f'&pageNo=1&numOfRows=1000&dataType=JSON'
            f'&base_date={base_year}{str_2words(base_month)}{str_2words(base_day)}'
            f'&base_time={str_2words(base_time)}00&nx={nx}&ny={ny}'
        )
        # A timeout keeps a stalled connection from hanging the whole run;
        # raise_for_status surfaces HTTP errors before JSON parsing.
        res = requests.get(url, headers=headers, timeout=30)
        res.raise_for_status()
        jsonObject = json.loads(res.text).get("response").get("body").get("items").get("item")

        # Index the observed values by category code. A category missing from
        # this response becomes None instead of silently reusing the value
        # left over from the previous request.
        observed = {item.get('category'): item.get('obsrValue') for item in jsonObject}
        hum = observed.get('REH')
        rain = observed.get('RN1')
        temp = observed.get('T1H')
        wind_direction = observed.get('VEC')
        wind_speed = observed.get('WSD')

        # Store the row in the DataFrame
        df.loc[len(df)] = [base_year,base_month,base_day,base_time,temp,rain,hum,wind_speed,wind_direction,region]

In [10]:
df

Unnamed: 0,year,month,day,hour,temp,rainfall,humidiry,wind_speed,wind_direction,region
0,2022,6,10,0,17.9,0,77,2.2,125,동인동
1,2022,6,10,0,17.9,0,77,2.2,125,삼덕동
2,2022,6,10,0,17.9,0,77,2.2,125,성내1동
3,2022,6,10,0,17.9,0,77,2.2,125,성내2동
4,2022,6,10,0,17.9,0,77,2.2,125,성내3동
...,...,...,...,...,...,...,...,...,...,...
1983,2022,6,10,13,28,0,42,3.4,254,옥포읍
1984,2022,6,10,13,27.2,0,43,3.1,189,현풍읍
1985,2022,6,10,13,28.6,0,37,1.4,300,가창면
1986,2022,6,10,13,27.5,0,42,2.9,219,하빈면


df를 local에 저장

In [12]:
# Save the collected frame. The file name must be 'Daegu_data.csv' because
# that is the path the later cells read back and update (the original
# 'Deagu_data.csv' spelling produced a file nothing else reads).
df_ = df
df_.to_csv('./output/Daegu_data.csv', index=False)

## 3. 실시간 수집하는 방법

이전 데이터 불러오기

In [3]:
# Reload the previously saved observations
df = pd.read_csv(filepath_or_buffer='./output/Daegu_data.csv')

데이터 수집 코드

In [19]:
# 현재 시각
now = datetime.datetime.now()

# 현재 시각을 기준으로 한시간 전 데이터가 있는지 확인. 없으면 추가
if datetime.datetime(year=df.iloc[len(df)-1,0], month=df.iloc[len(df)-1,1], day=df.iloc[len(df)-1,2], hour=df.iloc[len(df)-1,3]) <= now - datetime.timedelta(hours=2):
    # api를 위한 항목
    base_url = 'http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getUltraSrtNcst?serviceKey='
    serviceKey = 'U4YvEM6l6J58M949SHQpfEwFm7ZNve4WNkKnXgdeb2mkxFMws8amIjVNvAVyhg5m%2Bht%2BsccUyMv7TnybLn5fzg%3D%3D'
    base_year = now.year
    base_month = now.month
    base_day = now.day
    base_time = now.hour
    if base_time == 0:
        base_day = base_day-1
        base_time = 23
    else:
        base_time = base_time-1
    headers = {'Content-Type': 'application/json', 'charset': 'UTF-8', 'Accept': '*/*'}

    for index, rows in df_daegu.iterrows():
        region = rows[0]
        nx = rows[1]
        ny = rows[2]

        # API를 JSON형식으로 읽기
        res = requests.get(base_url+serviceKey+'&pageNo=1&numOfRows=1000&dataType=JSON&base_date='+str(base_year)+str_2words(base_month)+str_2words(base_day)+'&base_time='+str_2words(base_time)+'00&nx='+str(nx)+'&ny='+str(ny), headers=headers)
        jsonObject = json.loads(res.text).get("response").get("body").get("items").get("item")
        
        # Parsing
        for i in jsonObject:
            if i.get('category') == 'REH':
                hum = i.get('obsrValue')
            elif i.get('category') == 'RN1':
                rain = i.get('obsrValue')
            elif i.get('category') == 'T1H':
                temp = i.get('obsrValue')
            elif i.get('category') == 'VEC':
                wind_direction = i.get('obsrValue')
            elif i.get('category') == 'WSD':
                wind_speed = i.get('obsrValue')
        
        # save at DF
        df.loc[len(df)] = [base_year,base_month,base_day,base_time,temp,rain,hum,wind_speed,wind_direction,region]
    
    # save at local
    df.to_csv('./output/Daegu_data.csv', index=False)

    # message
    print('추가 완료')
else:
    # message
    print('추가할 필요 없음')


추가할 필요 없음


In [20]:
df

Unnamed: 0,year,month,day,hour,temp,rainfall,humidiry,wind_speed,wind_direction,region
0,2022,6,10,0,17.9,0,77,2.2,125,동인동
1,2022,6,10,0,17.9,0,77,2.2,125,삼덕동
2,2022,6,10,0,17.9,0,77,2.2,125,성내1동
3,2022,6,10,0,17.9,0,77,2.2,125,성내2동
4,2022,6,10,0,17.9,0,77,2.2,125,성내3동
...,...,...,...,...,...,...,...,...,...,...
2125,2022,6,10,14,27.7,0,40,3.4,217,옥포읍
2126,2022,6,10,14,27.1,0,43,2.6,203,현풍읍
2127,2022,6,10,14,28.8,0,39,1.6,158,가창면
2128,2022,6,10,14,27.9,0,41,3.3,200,하빈면


## 4. 임의의 시간 추가

이전 데이터 불러오기

In [3]:
# Reload the previously saved observations
df = pd.read_csv(filepath_or_buffer='./output/Daegu_data.csv')

In [4]:
def str_2words(word):
    """Return ``word`` as a string, prefixed with '0' when it is one character long.

    Values whose string form is empty or longer than one character are
    returned as their plain string form.
    """
    text = str(word)
    if len(text) != 1:
        return text
    return '0' + text

In [5]:
df

Unnamed: 0,year,month,day,hour,temp,rainfall,humidiry,wind_speed,wind_direction,region
0,2022,6,10,0,17.9,0,77,2.2,125,동인동
1,2022,6,10,0,17.9,0,77,2.2,125,삼덕동
2,2022,6,10,0,17.9,0,77,2.2,125,성내1동
3,2022,6,10,0,17.9,0,77,2.2,125,성내2동
4,2022,6,10,0,17.9,0,77,2.2,125,성내3동
...,...,...,...,...,...,...,...,...,...,...
2693,2022,6,10,18,24.8,0,56,1.8,210,옥포읍
2694,2022,6,10,18,24.1,0,61,1.6,192,현풍읍
2695,2022,6,10,18,25.8,0,47,0.7,149,가창면
2696,2022,6,10,18,23.9,0,64,1.0,245,하빈면


데이터 수집 코드

In [18]:
# Collect the observations for 2022-06-11, hours 00-11, one request per
# (hour, region), append them to df and save the result.
#
# Values needed by the API
base_url = 'http://apis.data.go.kr/1360000/VilageFcstInfoService_2.0/getUltraSrtNcst?serviceKey='
# NOTE(review): the service key is committed in plain text; consider loading
# it from the environment instead.
serviceKey = 'U4YvEM6l6J58M949SHQpfEwFm7ZNve4WNkKnXgdeb2mkxFMws8amIjVNvAVyhg5m%2Bht%2BsccUyMv7TnybLn5fzg%3D%3D'
base_year = 2022
base_month = 6
base_day = 11
headers = {'Content-Type': 'application/json', 'charset': 'UTF-8', 'Accept': '*/*'}
for base_time in range(12):
    for index, rows in df_daegu.iterrows():
        # Use .iloc for positional access: each row is labelled by column
        # name, so integer keys such as rows[0] rely on a deprecated fallback.
        region = rows.iloc[0]
        nx = rows.iloc[1]
        ny = rows.iloc[2]

        # Read the API response as JSON
        url = (
            f'{base_url}{serviceKey}'
            f'&pageNo=1&numOfRows=1000&dataType=JSON'
            f'&base_date={base_year}{str_2words(base_month)}{str_2words(base_day)}'
            f'&base_time={str_2words(base_time)}00&nx={nx}&ny={ny}'
        )
        # A timeout keeps a stalled connection from hanging the whole run;
        # raise_for_status surfaces HTTP errors before JSON parsing.
        res = requests.get(url, headers=headers, timeout=30)
        res.raise_for_status()
        jsonObject = json.loads(res.text).get("response").get("body").get("items").get("item")

        # Index the observed values by category code. A category missing from
        # this response becomes None instead of silently reusing the value
        # left over from the previous request.
        observed = {item.get('category'): item.get('obsrValue') for item in jsonObject}
        hum = observed.get('REH')
        rain = observed.get('RN1')
        temp = observed.get('T1H')
        wind_direction = observed.get('VEC')
        wind_speed = observed.get('WSD')

        # Store the row in the DataFrame
        df.loc[len(df)] = [base_year,base_month,base_day,base_time,temp,rain,hum,wind_speed,wind_direction,region]

# Save to the local file
df.to_csv('./output/Daegu_data.csv', index=False)

# message
print('추가 완료')


추가 완료


In [22]:
df

Unnamed: 0,year,month,day,hour,temp,rainfall,humidiry,wind_speed,wind_direction,region
0,2022,6,10,0,17.9,0,77,2.2,125,동인동
1,2022,6,10,0,17.9,0,77,2.2,125,삼덕동
2,2022,6,10,0,17.9,0,77,2.2,125,성내1동
3,2022,6,10,0,17.9,0,77,2.2,125,성내2동
4,2022,6,10,0,17.9,0,77,2.2,125,성내3동
...,...,...,...,...,...,...,...,...,...,...
5107,2022,6,11,11,24.9,0,59,5.1,65,옥포읍
5108,2022,6,11,11,27,0,54,2.1,348,현풍읍
5109,2022,6,11,11,26.2,0,51,1.9,87,가창면
5110,2022,6,11,11,26.3,0,51,3.3,108,하빈면
