In [1]:
# Target ASOS stations, written as a "- name(code)" bullet list.
observer_str = """- 서울(108)
- 춘천(101)
- 북강릉(104)
- 천안(232)
- 충주(127)
- 울진(130)
- 전주(146)
- 추풍령(135)
- 포항(138)
- 광주(156)
- 합천(285)
- 부산(159)
- 남해(295)
- 제주(184)"""

# Each bullet becomes ["name", "code"]; the empty chunk before the first "-"
# yields a single-element list and is filtered out below.
bullet_fields = (
    bullet.replace(')', ' ').strip().split('(')
    for bullet in observer_str.split("-")
)

# {station name: integer station code}, in the order the bullets are listed.
observer_dict = {
    fields[0]: int(fields[1])
    for fields in bullet_fields
    if len(fields) == 2
}
observer_dict

{'서울': 108,
 '춘천': 101,
 '북강릉': 104,
 '천안': 232,
 '충주': 127,
 '울진': 130,
 '전주': 146,
 '추풍령': 135,
 '포항': 138,
 '광주': 156,
 '합천': 285,
 '부산': 159,
 '남해': 295,
 '제주': 184}

In [2]:
import requests
from datetime import datetime, timedelta
import time
from tqdm import tqdm
import numpy as np
def str_to_float_for_weather_data(x):
    """Convert a raw weather field to a float, treating an empty string as zero.

    Args:
        x: Raw field value; the empty string ``''`` marks a missing value.

    Returns:
        ``0.0`` (a Python float) when ``x`` is ``''``, otherwise ``np.float32(x)``.
    """
    return 0.0 if x == '' else np.float32(x)
def str_to_int_for_weather_data(x):
    """Convert a raw weather field to an integer, treating an empty string as zero.

    Args:
        x: Raw field value; the empty string ``''`` marks a missing value.

    Returns:
        ``0`` (a Python int) when ``x`` is ``''``, otherwise ``np.int32(x)``.
    """
    return 0 if x == '' else np.int32(x)

# Load the API key (first line of ./weather_api.txt).
with open("./weather_api.txt", 'r') as f:
    api_key = f.readline().strip()

# Retry policy: a (station, day) request that keeps failing is skipped after
# this many attempts instead of being retried forever.
MAX_RETRIES_PER_DAY = 5
# Seconds to wait for the server before treating the request as failed.
REQUEST_TIMEOUT_SEC = 30

observer_results_dict = {}
# (station code, date string, error repr) for every day that was given up on,
# so missing days are visible after the crawl instead of silently absent.
failed_requests = []
start_date = datetime(2024,1,1).date()
today = datetime.now().date()
tq = tqdm(observer_dict.items())
cnt = 1
for observer_key, observer_code in tq:
    now_date = start_date
    observer_results_dict[observer_code] = []
    retry_count = 0
    # Fetch one day (24 hourly rows) per request, from start_date up to yesterday.
    while now_date < today:
        date_str = now_date.strftime("%Y%m%d")
        try:
            # The key is interpolated into the URL as-is (not passed via `params=`),
            # exactly as the original request was built.
            query_str = f"http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList?serviceKey={api_key}&numOfRows=24&pageNo=1&dataType=JSON&dataCd=ASOS&dateCd=HR&stnIds={observer_code}&endDt={date_str}&endHh=23&startHh=00&startDt={date_str}"
            res = requests.get(query_str, timeout=REQUEST_TIMEOUT_SEC)
            result_items = res.json()['response']['body']['items']['item']

            # Parse the whole day into a temporary list first: if any item fails
            # to parse, nothing has been stored yet, so a retry cannot duplicate rows.
            day_rows = []
            for item in result_items:
                tm = datetime.strptime(item['tm'], '%Y-%m-%d %H:%M')
                rn = str_to_float_for_weather_data(item['rn']) # precipitation
                dsnw = str_to_float_for_weather_data(item['dsnw']) # snow depth
                dc10Tca = str_to_int_for_weather_data(item['dc10Tca']) # total cloud amount (in tenths)
                # Weather phenomenon code; multiple codes seem to be concatenated,
                # e.g. snow pellets (11) + haze (40) -> 1140 (unconfirmed).
                dmstMtphNo = item['dmstMtphNo']

                day_rows.append([tm, observer_code, observer_key, rn, dsnw, dc10Tca, dmstMtphNo])
            observer_results_dict[observer_code].extend(day_rows)
            now_date += timedelta(days = 1)
            retry_count = 0
        except Exception as e:
            # Any failure (network error, timeout, non-JSON body, missing key,
            # unparsable field) lands here. Back off and retry the same day, but
            # only a bounded number of times so one bad day cannot hang the crawl.
            retry_count += 1
            if retry_count >= MAX_RETRIES_PER_DAY:
                failed_requests.append((observer_code, date_str, repr(e)))
                tqdm.write(f"skip {observer_key}({observer_code}) {date_str}: {e!r}")
                now_date += timedelta(days = 1)
                retry_count = 0
            else:
                time.sleep(3.5)
        # Pause between requests to keep the request rate low.
        time.sleep(1)
        tq.set_description_str(f"{cnt}/{len(tq)} {observer_key} {now_date}/{today}")
    cnt += 1


14/14 제주 2024-05-14/2024-05-14: 100%|██████████| 14/14 [50:02<00:00, 214.47s/it]


In [None]:
import pandas as pd
# Flatten the per-station row lists into one list of rows.
total_result = []
for k, v in observer_results_dict.items():
    total_result += v

# Fix: the original list had `'id' 'location'` with no comma, which Python
# concatenates into the single name 'idlocation'. That left 6 column names for
# 7-field rows, so the DataFrame constructor raised.
total_result = pd.DataFrame(total_result, columns = ['tm', 'id', 'location', 'rain', 'snow', 'cloud', 'weather_code'])
start_date_str, end_date_str = start_date.strftime("%Y%m%d"), today.strftime("%Y%m%d")
# Store a missing weather code (empty string) as None rather than ''.
total_result['weather_code'] = total_result['weather_code'].map(lambda x: None if x == '' else x)
total_result.to_csv(f"./weather_data_{start_date_str}_{end_date_str}.csv", index=False)
display(total_result)

[각 관측소의 메타 데이터](https://data.kma.go.kr/tmeta/stn/selectStnList.do)로부터 관측소 위경도 좌표 추출

In [2]:
import pandas as pd
import numpy as np
# Load the station metadata CSV; the file is read with the cp949 encoding.
weather_station_meta_df = pd.read_csv("./META_관측지점정보.csv", encoding='cp949')
weather_station_meta_df

Unnamed: 0,지점,시작일,종료일,지점명,지점주소,관리관서,위도,경도,노장해발고도(m),기압계(관측장비지상높이(m)),기온계(관측장비지상높이(m)),풍속계(관측장비지상높이(m)),강우계(관측장비지상높이(m))
0,90,1968-01-01,,속초,강원특별자치도 고성군토성면 봉포5길9 속초자동기상관측소,속초기상대(90),38.2509,128.5647,17.53,18.73,1.7,10.0,1.4
1,93,2016-10-01,,북춘천,강원특별자치도 춘천시신북읍 산천리264(장본1길 12) 춘천기상대,춘천기상대(101),37.9474,127.7544,95.78,96.78,1.5,10.0,1.4
2,95,1988-01-01,,철원,강원특별자치도 철원군갈말읍 명성로179번길 26 철원자동기상관측소,춘천기상대(101),38.1479,127.3042,155.48,156.98,1.8,13.0,1.5
3,98,1998-02-01,,동두천,경기도 동두천시방죽로 16-47동두천서비스센터,수도권기상청(119),37.9019,127.0607,115.62,116.74,1.7,10.0,1.0
4,99,2013-10-22,,파주,경기도 파주시문산읍 마정로46-29(파주기상대),수도권기상청(119),37.8859,126.7665,30.59,31.99,1.7,10.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
139,288,1973-01-01,,밀양,경상남도 밀양시점필재로 5밀양자동기상관측소,울산기상대(152),35.4915,128.7441,8.31,12.50,2.0,10.0,1.3
140,289,1972-03-30,,산청,경상남도 산청군 산청읍 꽃봉산로133번길 3산청자동기상관측소,창원기상대(155),35.4130,127.8791,138.22,138.80,1.6,10.0,0.6
141,294,1972-01-24,,거제,경상남도 거제시 장평2로2길 47거제자동기상관측소(장평동),부산지방기상청(159),34.8882,128.6046,44.83,46.70,1.7,10.0,0.6
142,295,1972-01-24,,남해,경상남도 남해군이동면 남해대로2423 남해자동기상관측소,부산지방기상청(159),34.8166,127.9264,45.71,47.01,1.8,10.0,0.8


In [3]:
# Show column names, dtypes and non-null counts of the station metadata.
weather_station_meta_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 144 entries, 0 to 143
Data columns (total 13 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   지점                144 non-null    int64  
 1   시작일               144 non-null    object 
 2   종료일               47 non-null     object 
 3   지점명               144 non-null    object 
 4   지점주소              135 non-null    object 
 5   관리관서              129 non-null    object 
 6   위도                144 non-null    float64
 7   경도                144 non-null    float64
 8   노장해발고도(m)         144 non-null    float64
 9   기압계(관측장비지상높이(m))  141 non-null    float64
 10  기온계(관측장비지상높이(m))  141 non-null    float64
 11  풍속계(관측장비지상높이(m))  142 non-null    float64
 12  강우계(관측장비지상높이(m))  141 non-null    float64
dtypes: float64(7), int64(1), object(5)
memory usage: 14.8+ KB


In [4]:
# Keep only stations that have no end date ('종료일' is NaN).
active_station_mask = weather_station_meta_df['종료일'].isna()

# Select station id, name, latitude and longitude by column name rather than by
# position, so the cell does not depend on the column order of the CSV, then
# rename them to English identifiers.
weather_station_meta_df = (
    weather_station_meta_df
    .loc[active_station_mask, ['지점', '지점명', '위도', '경도']]
    .rename(columns={
        '지점': 'weather_station_id',
        '지점명': 'weather_station_name',
        '위도': 'latitude',
        '경도': 'longitude',
    })
    .reset_index(drop = True)
)
weather_station_meta_df

Unnamed: 0,id,point_name,latitute,longitude
0,90,속초,38.2509,128.5647
1,93,북춘천,37.9474,127.7544
2,95,철원,38.1479,127.3042
3,98,동두천,37.9019,127.0607
4,99,파주,37.8859,126.7665
...,...,...,...,...
92,288,밀양,35.4915,128.7441
93,289,산청,35.4130,127.8791
94,294,거제,34.8882,128.6046
95,295,남해,34.8166,127.9264


In [5]:
# Save the station id / name / latitude / longitude table without the index column.
weather_station_meta_df.to_csv("./weather_station_coord.csv", index = False)

### 활성화된 전체 관측소 데이터 크롤링

In [1]:
import requests
from datetime import datetime, timedelta
import time
from tqdm import tqdm
import numpy as np
import pandas as pd
# Read the station table that this notebook saves as ./weather_station_coord.csv.
# The original read ./weather_station_long_lat.csv, a file no cell in this
# notebook writes, so a clean Restart & Run All could not find it.
ws_info_df = pd.read_csv("./weather_station_coord.csv").loc[:, ["weather_station_id", "weather_station_name"]]
ws_info_df

Unnamed: 0,weather_station_id,weather_station_name
0,90,속초
1,93,북춘천
2,95,철원
3,98,동두천
4,99,파주
...,...,...
92,288,밀양
93,289,산청
94,294,거제
95,295,남해


In [2]:
# Show the (station id, station name) pairs as an array; the crawl loop iterates over this.
ws_info_df.values

array([[90, '속초'],
       [93, '북춘천'],
       [95, '철원'],
       [98, '동두천'],
       [99, '파주'],
       [100, '대관령'],
       [101, '춘천'],
       [102, '백령도'],
       [104, '북강릉'],
       [105, '강릉'],
       [106, '동해'],
       [108, '서울'],
       [112, '인천'],
       [114, '원주'],
       [115, '울릉도'],
       [119, '수원'],
       [121, '영월'],
       [127, '충주'],
       [129, '서산'],
       [130, '울진'],
       [131, '청주'],
       [133, '대전'],
       [135, '추풍령'],
       [136, '안동'],
       [137, '상주'],
       [138, '포항'],
       [140, '군산'],
       [143, '대구'],
       [146, '전주'],
       [152, '울산'],
       [155, '창원'],
       [156, '광주'],
       [159, '부산'],
       [162, '통영'],
       [165, '목포'],
       [168, '여수'],
       [169, '흑산도'],
       [170, '완도'],
       [172, '고창'],
       [174, '순천'],
       [177, '홍성'],
       [181, '서청주'],
       [184, '제주'],
       [185, '고산'],
       [188, '성산'],
       [189, '서귀포'],
       [192, '진주'],
       [201, '강화'],
       [202, '양평'],
       [203, '이

In [5]:
def str_to_float_for_weather_data(x):
    """Convert a raw weather field to a float, treating an empty string as zero.

    Args:
        x: Raw field value; the empty string ``''`` marks a missing value.

    Returns:
        ``0.0`` (a Python float) when ``x`` is ``''``, otherwise ``np.float32(x)``.
    """
    return 0.0 if x == '' else np.float32(x)
def str_to_int_for_weather_data(x):
    """Convert a raw weather field to an integer, treating an empty string as zero.

    Args:
        x: Raw field value; the empty string ``''`` marks a missing value.

    Returns:
        ``0`` (a Python int) when ``x`` is ``''``, otherwise ``np.int32(x)``.
    """
    return 0 if x == '' else np.int32(x)

# Load the API key (first line of ./weather_api.txt).
with open("./weather_api.txt", 'r') as f:
    api_key = f.readline().strip()

# Retry policy: a (station, day) request that keeps failing is skipped after
# this many attempts instead of being retried forever.
MAX_RETRIES_PER_DAY = 5
# Seconds to wait for the server before treating the request as failed.
REQUEST_TIMEOUT_SEC = 30

observer_results_dict = {}
# (station code, date string, error repr) for every day that was given up on,
# so missing days are visible after the crawl instead of silently absent.
failed_requests = []
start_date = datetime(2024,1,1).date()
today = datetime.now().date()
tq = tqdm(ws_info_df.values)
cnt = 1
for observer_code, observer_key in tq:
    now_date = start_date
    observer_results_dict[observer_code] = []
    retry_count = 0
    # Fetch one day (24 hourly rows) per request, from start_date up to yesterday.
    while now_date < today:
        date_str = now_date.strftime("%Y%m%d")
        try:
            # The key is interpolated into the URL as-is (not passed via `params=`),
            # exactly as the original request was built.
            query_str = f"http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList?serviceKey={api_key}&numOfRows=24&pageNo=1&dataType=JSON&dataCd=ASOS&dateCd=HR&stnIds={observer_code}&endDt={date_str}&endHh=23&startHh=00&startDt={date_str}"
            res = requests.get(query_str, timeout=REQUEST_TIMEOUT_SEC)
            result_items = res.json()['response']['body']['items']['item']

            # Parse the whole day into a temporary list first: if any item fails
            # to parse, nothing has been stored yet, so a retry cannot duplicate rows.
            day_rows = []
            for item in result_items:
                tm = datetime.strptime(item['tm'], '%Y-%m-%d %H:%M')
                rn = str_to_float_for_weather_data(item['rn']) # precipitation
                dsnw = str_to_float_for_weather_data(item['dsnw']) # snow depth
                dc10Tca = str_to_int_for_weather_data(item['dc10Tca']) # total cloud amount (in tenths)
                # Weather phenomenon code; multiple codes seem to be concatenated,
                # e.g. snow pellets (11) + haze (40) -> 1140 (unconfirmed).
                dmstMtphNo = item['dmstMtphNo']

                day_rows.append([tm, observer_code, observer_key, rn, dsnw, dc10Tca, dmstMtphNo])
            observer_results_dict[observer_code].extend(day_rows)
            now_date += timedelta(days = 1)
            retry_count = 0
        except Exception as e:
            # Any failure (network error, timeout, non-JSON body, missing key,
            # unparsable field) lands here. Back off and retry the same day, but
            # only a bounded number of times so one bad day cannot hang the crawl.
            retry_count += 1
            if retry_count >= MAX_RETRIES_PER_DAY:
                failed_requests.append((observer_code, date_str, repr(e)))
                tqdm.write(f"skip {observer_key}({observer_code}) {date_str}: {e!r}")
                now_date += timedelta(days = 1)
                retry_count = 0
            else:
                time.sleep(3.5)
        # Pause between requests to keep the request rate low.
        time.sleep(1)
        tq.set_description_str(f"{cnt}/{len(tq)} {observer_key} {now_date}/{today}")
    cnt += 1


97/97 북부산 2024-05-14/2024-05-14: 100%|██████████| 97/97 [5:55:42<00:00, 220.02s/it]   


In [13]:
import pandas as pd
def _blank_to_none(code):
    """Return None for an empty-string weather code, otherwise the code unchanged."""
    return None if code == '' else code

# Date stamps used in the output file name.
start_date_str = start_date.strftime("%Y%m%d")
end_date_str = today.strftime("%Y%m%d")

# Merge every station's rows into one flat list, keeping station order.
total_result = [
    row
    for station_rows in observer_results_dict.values()
    for row in station_rows
]

total_result = pd.DataFrame(
    total_result,
    columns = ['tm', 'id', 'location', 'rain', 'snow', 'cloud', 'weather_code'],
)
total_result['weather_code'] = total_result['weather_code'].map(_blank_to_none)
total_result.to_csv(f"./weather_data_{start_date_str}_{end_date_str}_full.csv", index=False)
display(total_result)

Unnamed: 0,tm,id,location,rain,snow,cloud,weather_code
0,2024-01-01 00:00:00,90,속초,0.0,0.0,8,
1,2024-01-01 01:00:00,90,속초,0.0,0.0,8,
2,2024-01-01 02:00:00,90,속초,0.0,0.0,8,
3,2024-01-01 03:00:00,90,속초,0.4,0.0,9,
4,2024-01-01 04:00:00,90,속초,0.0,0.0,9,
...,...,...,...,...,...,...,...
311947,2024-05-13 19:00:00,296,북부산,0.0,0.0,0,
311948,2024-05-13 20:00:00,296,북부산,0.0,0.0,0,
311949,2024-05-13 21:00:00,296,북부산,0.0,0.0,0,
311950,2024-05-13 22:00:00,296,북부산,0.0,0.0,0,
