In [1]:
import os

import numpy as np
import pandas as pd
from glob import glob

import seaborn as sns 
import matplotlib.pyplot as plt

import requests
import calendar
import time

In [2]:
def get_info(source):
    """Return the base request URL and station table for a data source.

    Parameters
    ----------
    source : str
        ``'bridge'``, ``'dam'`` or ``'rf'`` select the matching
        ``api.hrfco.go.kr`` endpoint (waterlevel / dam / rainfall, 10-minute
        lists). Any other value selects the ``khoa.go.kr`` tide-observation
        endpoint.

    Returns
    -------
    tuple
        ``(url, table)`` where ``url`` is the endpoint prefix with the service
        key already embedded and ``table`` maps a station name to its code.

    Notes
    -----
    ``SERVICE_KEY`` and ``SERVICE_KEY2`` are read from module scope; they are
    not defined in this section of the notebook and must exist before the
    corresponding branch is used.
    """
    # Each branch builds its URL only when selected, so only the service key
    # that branch needs has to be defined.
    if source == 'bridge':
        endpoint = f"http://api.hrfco.go.kr/{SERVICE_KEY}/waterlevel/list/10M/"
        stations = {
            '서울시(청담대교)': 1018662,
            '서울시(잠수교)': 1018680,
            '서울시(한강대교)': 1018683,
            '서울시(행주대교)': 1019630,
            '서울시(광진교)': 1018640,
            '남양주시(팔당대교)': 1018610,
            '서울시(중랑교)': 1018675,
        }
        return endpoint, stations

    if source == 'dam':
        endpoint = f"http://api.hrfco.go.kr/{SERVICE_KEY}/dam/list/10M/"
        stations = {'팔당댐': 1017310}
        return endpoint, stations

    if source == 'rf':
        endpoint = f"http://api.hrfco.go.kr/{SERVICE_KEY}/rainfall/list/10M/"
        stations = {
            '서울시(대곡교)': 10184100,
            '남양주시(진관교)': 10184110,
            '서울시(송정동)': 10184140,
        }
        return endpoint, stations

    # Fallback: every other source value is served by the tide endpoint.
    endpoint = f"http://www.khoa.go.kr/api/oceangrid/tideObs/search.do?ServiceKey={SERVICE_KEY2}"
    stations = {'강화대교': 'DT_0032'}
    return endpoint, stations

In [9]:
def _split_yyyymm(value):
    """Split a YYYYMM value (int or digit string) into ``(year, month)`` ints."""
    year, month = divmod(int(value), 100)
    if not 1 <= month <= 12:
        raise ValueError(f"expected YYYYMM with month 01-12, got {value!r}")
    return year, month


def _fetch_hrfco_month(origin_url, code, year, month, last_day):
    """Request one whole month for one station from the hrfco-style endpoint."""
    # Range runs from day 01 at 0000 to the month's last day at 2350.
    start_date = f"{year}{month:02}010000"
    end_date = f"{year}{month:02}{last_day:02}2350"
    url = origin_url + f"{code}/{start_date}/{end_date}.json"
    response = requests.get(url, verify=False)
    return pd.DataFrame(response.json()['content'])


def _fetch_tide_month(origin_url, code, year, month, last_day):
    """Request a month of tide data day by day; return ``None`` if no day parsed."""
    daily_frames = []
    for day in range(1, last_day + 1):
        date = f"{year}{month:02}{day:02}"
        url = origin_url + f"&ObsCode={code}&Date={date}&ResultType=json"
        response = requests.get(url, verify=False)
        try:
            df = pd.DataFrame(response.json()['result']['data'])
            df = df.set_index('record_time', drop=True)
            df.index = pd.to_datetime(df.index)
            df['tide_level'] = df['tide_level'].astype('int')
            # '10min' is the non-deprecated spelling of the old '10T' alias.
            daily_frames.append(df.resample('10min').mean())
        except Exception:
            # Best effort: a day whose payload cannot be parsed is skipped.
            # Unlike a bare ``except``, Ctrl-C / SystemExit still propagate.
            continue
    if not daily_frames:
        return None
    df = pd.concat(daily_frames, axis=0)
    # The CSV is written without the index, so keep the timestamp as a column.
    df['record_time'] = df.index
    return df


def collect_data(start, end, source, base_dir="G:/내 드라이브/project/수위예측/data"):
    """Download monthly data for every station of ``source`` and save it as CSV.

    Parameters
    ----------
    start, end : int or str
        First and last month to collect, inclusive, written as YYYYMM
        (e.g. ``202309``).
    source : str
        Data source name passed to ``get_info``. ``'tide'`` uses the per-day
        tide request; every other value uses the per-month request.
    base_dir : str, optional
        Root output directory. Files are written to
        ``{base_dir}/{source}/{name}/{year}{month:02}_{name}.csv``; the
        directory is created if missing.

    Raises
    ------
    ValueError
        If ``start`` or ``end`` is not a valid YYYYMM value.

    Notes
    -----
    Sleeps 3 seconds after each saved month. For ``'tide'``, when a month
    yields no parsable day the remaining months of that year are skipped.
    """
    start_year, start_month = _split_yyyymm(start)
    end_year, end_month = _split_yyyymm(end)
    origin_url, table = get_info(source)  # base URL and station codes to crawl

    for name, code in table.items():
        print(f"{name} Crawling start ###################")
        # Create the same directory the CSV is written to.
        out_dir = f"{base_dir}/{source}/{name}"
        os.makedirs(out_dir, exist_ok=True)

        for year in range(start_year, end_year + 1):
            # Only the first/last year are clipped; years in between run 1..12.
            first_month = start_month if year == start_year else 1
            last_month = end_month if year == end_year else 12

            for month in range(first_month, last_month + 1):
                _, last_day = calendar.monthrange(year, month)
                if source != 'tide':
                    df = _fetch_hrfco_month(origin_url, code, year, month, last_day)
                else:
                    df = _fetch_tide_month(origin_url, code, year, month, last_day)
                    if df is None:
                        print("존재하지 않는 일자입니다.")
                        break
                df.to_csv(f"{out_dir}/{year}{month:02}_{name}.csv", index=False)
                time.sleep(3)
            print(f"{year} end ")
        print(f"{name} Crawling end ###################")

In [None]:
if __name__ == "__main__":
    # Interactive driver: keep collecting until the user types anything that
    # is not one of the known source names.
    valid_sources = ('bridge', 'dam', 'rf', 'tide')
    while True:
        source = input("bridge / dam / rf / tide 를 입력하세요. (종료하려면 아무 키나 입력하세요): ").strip()
        if source not in valid_sources:
            break
        try:
            start = int(input("시작일을 입력하세요(ex 202309)"))
            end = int(input("종료일를 입력하세요(ex 202311)"))
        except ValueError:
            # A non-numeric month used to abort the whole loop with a
            # traceback; ask again instead.
            print("YYYYMM 형식의 숫자를 입력하세요.")
            continue
        collect_data(start, end, source)
        print("데이터 수집이 완료되었습니다.")