# Notebook 기본 세팅

In [16]:
# Constant declarations

# Name of the marker file used to identify the project root directory
ROOT_MARKER = "pyproject.toml"

# File name of the NanumBarunGothic font used to display Korean text.
# matplotlib's font_manager has to be given the location of the actual font file.
KOREAN_FONT_FILE = "NanumBarunGothic.ttf"

# matplotlib selects a font by its font-family name,
# so this is the family name of that file rather than the font file itself.
KOREAN_FONT_FAMILY = "NanumBarunGothic"

# Note
# The difference between a font family and a font file:
# a font family is a group of typefaces that share similar design characteristics.
#
# For example, the 'NanumBarunGothic' family can include several styles such as Regular, Bold and Italic.
# A font file (.ttf, .otf, ...), on the other hand, is the actual file that stores one style of such a font.
#
# To keep the font size small, this project only uses the Regular style, NanumBarunGothic.ttf.

In [17]:
# 프로젝트 root 를 sys.path 에 추가해서 import 구문을 사용하기 쉽게
from pathlib import Path


def find_project_root() -> Path:
    """
    Locate the project root by searching upward for the ROOT_MARKER file.

    The search starts at the current working directory and then visits each
    ancestor directory, up to and including the filesystem root.

    :return: Path: the nearest visited directory that contains ROOT_MARKER
    :raises FileNotFoundError: if none of the visited directories contains ROOT_MARKER
    """

    start_dir = Path().resolve()

    # `parents` runs all the way to the filesystem root, so a marker placed
    # directly in the root directory is found as well.
    for candidate in (start_dir, *start_dir.parents):
        if (candidate / ROOT_MARKER).exists():
            return candidate

    raise FileNotFoundError("프로젝트 루트 디렉토리를 찾을 수 없습니다.")


ROOT_DIR = find_project_root()

In [18]:
# matplotlib 의 한글 font 설정
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt


FONTS_DATA_DIR = ROOT_DIR / "notebooks" / "fonts"


def setup_korean_font():
    """Register the Korean font file with matplotlib and select its family for plots."""
    # Register the font file so its family name can be used below.
    fm.fontManager.addfont(FONTS_DATA_DIR / KOREAN_FONT_FILE)

    # Font settings
    # NOTE(review): axes.unicode_minus is turned off presumably so minus signs
    # still render with this font — confirm.
    plt.rcParams.update(
        {
            "font.family": KOREAN_FONT_FAMILY,
            "axes.unicode_minus": False,
        }
    )


setup_korean_font()

# Naver Cloud Storage 연동해보기

In [19]:
from dotenv import load_dotenv


# Load environment variables (the keys are stored in a .env file, for security).
# NOTE(review): the recorded output of this cell is False — presumably no .env
# file was found/loaded in that run; confirm how the keys were supplied.
load_dotenv()

False

In [20]:
import os


# Read the Ncloud storage settings from environment variables.
# os.getenv returns None for any variable that is not set.
NCLOUD_ACCESS_KEY = os.getenv("NCLOUD_ACCESS_KEY")
NCLOUD_SECRET_KEY = os.getenv("NCLOUD_SECRET_KEY")
NCLOUD_STORAGE_REGION = os.getenv("NCLOUD_STORAGE_REGION")
NCLOUD_STORAGE_BUCKET = os.getenv("NCLOUD_STORAGE_BUCKET")
NCLOUD_STORAGE_ENDPOINT_URL = os.getenv("NCLOUD_STORAGE_ENDPOINT_URL")

In [21]:
import boto3
from botocore.client import Config


# Create the S3 client; endpoint_url points it at the configured Ncloud storage endpoint.
s3_client = boto3.client(
    "s3",
    endpoint_url=NCLOUD_STORAGE_ENDPOINT_URL,
    aws_access_key_id=NCLOUD_ACCESS_KEY,
    aws_secret_access_key=NCLOUD_SECRET_KEY,
    region_name=NCLOUD_STORAGE_REGION,
    config=Config(
        signature_version="s3v4"
    ),  # s3v4: AWS Signature Version 4 (the signing scheme used to authenticate requests; most S3 services use it)
)

In [22]:
from io import StringIO

import pandas as pd


# Key prefix under which dataset objects are stored in the bucket
DATASETS_DIR = "datasets"


def upload_dataframe_as_csv(
    df: pd.DataFrame,
    index: bool = False,
    key: str = f"{DATASETS_DIR}/data.csv",
):
    """
    Serialize a DataFrame to CSV and upload it to the NCLOUD_STORAGE_BUCKET bucket.

    :param df: DataFrame to upload
    :param index: whether to write the DataFrame index as a CSV column
    :param key: object key to write to; defaults to "datasets/data.csv",
        the key this function previously always used
    :return: the response returned by s3_client.put_object
    """
    # With no target given, to_csv returns the CSV text directly.
    csv_text = df.to_csv(index=index)
    return s3_client.put_object(
        Bucket=NCLOUD_STORAGE_BUCKET,
        Key=key,
        # Encode explicitly so the stored bytes match the decode("utf-8")
        # performed when the object is read back.
        Body=csv_text.encode("utf-8"),
    )

In [23]:
# Small three-row example DataFrame (name, gender, age) used to try out the upload.
example_df = pd.DataFrame({"name": ["tom", "jane", "elly"], "gender": ["m", "f", "f"], "age": [21, 23, 34]})
example_df

Unnamed: 0,name,gender,age
0,tom,m,21
1,jane,f,23
2,elly,f,34


In [24]:
upload_dataframe_as_csv(example_df)

{'ResponseMetadata': {'RequestId': 'cf846799-2bbd-4eca-9427-ffd10c59a75e',
  'HostId': 'Yzk0YmI0YmM5NGJlMjQ4N2RmNTRmYTM2OGNhNTE0NGZjNjMzNmEzYWI4NDZiMGRhMTVjNDFhYjM5NDg3YzcyZg==',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'etag': '"84a182d4f332cf1fd04213ecd3c9263e"',
   'x-amz-checksum-crc32': 'jeoaSw==',
   'x-amz-checksum-type': 'FULL_OBJECT',
   'x-amz-id-2': 'Yzk0YmI0YmM5NGJlMjQ4N2RmNTRmYTM2OGNhNTE0NGZjNjMzNmEzYWI4NDZiMGRhMTVjNDFhYjM5NDg3YzcyZg==',
   'x-amz-request-id': 'cf846799-2bbd-4eca-9427-ffd10c59a75e',
   'x-amz-version-id': '6d8de76f-3c2d-11f0-b92f-9cc2c468d112',
   'date': 'Thu, 29 May 2025 01:37:05 GMT',
   'content-length': '0',
   'server': 'Ncloud Storage'},
  'RetryAttempts': 0},
 'ETag': '"84a182d4f332cf1fd04213ecd3c9263e"',
 'ChecksumCRC32': 'jeoaSw==',
 'ChecksumType': 'FULL_OBJECT',
 'VersionId': '6d8de76f-3c2d-11f0-b92f-9cc2c468d112'}

In [25]:
def read_csv_as_dataframe(key: str) -> pd.DataFrame:
    """
    Download a CSV object from the NCLOUD_STORAGE_BUCKET bucket and parse it.

    :param key: object key of the CSV file to read
    :return: DataFrame parsed from the UTF-8 decoded object body
    """
    response = s3_client.get_object(Bucket=NCLOUD_STORAGE_BUCKET, Key=key)
    raw_bytes = response["Body"].read()
    return pd.read_csv(StringIO(raw_bytes.decode("utf-8")))

In [26]:
read_csv_as_dataframe(f"{DATASETS_DIR}/data.csv")

Unnamed: 0,name,gender,age
0,tom,m,21
1,jane,f,23
2,elly,f,34


In [27]:
# Delete the example object uploaded earlier.
# NOTE(review): the recorded response has DeleteMarker=True and a VersionId,
# which suggests the bucket is versioned and older versions may remain — confirm.
s3_client.delete_object(Bucket=NCLOUD_STORAGE_BUCKET, Key=f"{DATASETS_DIR}/data.csv")

{'ResponseMetadata': {'RequestId': '308d661e-a61e-4d3f-9303-779da8a5742e',
  'HostId': 'YzcyNzVkOGYyMzNkNmJiNTM3NDEzMGZmZDVkOGUzMjM4YjBmNWM4NTQ0MWRiMGE1Y2JkNGRhYWQzN2Q3ZDVkMw==',
  'HTTPStatusCode': 204,
  'HTTPHeaders': {'x-amz-delete-marker': 'true',
   'x-amz-id-2': 'YzcyNzVkOGYyMzNkNmJiNTM3NDEzMGZmZDVkOGUzMjM4YjBmNWM4NTQ0MWRiMGE1Y2JkNGRhYWQzN2Q3ZDVkMw==',
   'x-amz-request-id': '308d661e-a61e-4d3f-9303-779da8a5742e',
   'x-amz-version-id': '6dec61d8-3c2d-11f0-99e9-9cc2c464a269',
   'date': 'Thu, 29 May 2025 01:37:06 GMT',
   'server': 'Ncloud Storage'},
  'RetryAttempts': 0},
 'DeleteMarker': True,
 'VersionId': '6dec61d8-3c2d-11f0-99e9-9cc2c464a269'}

# 공공 데이터 포털 - 기상청 데이터

In [28]:
# API key sent as "serviceKey" in the weather API requests; None if the variable is not set.
WEATHER_API_KEY = os.getenv("WEATHER_API_KEY")

In [29]:
import requests


# Sample code
url = "http://apis.data.go.kr/1360000/AsosDalyInfoService/getWthrDataList"  # daily weather observation data list query: request URL
params = {
    "serviceKey": WEATHER_API_KEY,
    "pageNo": "1",
    "numOfRows": "999",
    "dataType": "JSON",
    "dataCd": "ASOS",
    "dateCd": "DAY",
    "startDt": "20250101",
    "endDt": "20250131",
    "stnIds": "108",
}

# Without a timeout, requests can wait indefinitely on a stalled connection.
res = requests.get(url, params=params, timeout=30)
# Fail here on an HTTP error status instead of later while parsing the body.
res.raise_for_status()

In [30]:
response = res.json()
response

{'response': {'header': {'resultCode': '00', 'resultMsg': 'NORMAL_SERVICE'},
  'body': {'dataType': 'JSON',
   'items': {'item': [{'stnId': '108',
      'stnNm': '서울',
      'tm': '2025-01-01',
      'avgTa': '2.6',
      'minTa': '-2.5',
      'minTaHrmt': '0441',
      'maxTa': '8.9',
      'maxTaHrmt': '1540',
      'mi10MaxRn': '',
      'mi10MaxRnHrmt': '',
      'hr1MaxRn': '',
      'hr1MaxRnHrmt': '',
      'sumRnDur': '',
      'sumRn': '',
      'maxInsWs': '9.7',
      'maxInsWsWd': '290',
      'maxInsWsHrmt': '1417',
      'maxWs': '5.1',
      'maxWsWd': '250',
      'maxWsHrmt': '1519',
      'avgWs': '2.2',
      'hr24SumRws': '1861',
      'maxWd': '250',
      'avgTd': '-3.6',
      'minRhm': '49',
      'minRhmHrmt': '1543',
      'avgRhm': '64.3',
      'avgPv': '4.8',
      'avgPa': '1011.0',
      'maxPs': '1023.1',
      'maxPsHrmt': '2359',
      'minPs': '1020.0',
      'minPsHrmt': '1446',
      'avgPs': '1021.8',
      'ssDur': '9.6',
      'sumSsHr': '5.6',


In [31]:
item = response["response"]["body"]["items"]["item"][0]
item

{'stnId': '108',
 'stnNm': '서울',
 'tm': '2025-01-01',
 'avgTa': '2.6',
 'minTa': '-2.5',
 'minTaHrmt': '0441',
 'maxTa': '8.9',
 'maxTaHrmt': '1540',
 'mi10MaxRn': '',
 'mi10MaxRnHrmt': '',
 'hr1MaxRn': '',
 'hr1MaxRnHrmt': '',
 'sumRnDur': '',
 'sumRn': '',
 'maxInsWs': '9.7',
 'maxInsWsWd': '290',
 'maxInsWsHrmt': '1417',
 'maxWs': '5.1',
 'maxWsWd': '250',
 'maxWsHrmt': '1519',
 'avgWs': '2.2',
 'hr24SumRws': '1861',
 'maxWd': '250',
 'avgTd': '-3.6',
 'minRhm': '49',
 'minRhmHrmt': '1543',
 'avgRhm': '64.3',
 'avgPv': '4.8',
 'avgPa': '1011.0',
 'maxPs': '1023.1',
 'maxPsHrmt': '2359',
 'minPs': '1020.0',
 'minPsHrmt': '1446',
 'avgPs': '1021.8',
 'ssDur': '9.6',
 'sumSsHr': '5.6',
 'hr1MaxIcsrHrmt': '1200',
 'hr1MaxIcsr': '1.73',
 'sumGsr': '8.55',
 'ddMefs': '',
 'ddMefsHrmt': '',
 'ddMes': '',
 'ddMesHrmt': '',
 'sumDpthFhsc': '',
 'avgTca': '2.6',
 'avgLmac': '2.6',
 'avgTs': '-0.2',
 'minTg': '-9.5',
 'avgCm5Te': '-0.2',
 'avgCm10Te': '-0.3',
 'avgCm20Te': '0.9',
 'avgCm30Te':

In [32]:
item.keys()

dict_keys(['stnId', 'stnNm', 'tm', 'avgTa', 'minTa', 'minTaHrmt', 'maxTa', 'maxTaHrmt', 'mi10MaxRn', 'mi10MaxRnHrmt', 'hr1MaxRn', 'hr1MaxRnHrmt', 'sumRnDur', 'sumRn', 'maxInsWs', 'maxInsWsWd', 'maxInsWsHrmt', 'maxWs', 'maxWsWd', 'maxWsHrmt', 'avgWs', 'hr24SumRws', 'maxWd', 'avgTd', 'minRhm', 'minRhmHrmt', 'avgRhm', 'avgPv', 'avgPa', 'maxPs', 'maxPsHrmt', 'minPs', 'minPsHrmt', 'avgPs', 'ssDur', 'sumSsHr', 'hr1MaxIcsrHrmt', 'hr1MaxIcsr', 'sumGsr', 'ddMefs', 'ddMefsHrmt', 'ddMes', 'ddMesHrmt', 'sumDpthFhsc', 'avgTca', 'avgLmac', 'avgTs', 'minTg', 'avgCm5Te', 'avgCm10Te', 'avgCm20Te', 'avgCm30Te', 'avgM05Te', 'avgM10Te', 'avgM15Te', 'avgM30Te', 'avgM50Te', 'sumLrgEv', 'sumSmlEv', 'n99Rn', 'iscs', 'sumFogDur'])

In [33]:
# Convert to a DataFrame
try:
    # Keep only the lookup inside the try so unrelated errors are not masked.
    items = response["response"]["body"]["items"]["item"]
except (KeyError, TypeError):
    # KeyError: a level is missing; TypeError: a level is present but is not
    # a dict (for example an empty string), so it cannot be indexed by key.
    print("❌ API 응답에서 item 데이터를 찾을 수 없습니다.")
    print(response)
else:
    df = pd.DataFrame(items)
    print(df.head(3))

  stnId stnNm          tm avgTa minTa minTaHrmt maxTa maxTaHrmt mi10MaxRn  \
0   108    서울  2025-01-01   2.6  -2.5      0441   8.9      1540             
1   108    서울  2025-01-02   0.5  -2.9      0804   6.0      1507             
2   108    서울  2025-01-03  -1.1  -5.1      0522   3.2      1551             

  mi10MaxRnHrmt  ... avgM05Te avgM10Te avgM15Te avgM30Te avgM50Te sumLrgEv  \
0                ...      3.4      6.9     10.0     16.7     18.1      1.3   
1                ...      3.4      6.8      9.9     16.6     18.0      1.9   
2                ...      3.3      6.7      9.8     16.5     18.0      1.3   

  sumSmlEv n99Rn                         iscs sumFogDur  
0      1.9                                               
1      2.6                                               
2      1.9   0.0  {눈}2052-{눈}{강도0}2100-2122.             

[3 rows x 62 columns]


In [34]:
df.shape

(31, 62)

# 데이터셋

In [35]:
import calendar
from time import sleep

from tqdm import tqdm


def get_weather_data(stn_id, year, month, service_key, timeout=30):
    """
    Fetch one calendar month of daily ASOS records for a single station.

    :param stn_id: station id, sent as the "stnIds" query parameter
    :param year: year as an int
    :param month: month number (1-12) as an int
    :param service_key: API key, sent as the "serviceKey" query parameter
    :param timeout: seconds to wait for the server before giving up
    :return: list of record dicts; empty when the response carries no items
    :raises requests.RequestException: on connection failure, timeout or HTTP error status
    :raises ValueError: when the response body is not valid JSON
    """
    start_date = f"{year}{month:02d}01"
    # Compute the last day of the given month
    last_day = calendar.monthrange(year, month)[1]
    end_date = f"{year}{month:02d}{last_day:02d}"

    url = "http://apis.data.go.kr/1360000/AsosDalyInfoService/getWthrDataList"
    params = {
        "serviceKey": service_key,
        "pageNo": "1",
        "numOfRows": "999",
        "dataType": "JSON",
        "dataCd": "ASOS",
        "dateCd": "DAY",
        "startDt": start_date,
        "endDt": end_date,
        "stnIds": stn_id,
    }

    # A timeout keeps a stalled connection from blocking a long collection run.
    res = requests.get(url, params=params, timeout=timeout)
    res.raise_for_status()
    data = res.json()

    # Walk response -> body -> items -> item defensively: any level may be
    # missing or may not be a dict, in which case there is nothing to return.
    node = data
    for name in ("response", "body", "items", "item"):
        if not isinstance(node, dict):
            return []
        node = node.get(name)

    if not node:
        return []
    # Wrap a lone record so callers can always extend() a list with the result.
    return [node] if isinstance(node, dict) else node


# Stations to collect; the ids are converted to strings.
stn_ids = [
    str(station_number)
    for station_number in (
        90, 93, 95, 98, 99, 100, 101, 102, 104, 105,
        106, 108, 112, 114, 115, 119, 121, 127, 129, 130,
        131, 133, 135, 136, 137, 138, 140, 143, 146, 152,
        155, 156, 159, 162, 165, 168, 169, 170, 172, 174,
        177, 184, 185, 188, 189, 192, 201, 202, 203, 211,
        212, 216, 217, 221, 226, 232, 235, 236, 238, 239,
        243, 244, 245, 247, 248, 251, 252, 253, 254, 255,
        257, 258, 259, 260, 261, 262, 263, 264, 266, 268,
        271, 272, 273, 276, 277, 278, 279, 281, 283, 284,
        285, 288, 289, 294, 295,
    )
]

years1 = list(range(2020, 2025))
months = range(1, 13)

all_data = []
# (station, year, month) of every request that raised, kept so they can be retried.
failed_requests = []

for stn in tqdm(stn_ids, desc="지점"):
    for y in tqdm(years1, leave=False, desc="연도"):
        for m in months:
            try:
                rows = get_weather_data(stn, y, m, WEATHER_API_KEY)
                all_data.extend(rows)
            except Exception as e:
                # Best-effort collection: report and record the failure, then move on.
                print(f"[ERROR] {stn} - {y}-{m}: {e}")
                failed_requests.append((stn, y, m))
            finally:
                # Wait for the rate limit (at most 5 requests per second).
                # In `finally` so a failed request is throttled as well.
                sleep(0.2)

# Save as a DataFrame
df1 = pd.DataFrame(all_data)
df1.to_csv("weather_data1.csv", index=False)

지점:   2%|▏         | 2/95 [02:32<1:57:56, 76.09s/it]

[ERROR] 95 - 2020-7: ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
[ERROR] 95 - 2020-10: Expecting value: line 1 column 1 (char 0)


지점:   5%|▌         | 5/95 [08:51<2:47:55, 111.95s/it]

[ERROR] 100 - 2020-4: Expecting value: line 1 column 1 (char 0)


지점:   8%|▊         | 8/95 [11:57<1:47:37, 74.22s/it] 

[ERROR] 104 - 2023-4: Expecting value: line 1 column 1 (char 0)


지점:  17%|█▋        | 16/95 [18:39<1:09:19, 52.66s/it]

[ERROR] 121 - 2021-10: Expecting value: line 1 column 1 (char 0)
[ERROR] 121 - 2021-11: Expecting value: line 1 column 1 (char 0)




[ERROR] 121 - 2022-8: Expecting value: line 1 column 1 (char 0)


지점:  18%|█▊        | 17/95 [20:38<1:34:18, 72.54s/it]

[ERROR] 121 - 2024-12: Expecting value: line 1 column 1 (char 0)


지점:  25%|██▌       | 24/95 [28:37<1:20:56, 68.40s/it]

[ERROR] 137 - 2021-8: Expecting value: line 1 column 1 (char 0)


지점:  31%|███       | 29/95 [35:38<1:33:20, 84.86s/it]

[ERROR] 152 - 2020-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2020-9: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2020-11: Expecting value: line 1 column 1 (char 0)




[ERROR] 152 - 2021-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2022-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2022-6: Expecting value: line 1 column 1 (char 0)




[ERROR] 152 - 2024-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2024-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2024-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2024-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 152 - 2024-8: Expecting value: line 1 column 1 (char 0)


지점:  32%|███▏      | 30/95 [39:43<2:23:44, 132.69s/it]

[ERROR] 155 - 2022-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2022-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2022-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2022-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2022-6: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2022-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2022-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2022-9: Expecting value: line 1 column 1 (char 0)




[ERROR] 155 - 2022-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2023-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2023-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2023-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2023-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2023-9: Expecting value: line 1 column 1 (char 0)




[ERROR] 155 - 2023-11: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2023-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2024-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2024-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2024-6: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2024-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2024-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 155 - 2024-9: Expecting value: line 1 column 1 (char 0)


지점:  33%|███▎      | 31/95 [41:01<2:04:09, 116.40s/it]

[ERROR] 156 - 2020-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2020-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2020-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2020-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2020-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2020-9: Expecting value: line 1 column 1 (char 0)




[ERROR] 156 - 2020-11: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2020-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2021-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2021-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2021-9: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2021-10: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2021-11: Expecting value: line 1 column 1 (char 0)




[ERROR] 156 - 2022-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2022-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2022-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2022-6: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2022-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2022-9: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2022-10: Expecting value: line 1 column 1 (char 0)




[ERROR] 156 - 2023-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2023-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2023-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2023-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2023-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2023-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2023-11: Expecting value: line 1 column 1 (char 0)




[ERROR] 156 - 2024-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2024-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2024-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2024-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2024-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2024-6: Expecting value: line 1 column 1 (char 0)
[ERROR] 156 - 2024-10: Expecting value: line 1 column 1 (char 0)


지점:  34%|███▎      | 32/95 [41:58<1:43:24, 98.48s/it] 

[ERROR] 159 - 2020-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2020-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2020-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2020-9: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2020-10: Expecting value: line 1 column 1 (char 0)




[ERROR] 159 - 2020-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2021-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2021-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2021-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2021-8: Expecting value: line 1 column 1 (char 0)




[ERROR] 159 - 2021-11: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2021-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2022-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 159 - 2022-4: Expecting value: line 1 column 1 (char 0)


지점:  37%|███▋      | 35/95 [47:38<1:49:11, 109.20s/it]

[ERROR] 168 - 2024-1: Expecting value: line 1 column 1 (char 0)


지점:  39%|███▉      | 37/95 [52:29<2:02:12, 126.41s/it]

[ERROR] 170 - 2022-5: Expecting value: line 1 column 1 (char 0)


지점:  40%|████      | 38/95 [54:30<1:58:33, 124.80s/it]

[ERROR] 172 - 2020-7: Expecting value: line 1 column 1 (char 0)


지점:  45%|████▌     | 43/95 [1:02:48<1:30:05, 103.95s/it]

[ERROR] 188 - 2020-10: Expecting value: line 1 column 1 (char 0)




[ERROR] 188 - 2021-3: Expecting value: line 1 column 1 (char 0)


지점:  46%|████▋     | 44/95 [1:05:03<1:36:21, 113.37s/it]

[ERROR] 189 - 2020-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2021-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2021-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2021-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2021-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2021-10: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2021-11: Expecting value: line 1 column 1 (char 0)




[ERROR] 189 - 2022-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2022-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2022-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2022-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2022-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2022-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2022-11: Expecting value: line 1 column 1 (char 0)




[ERROR] 189 - 2023-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-2: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-6: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2023-9: Expecting value: line 1 column 1 (char 0)




[ERROR] 189 - 2024-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2024-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2024-8: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2024-9: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2024-10: Expecting value: line 1 column 1 (char 0)
[ERROR] 189 - 2024-11: Expecting value: line 1 column 1 (char 0)


지점:  47%|████▋     | 45/95 [1:06:10<1:22:46, 99.33s/it] 

[ERROR] 192 - 2020-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2020-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2020-5: Expecting value: line 1 column 1 (char 0)




[ERROR] 192 - 2020-11: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2020-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2021-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2021-4: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2021-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2021-6: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2021-9: Expecting value: line 1 column 1 (char 0)




[ERROR] 192 - 2021-11: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2021-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2022-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2022-6: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2022-7: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2022-8: Expecting value: line 1 column 1 (char 0)




[ERROR] 192 - 2022-11: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2022-12: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2023-1: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2023-3: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2023-5: Expecting value: line 1 column 1 (char 0)
[ERROR] 192 - 2023-8: Expecting value: line 1 column 1 (char 0)




[ERROR] 192 - 2024-2: Expecting value: line 1 column 1 (char 0)


지점:  52%|█████▏    | 49/95 [1:12:57<1:18:14, 102.06s/it]

[ERROR] 211 - 2020-9: Expecting value: line 1 column 1 (char 0)




[ERROR] 211 - 2021-3: Expecting value: line 1 column 1 (char 0)


지점:  60%|██████    | 57/95 [1:28:06<58:44, 92.75s/it]   


KeyboardInterrupt: 

In [36]:
all_data

[{'stnId': '90',
  'stnNm': '속초',
  'tm': '2020-01-01',
  'avgTa': '0.8',
  'minTa': '-3.4',
  'minTaHrmt': '0024',
  'maxTa': '4.8',
  'maxTaHrmt': '1330',
  'mi10MaxRn': '',
  'mi10MaxRnHrmt': '',
  'hr1MaxRn': '',
  'hr1MaxRnHrmt': '',
  'sumRnDur': '',
  'sumRn': '',
  'maxInsWs': '10.3',
  'maxInsWsWd': '270',
  'maxInsWsHrmt': '0941',
  'maxWs': '5.5',
  'maxWsWd': '290',
  'maxWsHrmt': '1553',
  'avgWs': '2.4',
  'hr24SumRws': '2059',
  'maxWd': '230',
  'avgTd': '-12.6',
  'minRhm': '28',
  'minRhmHrmt': '1312',
  'avgRhm': '36.1',
  'avgPv': '2.3',
  'avgPa': '1024.5',
  'maxPs': '1028.2',
  'maxPsHrmt': '0920',
  'minPs': '1025.4',
  'minPsHrmt': '1416',
  'avgPs': '1026.7',
  'ssDur': '9.6',
  'sumSsHr': '8.9',
  'hr1MaxIcsrHrmt': '',
  'hr1MaxIcsr': '',
  'sumGsr': '',
  'ddMefs': '',
  'ddMefsHrmt': '',
  'ddMes': '',
  'ddMesHrmt': '',
  'sumDpthFhsc': '',
  'avgTca': '0.3',
  'avgLmac': '0.0',
  'avgTs': '-0.2',
  'minTg': '-7.0',
  'avgCm5Te': '',
  'avgCm10Te': '',
  '

In [37]:
# Build the DataFrame from whatever all_data currently holds and write it to weather_data1.csv.
# NOTE(review): the recorded output of the collection cell ends in KeyboardInterrupt,
# so this presumably saves the partial result — confirm.
df1 = pd.DataFrame(all_data)
df1.to_csv("weather_data1.csv", index=False)

In [41]:
df1

Unnamed: 0,stnId,stnNm,tm,avgTa,minTa,minTaHrmt,maxTa,maxTaHrmt,mi10MaxRn,mi10MaxRnHrmt,...,avgM05Te,avgM10Te,avgM15Te,avgM30Te,avgM50Te,sumLrgEv,sumSmlEv,n99Rn,iscs,sumFogDur
0,90,속초,2020-01-01,0.8,-3.4,0024,4.8,1330,,,...,,,,,,,,,,
1,90,속초,2020-01-02,2.8,-2.1,0037,6.6,1428,,,...,,,,,,,,,,
2,90,속초,2020-01-03,4.7,0.8,0638,8.5,1256,,,...,,,,,,,,,,
3,90,속초,2020-01-04,4.0,1.1,2400,8.1,1353,,,...,,,,,,,,,,
4,90,속초,2020-01-05,3.1,-0.6,0547,8.8,1350,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99578,235,보령,2024-12-27,0.4,-2.1,0739,3.8,1554,,,...,,,,,,0.9,1.3,,,
99579,235,보령,2024-12-28,-0.5,-2.8,0750,1.8,1552,,,...,,,,,,0.7,1.0,,,
99580,235,보령,2024-12-29,3.5,-0.7,0742,8.2,1346,,,...,,,,,,1.4,1.9,,,
99581,235,보령,2024-12-30,7.9,4.8,0020,11.5,1418,,,...,,,,,,1.4,2.0,,,


In [38]:
df1.columns

Index(['stnId', 'stnNm', 'tm', 'avgTa', 'minTa', 'minTaHrmt', 'maxTa',
       'maxTaHrmt', 'mi10MaxRn', 'mi10MaxRnHrmt', 'hr1MaxRn', 'hr1MaxRnHrmt',
       'sumRnDur', 'sumRn', 'maxInsWs', 'maxInsWsWd', 'maxInsWsHrmt', 'maxWs',
       'maxWsWd', 'maxWsHrmt', 'avgWs', 'hr24SumRws', 'maxWd', 'avgTd',
       'minRhm', 'minRhmHrmt', 'avgRhm', 'avgPv', 'avgPa', 'maxPs',
       'maxPsHrmt', 'minPs', 'minPsHrmt', 'avgPs', 'ssDur', 'sumSsHr',
       'hr1MaxIcsrHrmt', 'hr1MaxIcsr', 'sumGsr', 'ddMefs', 'ddMefsHrmt',
       'ddMes', 'ddMesHrmt', 'sumDpthFhsc', 'avgTca', 'avgLmac', 'avgTs',
       'minTg', 'avgCm5Te', 'avgCm10Te', 'avgCm20Te', 'avgCm30Te', 'avgM05Te',
       'avgM10Te', 'avgM15Te', 'avgM30Te', 'avgM50Te', 'sumLrgEv', 'sumSmlEv',
       'n99Rn', 'iscs', 'sumFogDur'],
      dtype='object')

In [39]:
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 99583 entries, 0 to 99582
Data columns (total 62 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   stnId           99583 non-null  object
 1   stnNm           99583 non-null  object
 2   tm              99583 non-null  object
 3   avgTa           99583 non-null  object
 4   minTa           99583 non-null  object
 5   minTaHrmt       99583 non-null  object
 6   maxTa           99583 non-null  object
 7   maxTaHrmt       99583 non-null  object
 8   mi10MaxRn       99583 non-null  object
 9   mi10MaxRnHrmt   99583 non-null  object
 10  hr1MaxRn        99583 non-null  object
 11  hr1MaxRnHrmt    99583 non-null  object
 12  sumRnDur        99583 non-null  object
 13  sumRn           99583 non-null  object
 14  maxInsWs        99583 non-null  object
 15  maxInsWsWd      99583 non-null  object
 16  maxInsWsHrmt    99583 non-null  object
 17  maxWs           99583 non-null  object
 18  maxWsW

In [40]:
df1.shape

(99583, 62)

In [48]:
# Count null values per column.
# NOTE(review): the raw API items shown earlier use '' for missing values, which
# isnull() does not count — an all-zero result presumably does not mean the data is complete.
df1.isnull().sum()

stnId        0
stnNm        0
tm           0
avgTa        0
minTa        0
            ..
sumLrgEv     0
sumSmlEv     0
n99Rn        0
iscs         0
sumFogDur    0
Length: 62, dtype: int64

In [None]:
years2 = list(range(2015, 2020))  # to be run later

# NOTE(review): rows are appended to the existing all_data, so df2 and
# weather_data2.csv will also contain whatever all_data already holds —
# confirm that this is intended.
for stn in tqdm(stn_ids, desc="지점"):
    for y in tqdm(years2, leave=False, desc="연도"):
        for m in months:
            try:
                rows = get_weather_data(stn, y, m, WEATHER_API_KEY)
                all_data.extend(rows)
            except Exception as e:
                # Best-effort collection: report the failure and move on.
                print(f"[ERROR] {stn} - {y}-{m}: {e}")
            finally:
                # Wait for the rate limit (at most 5 requests per second).
                # In `finally` so a failed request is throttled as well.
                sleep(0.2)

# Save as a DataFrame
df2 = pd.DataFrame(all_data)
df2.to_csv("weather_data2.csv", index=False)