# Notebook 기본 세팅

In [1]:
# Constant declarations

# Name of the marker file used to identify the project root directory.
ROOT_MARKER = "pyproject.toml"

# File name of the NanumBarunGothic font used to render Korean text.
# matplotlib's font_manager must be given the location of the actual font file.
KOREAN_FONT_FILE = "NanumBarunGothic.ttf"

# matplotlib selects a font by its font-family name,
# so this holds the family name of that file rather than the font file itself.
KOREAN_FONT_FAMILY = "NanumBarunGothic"

# Note
# The difference between a font family and a font file:
# a font family is a group of typefaces that share similar design characteristics.
#
# For example, the 'NanumBarunGothic' family can include several styles such as Regular, Bold, and Italic.
# A font file (.ttf, .otf, ...), on the other hand, is the actual file that stores a single style of such a font.
#
# To keep the font size small, this project uses only the Regular style, NanumBarunGothic.ttf.

In [2]:
# 프로젝트 root 를 sys.path 에 추가해서 import 구문을 사용하기 쉽게
from pathlib import Path


def find_project_root(start: "Path | None" = None, marker: "str | None" = None) -> Path:
    """
    Find the project root by walking upward until a marker file is found.

    The starting directory and every one of its ancestors, including the
    filesystem root itself, are checked in order, nearest first.

    :param start: Directory to start searching from. Defaults to the current
        working directory.
    :param marker: Name of the file that identifies the project root.
        Defaults to the module-level ``ROOT_MARKER``.
    :return: Path: resolved path of the nearest directory containing the marker.
    :raises FileNotFoundError: if neither the start directory nor any ancestor
        contains the marker.
    """
    # Look the default up at call time so an explicit marker never needs ROOT_MARKER.
    marker_name = ROOT_MARKER if marker is None else marker
    origin = (Path() if start is None else Path(start)).resolve()

    # ``parents`` ends with the filesystem root, so a marker placed there is found too.
    for candidate in (origin, *origin.parents):
        if (candidate / marker_name).exists():
            return candidate

    raise FileNotFoundError("프로젝트 루트 디렉토리를 찾을 수 없습니다.")


# Resolve the project root once when this cell runs; later cells build paths from it.
# NOTE(review): nothing in this cell inserts ROOT_DIR into sys.path -- confirm whether that is still needed.
ROOT_DIR = find_project_root()

In [3]:
# matplotlib 의 한글 font 설정
import matplotlib.font_manager as fm
import matplotlib.pyplot as plt


# Directory under the project root from which the font file is loaded.
FONTS_DATA_DIR = ROOT_DIR / "notebooks" / "fonts"


def setup_korean_font():
    """
    Register the Korean font file with matplotlib and select it globally.

    Reads ``FONTS_DATA_DIR``, ``KOREAN_FONT_FILE`` and ``KOREAN_FONT_FAMILY``
    and updates the process-wide ``plt.rcParams`` in place.
    """
    # addfont needs the location of the font file itself, not the family name.
    fm.fontManager.addfont(FONTS_DATA_DIR / KOREAN_FONT_FILE)

    # Font settings. With axes.unicode_minus off, matplotlib draws minus signs
    # with the ASCII hyphen instead of U+2212
    # (presumably because this font lacks that glyph -- confirm).
    plt.rcParams.update(
        {
            "font.family": KOREAN_FONT_FAMILY,
            "axes.unicode_minus": False,
        }
    )


# Apply the font configuration as soon as this cell runs.
setup_korean_font()

# Data Pipeline Sample

In [4]:
import pandas as pd

from src.data.loader import AsosDataLoader
from src.libs.storage import Storage


# Create the storage object through the class's create() call.
# NOTE(review): which backend or credentials create() picks up is not visible here -- confirm.
storage = Storage.create()

# The loader is constructed with the storage object (presumably it reads through it -- confirm).
loader = AsosDataLoader(storage)

- 종관 지점별 5년치 기상 데이터 가져오기

In [5]:
# Load the data through the loader. The result is consumed below via .values(),
# so it is mapping-like (presumably keyed by station -- confirm against AsosDataLoader).
df_per_station = loader.load()

[2025-05-30 15:50:25] INFO [src.libs.storage._check_and_log_response] Success to retrieve 
[2025-05-30 15:50:25] INFO [src.libs.storage._check_and_log_response] Success to read datasets/20200527-20250527-100-daegwallyeong.csv
[2025-05-30 15:50:25] INFO [src.libs.storage._check_and_log_response] Success to read datasets/20200527-20250527-101-chuncheon.csv
[2025-05-30 15:50:26] INFO [src.libs.storage._check_and_log_response] Success to read datasets/20200527-20250527-102-baengnyeongdo.csv
[2025-05-30 15:50:26] INFO [src.libs.storage._check_and_log_response] Success to read datasets/20200527-20250527-105-gangneung.csv
[2025-05-30 15:50:26] INFO [src.libs.storage._check_and_log_response] Success to read datasets/20200527-20250527-106-donghae.csv
[2025-05-30 15:50:26] INFO [src.libs.storage._check_and_log_response] Success to read datasets/20200527-20250527-108-seoul.csv
[2025-05-30 15:50:26] INFO [src.libs.storage._check_and_log_response] Success to read datasets/20200527-20250527-112-inch

- 지점별 데이터를 한 개의 DataFrame 으로 병합하기

In [6]:
# Stack the values of df_per_station row-wise into a single DataFrame.
# NOTE(review): pd.concat keeps each input's own index labels unless ignore_index=True is passed;
# confirm whether repeated labels across stations are acceptable downstream.
total_df = pd.concat(list(df_per_station.values()))
# Bare expression so the notebook renders the frame as the cell output.
total_df

Unnamed: 0,stn_id,stn_nm,tm,avg_ta,min_ta,min_ta_hrmt,max_ta,max_ta_hrmt,sum_rn_dur,mi10_max_rn,...,avg_m05_te,avg_m10_te,avg_m15_te,avg_m30_te,avg_m50_te,sum_lrg_ev,sum_sml_ev,n99_rn,iscs,sum_fog_dur
0,100,대관령,2020-05-27,13.4,8.0,529.0,19.2,1332.0,,0.1,...,,,,,,3.8,5.4,,,
1,100,대관령,2020-05-28,12.3,7.0,243.0,19.4,1144.0,,0.0,...,,,,,,3.4,4.8,,,
2,100,대관령,2020-05-29,12.6,4.4,550.0,19.9,1207.0,,,...,,,,,,4.7,6.7,,,
3,100,대관령,2020-05-30,14.1,4.0,503.0,22.8,1426.0,,,...,,,,,,5.7,8.2,,,
4,100,대관령,2020-05-31,16.5,5.6,453.0,22.3,1058.0,,,...,,,,,,4.4,6.3,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1822,99,파주,2025-05-23,16.5,13.6,2346.0,21.9,1425.0,,0.5,...,,,,,,1.7,2.4,,,
1823,99,파주,2025-05-24,14.8,9.6,2333.0,19.5,1248.0,,0.1,...,,,,,,2.7,3.8,,,
1824,99,파주,2025-05-25,15.5,7.6,520.0,22.3,1533.0,,,...,,,,,,4.2,6.1,,,
1825,99,파주,2025-05-26,16.7,9.0,526.0,25.3,1501.0,,,...,,,,,,4.2,6.0,,,


- 결측치 처리기

In [7]:
from src.data.imputer import WeatherDataImputer


# Instantiate the imputer; no arguments are passed, so it uses whatever defaults it defines.
imputer = WeatherDataImputer()

In [8]:
# Run the imputer over the merged frame and bind the result to a new name.
# NOTE(review): whether transform() copies or mutates total_df in place is not visible here -- confirm.
transformed_df = imputer.transform(total_df)
# Bare expression so the notebook renders the result as the cell output.
transformed_df

Unnamed: 0,stn_id,stn_nm,tm,avg_ta,min_ta,min_ta_hrmt,max_ta,max_ta_hrmt,mi10_max_rn,mi10_max_rn_hrmt,...,max_ps_hrmt,min_ps,min_ps_hrmt,avg_ps,ss_dur,sum_ss_hr,avg_tca,avg_lmac,avg_ts,min_tg
0,100,대관령,2020-05-27,19.8,15.3,529.0,25.7,1332.0,0.1,0.0,...,2359.0,1004.9,1722.0,1008.3,14.3,11.2,5.8,0.1,24.9,4.9
1,100,대관령,2020-05-28,20.2,14.0,243.0,25.9,1144.0,0.0,0.0,...,2340.0,1006.7,1255.0,1009.1,14.3,12.6,1.8,0.4,26.1,3.2
2,100,대관령,2020-05-29,20.1,15.1,550.0,26.6,1207.0,0.0,0.0,...,2338.0,1013.4,949.0,1014.7,14.3,12.3,4.9,0.3,26.6,0.6
3,100,대관령,2020-05-30,19.8,16.2,503.0,24.0,1426.0,0.0,0.0,...,502.0,1013.8,1553.0,1017.1,14.3,0.5,9.8,0.8,23.3,0.6
4,100,대관령,2020-05-31,18.9,15.5,453.0,22.9,1058.0,0.0,0.0,...,15.0,1006.7,1730.0,1012.4,14.3,0.7,7.6,6.0,22.6,2.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1822,99,파주,2025-05-23,18.4,16.0,2346.0,21.2,1425.0,0.5,1700.0,...,2242.0,1009.7,58.0,1011.6,14.2,1.1,9.6,7.0,21.0,14.0
1823,99,파주,2025-05-24,15.4,14.2,2333.0,18.6,1248.0,0.1,0.0,...,2339.0,1009.5,1556.0,1008.4,14.2,0.6,9.6,4.8,16.5,9.8
1824,99,파주,2025-05-25,16.9,13.8,520.0,22.3,1533.0,0.0,0.0,...,2243.0,1013.3,2.0,1014.6,14.2,10.6,3.5,2.6,24.2,8.0
1825,99,파주,2025-05-26,17.6,13.5,526.0,23.4,1501.0,0.0,0.0,...,835.0,1016.0,1653.0,1018.2,14.2,8.8,2.9,1.1,24.6,9.5


- 날씨 label(맑음, 흐림, 비, 눈) 을 추가

In [9]:
from src.data.labeler import WeatherLabeler


# Compute one label per row: each row is wrapped in a WeatherLabeler, and the .value of
# what determine_weather_label() returns is stored in the new "weather" column.
# axis=1 makes apply() hand rows (not columns) to the lambda.
transformed_df["weather"] = transformed_df.apply(
    lambda row: WeatherLabeler(row).determine_weather_label().value, axis=1
)