In [None]:
# !pip -q install geopandas shapely pyproj fiona pandas


In [None]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

from pathlib import Path
# Project root (edit only this one line if the project lives elsewhere).
ROOT = '/content/drive/MyDrive/colab/firecast'

# Data locations, all derived from ROOT.
# Sample layout of the fire folder: .../fires/FRT000102_42/TB_FFAS_FF_OCCRR_42.*
P_FIRE_DIR = f'{ROOT}/data/raw/fires/FRT000102_42'
P_WEATHER_DIR = f'{ROOT}/data/raw/weather'
P_PROC_DIR = f'{ROOT}/data/processed'
P_FEAT_DIR = f'{ROOT}/data/features'

import os

# Create the output folders up front; exist_ok makes re-running this cell harmless.
for _out_dir in (P_PROC_DIR, P_FEAT_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# Echo the resolved locations so a wrong ROOT is spotted immediately.
for _label, _value in (
    ('ROOT:', ROOT),
    ('FIRE_DIR:', P_FIRE_DIR),
    ('WEATHER_DIR:', P_WEATHER_DIR),
):
    print(_label, _value)


In [None]:
import geopandas as gpd
import glob, os, pandas as pd

from IPython.display import display

# Locate the fire shapefile: exactly one *.shp is expected inside P_FIRE_DIR.
shp_candidates = glob.glob(os.path.join(P_FIRE_DIR, '*.shp'))
assert len(shp_candidates) == 1, f'*.shp가 1개가 아닙니다: {shp_candidates}'
FIRE_SHP = shp_candidates[0]
print('FIRE_SHP:', FIRE_SHP)

# Load the layer and print a quick structural summary of the raw data.
fires = gpd.read_file(FIRE_SHP)
print('rows, cols:', fires.shape)
print('CRS:', fires.crs)
print('bounds:', fires.total_bounds)
print('columns:', list(fires.columns))

# Keep only records whose OCCRR_DTM starts with 2020 or 2021.
# The year prefix (first 4 characters) is used as a mask only, so no temporary
# column has to be added to and then dropped from the frame.
occurrence_year = fires['OCCRR_DTM'].astype(str).str[:4]
fires = fires[occurrence_year.isin(['2020', '2021'])].copy()

# A bare expression in the middle of a cell is never rendered by Jupyter,
# so the post-filter size is printed explicitly.
print('rows, cols after 2020-2021 filter:', fires.shape)

display(fires)

In [None]:
# Columns worth a quick frequency check; names missing from `fires` are skipped.
# NOTE(review): 'OCCCR_RI' has three C's unlike 'OCCRR_DTM' — confirm the spelling
# against the shapefile columns, since a mismatch is silently filtered out here.
show_cols = [
    name
    for name in ('CTPRV_NM', 'SGNG_NM', 'EMNDN_NM', 'OCCCR_RI', 'OCCRR_DTM', 'OCUR_DYWK')
    if name in fires.columns
]

# Print the 10 most frequent values of each column (missing values are counted too).
for col_name in show_cols:
    print(f'\n[{col_name}] top values:')
    value_freq = fires[col_name].astype('string').value_counts(dropna=False)
    print(value_freq.head(10))


In [None]:
import numpy as np

from IPython.display import display

# Derive date fields from the occurrence timestamp column.
# NOTE(review): OCCRR_DTM is presumably formatted like 'YYYYMMDDHHMM' — confirm against the data.
date_col = 'OCCRR_DTM' if 'OCCRR_DTM' in fires.columns else None
if date_col:
    # Only the first 8 characters (YYYYMMDD) are parsed; values that do not
    # match the format become NaT because of errors='coerce'.
    fires['OCCRR_DATE_STR'] = fires[date_col].astype('string').str.slice(0, 8)
    fires['OCCRR_DATE'] = pd.to_datetime(fires['OCCRR_DATE_STR'], errors='coerce', format='%Y%m%d')
    fires['YEAR'] = fires['OCCRR_DATE'].dt.year
    fires['MONTH'] = fires['OCCRR_DATE'].dt.month
    # A bare expression inside an `if` body is never rendered by Jupyter,
    # so the preview has to be displayed explicitly.
    display(fires[[date_col, 'OCCRR_DATE', 'YEAR', 'MONTH']].head())
else:
    print('날짜 컬럼(OCCRR_DTM)이 없어 파생을 건너뜁니다.')


In [None]:
# Prepare a WGS84 (EPSG:4326) copy for web maps / visualisation.
# Reprojection only happens when a CRS is set and it is not already reported
# as 'EPSG:4326'; otherwise the frame is simply copied.
if fires.crs and fires.crs.to_string() != 'EPSG:4326':
    fires_4326 = fires.to_crs('EPSG:4326')
else:
    fires_4326 = fires.copy()

# Quick-look scatter of the fire points.
ax = fires_4326.plot(markersize=1, figsize=(6, 6))
_ = ax.set_title('Fire points (quick look, EPSG:4326)')


In [None]:
import glob

# Gather every *.csv directly under P_WEATHER_DIR, in sorted order.
csv_list = glob.glob(os.path.join(P_WEATHER_DIR, '*.csv'))
csv_list.sort()

# Report how many files were found, then list them by file name only.
print('weather csv files:', len(csv_list))
for p in csv_list:
    file_name = os.path.basename(p)
    print('-', file_name)

# 첫 몇 개만 열어 컬럼 구조 확인
def pick(cols, candidates):
    """Return the first column label matching any candidate, ignoring case.

    Args:
        cols: Iterable of column labels (e.g. ``df.columns``).
        candidates: Names to look for, in priority order.

    Returns:
        The original label from ``cols`` whose lower-cased text equals the
        lower-cased text of the earliest matching candidate, or ``None``
        when no candidate matches. If several labels differ only by case,
        the last one in ``cols`` is returned.
    """
    # Labels are stringified before lower-casing so that non-string labels
    # (for example integer headers) do not raise AttributeError.
    by_lower = {str(label).lower(): label for label in cols}
    for name in candidates:
        key = str(name).lower()
        if key in by_lower:
            return by_lower[key]
    return None

# Peek at the first two weather files only and record which station /
# coordinate columns each header provides.
meta = []
for p in csv_list[:2]:
    # Only the first 50 rows are read; the files are decoded as cp949.
    df0 = pd.read_csv(p, nrows=50, low_memory=False, encoding='cp949')

    # Resolve each role (station id, lat, lon, TM x, TM y) to an actual column name.
    stn_col, lat_col, lon_col, tmx_col, tmy_col = [
        pick(df0.columns, names)
        for names in (
            ['STN','stn','stnid','지점','지점번호'],
            ['LAT','lat','위도'],
            ['LON','lon','경도'],
            ['TM_X','tm_x','X','x'],
            ['TM_Y','tm_y','Y','y'],
        )
    ]

    meta.append(dict(
        file=os.path.basename(p),
        stn_col=stn_col,
        lat_col=lat_col,
        lon_col=lon_col,
        tmx_col=tmx_col,
        tmy_col=tmy_col,
        n_rows_look=len(df0),
    ))

# Last expression of the cell: rendered as a table, one row per inspected file.
pd.DataFrame(meta)