In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import pandas_gbq

import requests
from bs4 import BeautifulSoup
import json
import lxml

from datetime import datetime, timedelta
import time

import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import folium
from folium.plugins import MarkerCluster, HeatMap

from shapely.geometry import Point, Polygon, MultiPolygon, shape
from shapely import wkt

import googlemaps
from google.cloud import bigquery
from google.oauth2 import service_account

import os
import glob
import warnings

DATA_PATH = "data/"
KEY_PATH = "config/"

key_path = KEY_PATH + "fireforest-team-ys-2023.json"
servicekey_path = KEY_PATH + "serviceKey.json"

warnings.filterwarnings("ignore")

In [2]:
def get_service_key(servicekey_path, key_name):
    """
    주어진 서비스 키 파일에서 지정된 키 이름에 해당하는 서비스 키를 반환합니다.

    Args:
        servicekey_path (str): 서비스 키 파일의 경로.
        key_name (str): 반환할 서비스 키의 이름.

    Returns:
        str or None: 지정된 키 이름에 해당하는 서비스 키. 키를 찾을 수 없는 경우 None을 반환합니다.
    """
    
    with open(servicekey_path) as f:
        data = json.load(f)
        service_key = data.get(key_name)
    return service_key

In [3]:
def save_dataframe_to_bigquery(df, dataset_id, table_id, key_path):
    """
    주어진 데이터프레임을 BigQuery 테이블에 저장합니다.

    Args:
        df (pandas.DataFrame): 저장할 데이터프레임.
        dataset_id (str): 대상 데이터셋의 ID.
        table_id (str): 대상 테이블의 ID.
        key_path (str): 서비스 계정 키 파일의 경로.

    Returns:
        None
    """
    
    # Credentials 객체 생성
    credentials = service_account.Credentials.from_service_account_file(key_path)

    # 빅쿼리 클라이언트 객체 생성
    client = bigquery.Client(credentials=credentials)

    # 테이블 레퍼런스 생성
    table_ref = client.dataset(dataset_id).table(table_id)

    # 데이터프레임을 BigQuery 테이블에 적재
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = "WRITE_TRUNCATE"  # 기존 테이블 내용 삭제 후 삽입

    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()  # 작업 완료 대기

    print(f"Data inserted into table {table_id} successfully.")

In [4]:
def get_dataframe_from_bigquery(dataset_id, table_id, key_path):
    """
    주어진 BigQuery 테이블에서 데이터를 조회하여 DataFrame으로 반환합니다.

    Args:
        dataset_id (str): 대상 데이터셋의 ID.
        table_id (str): 대상 테이블의 ID.
        key_path (str): 서비스 계정 키 파일의 경로.

    Returns:
        pandas.DataFrame: 조회된 데이터를 담은 DataFrame 객체.
    """

    # Credentials 객체 생성
    credentials = service_account.Credentials.from_service_account_file(key_path)

    # BigQuery 클라이언트 생성
    client = bigquery.Client(credentials=credentials, project=credentials.project_id)

    # 테이블 레퍼런스 생성
    table_ref = client.dataset(dataset_id).table(table_id)

    # 테이블 데이터를 DataFrame으로 변환
    df = client.list_rows(table_ref).to_dataframe()

    return df

In [5]:
def save_geodataframe_to_bigquery(gdf, dataset_id, table_id, key_path):
    """
    주어진 Geopandas GeoDataFrame을 BigQuery 테이블에 저장합니다.

    Args:
        gdf (geopandas.GeoDataFrame): 저장할 Geopandas GeoDataFrame 객체.
        dataset_id (str): 대상 데이터셋의 ID.
        table_id (str): 대상 테이블의 ID.
        key_path (str): 서비스 계정 키 파일의 경로.

    Returns:
        None
    """
        
    gdf = gdf.to_crs('EPSG:4326')
    gdf['geometry'] = gdf['geometry'].astype(str)

    # Geopandas GeoDataFrame을 Pandas DataFrame으로 변환
    df = pd.DataFrame(gdf)

    # Credentials 객체 생성
    credentials = service_account.Credentials.from_service_account_file(key_path)

    # 빅쿼리 클라이언트 객체 생성
    client = bigquery.Client(credentials=credentials)

    # 테이블 레퍼런스 생성
    table_ref = client.dataset(dataset_id).table(table_id)

    # 데이터프레임을 BigQuery 테이블에 적재
    job_config = bigquery.LoadJobConfig()
    job_config.write_disposition = "WRITE_TRUNCATE"  # 기존 테이블 내용 삭제 후 삽입

    job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)
    job.result()  # 작업 완료 대기

    print(f"Data inserted into table {table_id} successfully.")

In [6]:
def get_geodataframe_from_bigquery(dataset_id, table_id, key_path):
    """
    주어진 BigQuery 테이블에서 데이터를 조회하여 Geopandas GeoDataFrame으로 반환합니다.

    Args:
        dataset_id (str): 대상 데이터셋의 ID.
        table_id (str): 대상 테이블의 ID.
        key_path (str): 서비스 계정 키 파일의 경로.

    Returns:
        geopandas.GeoDataFrame: 조회된 데이터를 담은 Geopandas GeoDataFrame 객체.
    """
    
    # Credentials 객체 생성
    credentials = service_account.Credentials.from_service_account_file(key_path)

    # 빅쿼리 클라이언트 객체 생성
    client = bigquery.Client(credentials=credentials)

    # 쿼리 작성
    query = f"SELECT * FROM `{dataset_id}.{table_id}`"

    # 쿼리 실행
    df = client.query(query).to_dataframe()
    
    # 'geometry' 열의 문자열을 다각형 객체로 변환
    df['geometry'] = df['geometry'].apply(wkt.loads)

    # GeoDataFrame으로 변환
    gdf = gpd.GeoDataFrame(df, geometry='geometry')
    gdf.crs = "EPSG:4326"

    return gdf

In [7]:
def get_lat_lng(address, api_key):
    """
    주소를 기반으로 위도와 경도 정보를 조회합니다.

    Args:
        address (str): 위도와 경도를 조회할 주소.
        api_key (str): Google Maps API 키.

    Returns:
        tuple: 조회된 위도와 경도 정보를 담은 튜플. (latitude, longitude). 주소를 찾을 수 없는 경우 (None, None)을 반환합니다.
    """
    
    gmaps = googlemaps.Client(key=api_key)
    
    geocode_result = gmaps.geocode(address, language='ko')
    if geocode_result:
        location = geocode_result[0]['geometry']['location']
        latitude = location['lat']
        longitude = location['lng']
        return latitude, longitude
    else:
        return None, None

In [8]:
def add_lat_lng_to_dataframe(dataframe, address_column, api_key):
    """
    데이터프레임에 주소 열의 값을 기반으로 위도와 경도 정보를 추가합니다.

    Args:
        dataframe (pandas.DataFrame): 주소 열에 위도와 경도 정보를 추가할 데이터프레임.
        address_column (str): 주소 정보가 포함된 열의 이름.
        api_key (str): Google Maps API 키.

    Returns:
        None
    """
    
    latitudes = []
    longitudes = []

    for address in dataframe[address_column]:
        latitude, longitude = get_lat_lng(address, api_key)
        latitudes.append(latitude)
        longitudes.append(longitude)

    dataframe['latitude'] = latitudes
    dataframe['longitude'] = longitudes

# RAW_DATA 처리

In [9]:
# Collection Data Load

weather_stations = pd.read_csv(DATA_PATH + "weather_stations.csv", encoding="cp949")
weather_days = pd.read_csv(DATA_PATH + "weather_days.csv", encoding="cp949")

forestfire_occurs = pd.read_csv(DATA_PATH + "forestfire_occurs.csv", encoding="cp949", dtype="object")
forestfire_occurs_add = pd.read_csv(DATA_PATH + "forestfire_occurs_concat.csv", encoding="cp949", dtype="object")

gangwon_SGG = gpd.read_file(DATA_PATH + "시군구_강원/LARD_ADM_SECT_SGG_42.shp", encoding='cp949')
gangwon_UMD = gpd.read_file(DATA_PATH + "읍면동(법정동)_강원/LSMD_ADM_SECT_UMD_42.shp", encoding='cp949')
gangwon_code = pd.read_csv(DATA_PATH + "gangwon_code.csv", encoding="cp949")

In [10]:
# BigQuery 에 RAW_DATA 저장

save_dataframe_to_bigquery(weather_stations, "RAW_DATA", "weather_stations", key_path)
save_dataframe_to_bigquery(weather_days, "RAW_DATA", "weather_days", key_path)
save_dataframe_to_bigquery(forestfire_occurs, "RAW_DATA", "forestfire_occurs", key_path)
save_dataframe_to_bigquery(forestfire_occurs_add, "RAW_DATA", "forestfire_occurs_add", key_path)
save_dataframe_to_bigquery(gangwon_code, "RAW_DATA", "gangwon_code", key_path)

save_geodataframe_to_bigquery(gangwon_SGG, "RAW_DATA", "gangwon_SGG", key_path)
save_geodataframe_to_bigquery(gangwon_UMD, "RAW_DATA", "gangwon_UMD", key_path)

Data inserted into table weather_stations successfully.
Data inserted into table weather_days successfully.
Data inserted into table forestfire_occurs successfully.
Data inserted into table forestfire_occurs_add successfully.
Data inserted into table gangwon_code successfully.
Data inserted into table gangwon_SGG successfully.
Data inserted into table gangwon_UMD successfully.


# PREPROCESSING_DATA 처리

In [11]:
# BigQuery 에 RAW_DATA Load

weather_stations = get_dataframe_from_bigquery("RAW_DATA", "weather_stations", key_path).sort_values(["stnId"])
weather_days = get_dataframe_from_bigquery("RAW_DATA", "weather_days", key_path).sort_values(["stnId", "tm"])
forestfire_occurs = get_dataframe_from_bigquery("RAW_DATA", "forestfire_occurs", key_path).sort_values(["objt_id", "occu_date"])
forestfire_occurs_add = get_dataframe_from_bigquery("RAW_DATA", "forestfire_occurs_add", key_path).sort_values(["objt_id", "occu_date"])
gangwon_code = get_dataframe_from_bigquery("RAW_DATA", "gangwon_code", key_path).sort_values(["code"])

gangwon_SGG = get_geodataframe_from_bigquery("RAW_DATA", "gangwon_SGG", key_path).sort_values(["ADM_SECT_C", "SGG_NM"])
gangwon_UMD = get_geodataframe_from_bigquery("RAW_DATA", "gangwon_UMD", key_path).sort_values(["EMD_CD"])

#### 기상관측소 정보를 통해 현재 운영중인 강원도 지역의 기상관측소 찾기

In [12]:
weather_stations = weather_stations[weather_stations["stnAddress"].str.contains("강원도") & weather_stations["endDate"].isna()]
weather_stations = weather_stations.reset_index(drop=True)
weather_stations

Unnamed: 0,stnId,startDate,endDate,stnNm,stnAddress,stnAdministrative,stnLatitude,stnLongitude,elevation,barometer,thermometer,anemometer,raingauge
0,90,1968-01-01,,속초,강원도 고성군토성면 봉포5길9 속초자동기상관측소,속초기상대(90),38.2509,128.5647,17.53,18.73,1.7,10.0,1.4
1,93,2016-10-01,,북춘천,강원도 춘천시신북읍 산천리264(장본1길 12) 춘천기상대,춘천기상대(101),37.9474,127.7544,95.78,96.78,1.5,10.0,1.4
2,95,1988-01-01,,철원,강원도 철원군갈말읍 명성로179번길 26 철원자동기상관측소,춘천기상대(101),38.1479,127.3042,155.48,156.98,1.8,13.0,1.5
3,100,2006-11-07,,대관령,강원도 평창군대관령면 경강로5372 대관령자동기상관측소,대관령기상대(100),37.6771,128.7183,772.43,773.43,1.7,10.0,1.4
4,101,1966-01-01,,춘천,강원도 춘천시충열로 91번길12 춘천자동기상관측소,춘천기상대(101),37.9026,127.7357,75.82,77.05,1.5,10.0,1.4
5,104,2008-07-28,,북강릉,강원도 강릉시사천면 과학단지로130 강원지방기상청,강릉(구 105),37.8046,128.8554,75.24,76.67,1.7,10.0,1.4
6,105,1911-10-03,,강릉,강원도 강릉시용강동 63-20강릉자동기상관측소,강릉(구 105),37.7515,128.891,27.12,28.22,1.7,10.0,0.5
7,106,1992-05-01,,동해,강원도 동해시중앙로 31동해자동기상관측소,동해기상대(106),37.5071,129.1243,40.46,41.66,1.7,10.0,1.4
8,114,1971-09-06,,원주,강원도 원주시단구로 159원주자동기상관측소,원주기상대(114),37.3375,127.9466,150.11,151.11,1.7,14.0,1.4
9,121,1994-12-01,,영월,강원도 영월군영월읍 영월로 1894-25 영월자동기상관측소,영월기상대(121),37.1813,128.4574,240.54,242.05,1.7,10.0,1.4


#### 강원도 지역코드 데이터 전처리

In [13]:
gangwon_code["code"] = gangwon_code["code"].astype(str).str[:-2]
gangwon_code['code'] = gangwon_code['code'].drop_duplicates()
gangwon_code = gangwon_code[~gangwon_code["code"].isna()].rename(columns={"code":"EMD_CD"})
gangwon_code

Unnamed: 0,EMD_CD,address
0,42000000,강원도
1,42110000,강원도 춘천시
2,42110101,강원도 춘천시 봉의동
3,42110102,강원도 춘천시 요선동
4,42110103,강원도 춘천시 낙원동
...,...,...
1367,42830310,강원도 양양군 서면
1388,42830320,강원도 양양군 손양면
1413,42830330,강원도 양양군 현북면
1428,42830340,강원도 양양군 현남면


#### 강원도 지역 12 분할 (참고 : 강원지방기상청 관할 구역 지도)

In [14]:
# 강원북부내륙: 철원, 화천, 양구(1), 인제(1) 
# 강원중부내륙: 춘천, 홍천(1)
# 강원남부내륙: 원주, 횡성, 영월, 평창(1), 정선(1)
# 강원북부영서산지: 양구(2), 인제(2)
# 강원중부영서산지: 홍천(2), 평창(2)
# 강원남부영서산지: 정선(2)
# 강원북부영동산지: 고성(2), 속초(2), 양양(2)
# 강원중부영동산지: 강릉(2)
# 강원남부영동산지: 동해(2), 삼척(2), 태백
# 강원북부해안: 고성(1), 속초(1), 양양(1)
# 강원중부해안: 강릉(1)
# 강원남부해안: 동해(1), 삼척(1)

gangwon_sample = pd.merge(gangwon_UMD, gangwon_code, on="EMD_CD")

강원북부내륙 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("철원군"))|
    (gangwon_sample["address"].str.contains("화천군"))|
    (gangwon_sample["address"].str.contains("양구군") & gangwon_sample["address"].str.contains("방산면"))|
    (gangwon_sample["address"].str.contains("양구군") & gangwon_sample["address"].str.contains("동면"))|
    (gangwon_sample["address"].str.contains("양구군") & gangwon_sample["address"].str.contains("국토정중앙면"))|
    (gangwon_sample["address"].str.contains("양구군") & gangwon_sample["address"].str.contains("양구읍"))|
    (gangwon_sample["address"].str.contains("인제군") & gangwon_sample["address"].str.contains("인제군 남면"))|
    (gangwon_sample["address"].str.contains("인제군") & gangwon_sample["address"].str.contains("인제읍"))
    ]["geometry"].unary_union)

강원중부내륙 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("춘천시"))|
    (gangwon_sample["address"].str.contains("홍천군") & ~gangwon_sample["address"].str.contains("내면"))
    ]["geometry"].unary_union)

강원남부내륙 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("원주시"))|
    (gangwon_sample["address"].str.contains("횡성군"))|
    (gangwon_sample["address"].str.contains("영월군"))|
    (gangwon_sample["address"].str.contains("평창군") & ~gangwon_sample["address"].str.contains("대관령면") & ~gangwon_sample["address"].str.contains("진부면"))|
    (gangwon_sample["address"].str.contains("정선군") & gangwon_sample["address"].str.contains("남면"))|
    (gangwon_sample["address"].str.contains("정선군") & gangwon_sample["address"].str.contains("신동읍"))|
    (gangwon_sample["address"].str.contains("정선군") & gangwon_sample["address"].str.contains("정선읍"))
    ]["geometry"].unary_union)

강원북부영서산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("양구군") & gangwon_sample["address"].str.contains("해안면"))|
    (gangwon_sample["address"].str.contains("인제군") & gangwon_sample["address"].str.contains("상남면"))|
    (gangwon_sample["address"].str.contains("인제군") & gangwon_sample["address"].str.contains("서화면"))|
    (gangwon_sample["address"].str.contains("인제군") & gangwon_sample["address"].str.contains("기린면"))|
    (gangwon_sample["address"].str.contains("인제군") & gangwon_sample["address"].str.contains("북면"))
    ]["geometry"].unary_union)

강원중부영서산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("홍천군") & gangwon_sample["address"].str.contains("내면"))|
    (gangwon_sample["address"].str.contains("평창군") & gangwon_sample["address"].str.contains("대관령면"))|
    (gangwon_sample["address"].str.contains("평창군") & gangwon_sample["address"].str.contains("진부면"))
    ]["geometry"].unary_union)

강원남부영서산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("정선군") & ~gangwon_sample["address"].str.contains("남면") & ~gangwon_sample["address"].str.contains("신동읍") & ~gangwon_sample["address"].str.contains("정선읍"))
    ]["geometry"].unary_union)

강원북부영동산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("고성군") & gangwon_sample["address"].str.contains("수동면"))|
    (gangwon_sample["address"].str.contains("고성군") & gangwon_sample["address"].str.contains("간성읍"))|
    (gangwon_sample["address"].str.contains("속초시") & gangwon_sample["address"].str.contains("설악동"))|
    (gangwon_sample["address"].str.contains("속초시") & gangwon_sample["address"].str.contains("도문동"))|
    (gangwon_sample["address"].str.contains("속초시") & gangwon_sample["address"].str.contains("노학동"))|
    (gangwon_sample["address"].str.contains("양양군") & gangwon_sample["address"].str.contains("현북면"))|
    (gangwon_sample["address"].str.contains("양양군") & gangwon_sample["address"].str.contains("서면"))
    ]["geometry"].unary_union)

강원중부영동산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("강릉시") & gangwon_sample["address"].str.contains("연곡면"))|
    (gangwon_sample["address"].str.contains("강릉시") & gangwon_sample["address"].str.contains("왕산면"))|
    (gangwon_sample["address"].str.contains("강릉시") & gangwon_sample["address"].str.contains("성산면"))
    ]["geometry"].unary_union)

강원남부영동산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("동해시") & gangwon_sample["address"].str.contains("신흥동"))|
    (gangwon_sample["address"].str.contains("동해시") & gangwon_sample["address"].str.contains("비천동"))|
    (gangwon_sample["address"].str.contains("동해시") & gangwon_sample["address"].str.contains("달방동"))|
    (gangwon_sample["address"].str.contains("동해시") & gangwon_sample["address"].str.contains("이로동"))|
    (gangwon_sample["address"].str.contains("동해시") & gangwon_sample["address"].str.contains("이기동"))|
    (gangwon_sample["address"].str.contains("동해시") & gangwon_sample["address"].str.contains("삼화동"))|
    (gangwon_sample["address"].str.contains("삼척시") & gangwon_sample["address"].str.contains("하장면"))|
    (gangwon_sample["address"].str.contains("삼척시") & gangwon_sample["address"].str.contains("미로면"))|
    (gangwon_sample["address"].str.contains("삼척시") & gangwon_sample["address"].str.contains("신기면"))|
    (gangwon_sample["address"].str.contains("삼척시") & gangwon_sample["address"].str.contains("도계읍"))|
    (gangwon_sample["address"].str.contains("삼척시") & gangwon_sample["address"].str.contains("노곡면"))|
    (gangwon_sample["address"].str.contains("삼척시") & gangwon_sample["address"].str.contains("가곡면"))|
    (gangwon_sample["address"].str.contains("태백시"))
    ]["geometry"].unary_union)

강원북부해안 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("고성군") & ~gangwon_sample["address"].str.contains("수동면") & ~gangwon_sample["address"].str.contains("간성읍"))|
    (gangwon_sample["address"].str.contains("속초시") & ~gangwon_sample["address"].str.contains("설악동") & ~gangwon_sample["address"].str.contains("도문동") & ~gangwon_sample["address"].str.contains("도문동"))|
    (gangwon_sample["address"].str.contains("양양군") & ~gangwon_sample["address"].str.contains("현북면") & ~gangwon_sample["address"].str.contains("서면"))
    ]["geometry"].unary_union)

강원중부해안 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("강릉시") & ~gangwon_sample["address"].str.contains("연곡면") & ~gangwon_sample["address"].str.contains("왕산면") & ~gangwon_sample["address"].str.contains("성산면"))
    ]["geometry"].unary_union)

강원남부해안 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("동해시") & ~gangwon_sample["address"].str.contains("신흥동") & ~gangwon_sample["address"].str.contains("비천동") & ~gangwon_sample["address"].str.contains("달방동") & ~gangwon_sample["address"].str.contains("이로동") & ~gangwon_sample["address"].str.contains("이기동") & ~gangwon_sample["address"].str.contains("삼화동"))|
    (gangwon_sample["address"].str.contains("삼척시") & ~gangwon_sample["address"].str.contains("하장면") & ~gangwon_sample["address"].str.contains("미로면") & ~gangwon_sample["address"].str.contains("신기면") & ~gangwon_sample["address"].str.contains("도계읍") & ~gangwon_sample["address"].str.contains("노곡면") & ~gangwon_sample["address"].str.contains("가곡면"))
    ]["geometry"].unary_union)

In [15]:
w_regions = [강원북부내륙, 강원중부내륙, 강원남부내륙, 강원북부영서산지, 강원중부영서산지, 강원남부영서산지,
             강원북부영동산지, 강원중부영동산지, 강원남부영동산지, 강원북부해안, 강원중부해안, 강원남부해안]

강원북부내륙.crs = "EPSG:4326"
강원중부내륙.crs = "EPSG:4326"
강원남부내륙.crs = "EPSG:4326"
강원북부영서산지.crs = "EPSG:4326"
강원중부영서산지.crs = "EPSG:4326"
강원남부영서산지.crs = "EPSG:4326"
강원북부영동산지.crs = "EPSG:4326"
강원중부영동산지.crs = "EPSG:4326"
강원남부영동산지.crs = "EPSG:4326"
강원북부해안.crs = "EPSG:4326"
강원중부해안.crs = "EPSG:4326"
강원남부해안.crs = "EPSG:4326"

map = folium.Map(location=[37.55, 128], zoom_start=8)

# 기상 관측소 위치 표시
for index, row in weather_stations.iterrows():
    popup = folium.Popup(row['stnNm'] + ' 기상 관측소', max_width=300)
    folium.Marker(
        location=[row['stnLatitude'], row['stnLongitude']],
        popup=popup,
        icon=folium.Icon(icon='info-sign', color='blue'),
    ).add_to(map)
    
region_colors = {
    '강원북부내륙': 'blue',
    '강원중부내륙': 'red',
    '강원남부내륙': 'green',
    '강원북부영서산지': 'orange',
    '강원중부영서산지': 'purple',
    '강원남부영서산지': 'pink',
    '강원북부영동산지': 'yellow',
    '강원중부영동산지': 'cyan',
    '강원남부영동산지': 'magenta',
    '강원북부해안': 'gray',
    '강원중부해안': 'brown',
    '강원남부해안': 'olive'
}

# Add the regions to the map with different colors
folium.GeoJson(강원북부내륙, style_function=lambda feature: {'fillColor': region_colors['강원북부내륙'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원중부내륙, style_function=lambda feature: {'fillColor': region_colors['강원중부내륙'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원남부내륙, style_function=lambda feature: {'fillColor': region_colors['강원남부내륙'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원북부영서산지, style_function=lambda feature: {'fillColor': region_colors['강원북부영서산지'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원중부영서산지, style_function=lambda feature: {'fillColor': region_colors['강원중부영서산지'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원남부영서산지, style_function=lambda feature: {'fillColor': region_colors['강원남부영서산지'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원북부영동산지, style_function=lambda feature: {'fillColor': region_colors['강원북부영동산지'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원중부영동산지, style_function=lambda feature: {'fillColor': region_colors['강원중부영동산지'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원남부영동산지, style_function=lambda feature: {'fillColor': region_colors['강원남부영동산지'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원북부해안, style_function=lambda feature: {'fillColor': region_colors['강원북부해안'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원중부해안, style_function=lambda feature: {'fillColor': region_colors['강원중부해안'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원남부해안, style_function=lambda feature: {'fillColor': region_colors['강원남부해안'], 'color': 'black'}).add_to(map)

map

#### 강원도 지역 9 분할 (참고 : 논문 및 회의)
- 강원도 지역을 12 분할 했을 때 기상관측소 위치 정보가 없는 지역이 있다고 판단하여 회의를 통해 재분할

In [16]:
# 강원북부내륙: 철원군, 화천군
# 강원중부내륙: 춘천시, 홍천군(~내면)
# 강원남부내륙: 원주시, 횡성군
# 강원북부산지: 양구군, 인제군
# 강원중부산지: 홍천군(내면), 평창군(대관령면, 진부면)
# 강원남부산지: 열월군, 정선군, 평창군(~대관령면, ~진부면)
# 강원북부해안: 고성군, 속초시, 양양군
# 강원중부해안: 강릉시
# 강원남부해안: 동해시, 삼척시, 태백시

gangwon_sample = pd.merge(gangwon_UMD, gangwon_code, on="EMD_CD")

강원북부내륙 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("철원군"))|
    (gangwon_sample["address"].str.contains("화천군"))
    ]["geometry"].unary_union)

강원중부내륙 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("춘천시"))|
    (gangwon_sample["address"].str.contains("홍천군") & ~gangwon_sample["address"].str.contains("내면"))
    ]["geometry"].unary_union)

강원남부내륙 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("원주시"))|
    (gangwon_sample["address"].str.contains("횡성군"))
    ]["geometry"].unary_union)

강원북부산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("양구군"))|
    (gangwon_sample["address"].str.contains("인제군"))
    ]["geometry"].unary_union)

강원중부산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("홍천군") & gangwon_sample["address"].str.contains("내면"))|
    (gangwon_sample["address"].str.contains("평창군") & gangwon_sample["address"].str.contains("대관령면"))|
    (gangwon_sample["address"].str.contains("평창군") & gangwon_sample["address"].str.contains("진부면"))
    ]["geometry"].unary_union)

강원남부산지 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("영월군"))|
    (gangwon_sample["address"].str.contains("정선군"))|
    (gangwon_sample["address"].str.contains("평창군") & ~gangwon_sample["address"].str.contains("대관령면") & ~gangwon_sample["address"].str.contains("진부면"))
    ]["geometry"].unary_union)

강원북부해안 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("고성군"))|
    (gangwon_sample["address"].str.contains("속초시"))|
    (gangwon_sample["address"].str.contains("양양군"))
    ]["geometry"].unary_union)

강원중부해안 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("강릉시"))
    ]["geometry"].unary_union)

강원남부해안 = gpd.GeoSeries(gangwon_sample[
    (gangwon_sample["address"].str.contains("동해시"))|
    (gangwon_sample["address"].str.contains("삼척시"))|
    (gangwon_sample["address"].str.contains("태백시"))
    ]["geometry"].unary_union)

In [17]:
w_regions = [강원북부내륙, 강원중부내륙, 강원남부내륙,
             강원북부산지, 강원중부산지, 강원남부산지,
             강원북부해안, 강원중부해안, 강원남부해안]

강원북부내륙.crs = "EPSG:4326"
강원중부내륙.crs = "EPSG:4326"
강원남부내륙.crs = "EPSG:4326"
강원북부산지.crs = "EPSG:4326"
강원중부산지.crs = "EPSG:4326"
강원남부산지.crs = "EPSG:4326"
강원북부해안.crs = "EPSG:4326"
강원중부해안.crs = "EPSG:4326"
강원남부해안.crs = "EPSG:4326"

map = folium.Map(location=[37.55, 128], zoom_start=8)

# 기상 관측소 위치 표시
for index, row in weather_stations.iterrows():
    popup = folium.Popup(row['stnNm'] + ' 기상 관측소', max_width=300)
    folium.Marker(
        location=[row['stnLatitude'], row['stnLongitude']],
        popup=popup,
        icon=folium.Icon(icon='info-sign', color='red'),
    ).add_to(map)

    
region_colors = {
    '강원북부내륙': 'blue',
    '강원중부내륙': 'red',
    '강원남부내륙': 'green',
    '강원북부산지': 'orange',
    '강원중부산지': 'purple',
    '강원남부산지': 'black',
    '강원북부해안': 'gray',
    '강원중부해안': 'brown',
    '강원남부해안': 'magenta'
}

# Add the regions to the map with different colors
folium.GeoJson(강원북부내륙, style_function=lambda feature: {'fillColor': region_colors['강원북부내륙'], 'color': 'blue'}).add_to(map)
folium.GeoJson(강원중부내륙, style_function=lambda feature: {'fillColor': region_colors['강원중부내륙'], 'color': 'red'}).add_to(map)
folium.GeoJson(강원남부내륙, style_function=lambda feature: {'fillColor': region_colors['강원남부내륙'], 'color': 'green'}).add_to(map)
folium.GeoJson(강원북부해안, style_function=lambda feature: {'fillColor': region_colors['강원북부해안'], 'color': 'gray'}).add_to(map)
folium.GeoJson(강원중부해안, style_function=lambda feature: {'fillColor': region_colors['강원중부해안'], 'color': 'brown'}).add_to(map)
folium.GeoJson(강원남부해안, style_function=lambda feature: {'fillColor': region_colors['강원남부해안'], 'color': 'magenta'}).add_to(map)
folium.GeoJson(강원남부산지, style_function=lambda feature: {'fillColor': region_colors['강원남부산지'], 'color': 'black'}).add_to(map)
folium.GeoJson(강원북부산지, style_function=lambda feature: {'fillColor': region_colors['강원북부산지'], 'color': 'orange'}).add_to(map)
folium.GeoJson(강원중부산지, style_function=lambda feature: {'fillColor': region_colors['강원중부산지'], 'color': 'purple'}).add_to(map)

map

#### 강원도 지역 9 분할 GeoDataFrame 생성

In [18]:
gangwon_regions = {
    'w_regions': ['강원북부내륙', '강원중부내륙', '강원남부내륙', '강원북부산지', '강원중부산지', '강원남부산지', '강원북부해안', '강원중부해안', '강원남부해안'],
    'geometry': [
        gangwon_sample[(gangwon_sample["address"].str.contains("철원군")) | (gangwon_sample["address"].str.contains("화천군"))]['geometry'].unary_union,
        gangwon_sample[(gangwon_sample["address"].str.contains("춘천시")) | (gangwon_sample["address"].str.contains("홍천군") & ~gangwon_sample["address"].str.contains("내면"))]['geometry'].unary_union,
        gangwon_sample[(gangwon_sample["address"].str.contains("원주시")) | (gangwon_sample["address"].str.contains("횡성군"))]['geometry'].unary_union,
        gangwon_sample[(gangwon_sample["address"].str.contains("양구군")) | (gangwon_sample["address"].str.contains("인제군"))]['geometry'].unary_union,
        gangwon_sample[(gangwon_sample["address"].str.contains("홍천군") & gangwon_sample["address"].str.contains("내면")) | (gangwon_sample["address"].str.contains("평창군") & gangwon_sample["address"].str.contains("대관령면")) | (gangwon_sample["address"].str.contains("평창군") & gangwon_sample["address"].str.contains("진부면"))]['geometry'].unary_union,
        gangwon_sample[(gangwon_sample["address"].str.contains("영월군")) | (gangwon_sample["address"].str.contains("정선군")) | (gangwon_sample["address"].str.contains("평창군") & ~gangwon_sample["address"].str.contains("대관령면") & ~gangwon_sample["address"].str.contains("진부면"))]['geometry'].unary_union,
        gangwon_sample[(gangwon_sample["address"].str.contains("고성군")) | (gangwon_sample["address"].str.contains("속초시")) | (gangwon_sample["address"].str.contains("양양군"))]['geometry'].unary_union,
        gangwon_sample[gangwon_sample["address"].str.contains("강릉시")]['geometry'].unary_union,
        gangwon_sample[(gangwon_sample["address"].str.contains("동해시")) | (gangwon_sample["address"].str.contains("삼척시")) | (gangwon_sample["address"].str.contains("태백시"))]['geometry'].unary_union
    ]
}
gangwon_regions = gpd.GeoDataFrame(gangwon_regions, geometry='geometry', crs='EPSG:4326')

In [19]:
save_geodataframe_to_bigquery(gangwon_regions, "PREPROCESSING_DATA", "gangwon_regions", key_path)

Data inserted into table gangwon_regions successfully.


In [20]:
gangwon_regions = get_geodataframe_from_bigquery("PREPROCESSING_DATA", "gangwon_regions", key_path)

#### 강원도 지역 9 분할 관련 파생 변수 "w_regions" 생성
- 기상관측소 위치 정보를 이용

In [21]:
weather_stations["w_regions"] = ""

for i in range(len(weather_stations)):
    if "철원군" in weather_stations["stnAddress"][i] or "화천군" in weather_stations["stnAddress"][i]:
        weather_stations["w_regions"][i] = "강원북부내륙"

    elif "춘천시" in weather_stations["stnAddress"][i] or ("홍천군" in weather_stations["stnAddress"][i] and "내면" not in weather_stations["stnAddress"][i]):
        weather_stations["w_regions"][i] = "강원중부내륙"

    elif "원주시" in weather_stations["stnAddress"][i] or "횡성군" in weather_stations["stnAddress"][i]:
        weather_stations["w_regions"][i] = "강원남부내륙"

    elif "양구군" in weather_stations["stnAddress"][i] or "인제군" in weather_stations["stnAddress"][i]:
        weather_stations["w_regions"][i] = "강원북부산지"

    elif ("홍천군" in weather_stations["stnAddress"][i] and "내면" in weather_stations["stnAddress"][i]) or ("평창군" in weather_stations["stnAddress"][i] and "대관령면" in weather_stations["stnAddress"][i]) or ("평창군" in weather_stations["stnAddress"][i] and "진부면" in weather_stations["stnAddress"][i]):
        weather_stations["w_regions"][i] = "강원중부산지"

    elif "영월군" in weather_stations["stnAddress"][i] or "정선군" in weather_stations["stnAddress"][i] or ("평창군" in weather_stations["stnAddress"][i] and "대관령면" not in weather_stations["stnAddress"][i] and "진부면" not in weather_stations["stnAddress"][i]):
        weather_stations["w_regions"][i] = "강원남부산지"

    elif "고성군" in weather_stations["stnAddress"][i] or "속초시" in weather_stations["stnAddress"][i] or "양양군" in weather_stations["stnAddress"][i]:
        weather_stations["w_regions"][i] = "강원북부해안"

    elif "강릉시" in weather_stations["stnAddress"][i]:
        weather_stations["w_regions"][i] = "강원중부해안"

    elif "동해시" in weather_stations["stnAddress"][i] or "삼척시" in weather_stations["stnAddress"][i] or "태백시" in weather_stations["stnAddress"][i]:
        weather_stations["w_regions"][i] = "강원남부해안"

weather_stations = weather_stations.drop(["startDate", "endDate", "stnNm", "stnAdministrative", "elevation", "barometer", "thermometer", "anemometer", "raingauge"], axis=1)

weather_stations

Unnamed: 0,stnId,stnAddress,stnLatitude,stnLongitude,w_regions
0,90,강원도 고성군토성면 봉포5길9 속초자동기상관측소,38.2509,128.5647,강원북부해안
1,93,강원도 춘천시신북읍 산천리264(장본1길 12) 춘천기상대,37.9474,127.7544,강원중부내륙
2,95,강원도 철원군갈말읍 명성로179번길 26 철원자동기상관측소,38.1479,127.3042,강원북부내륙
3,100,강원도 평창군대관령면 경강로5372 대관령자동기상관측소,37.6771,128.7183,강원중부산지
4,101,강원도 춘천시충열로 91번길12 춘천자동기상관측소,37.9026,127.7357,강원중부내륙
5,104,강원도 강릉시사천면 과학단지로130 강원지방기상청,37.8046,128.8554,강원중부해안
6,105,강원도 강릉시용강동 63-20강릉자동기상관측소,37.7515,128.891,강원중부해안
7,106,강원도 동해시중앙로 31동해자동기상관측소,37.5071,129.1243,강원남부해안
8,114,강원도 원주시단구로 159원주자동기상관측소,37.3375,127.9466,강원남부내륙
9,121,강원도 영월군영월읍 영월로 1894-25 영월자동기상관측소,37.1813,128.4574,강원남부산지


In [22]:
save_dataframe_to_bigquery(weather_stations, "PREPROCESSING_DATA", "weather_stations", key_path)

Data inserted into table weather_stations successfully.


#### 기상 데이터 전처리
- 기상 관측소 정보 결합
- 불필요한 컬럼 제거
- 결측값 처리
    + 강수 결측값의 경우 도메인 지식을 활용 하여 0 으로 처리
    + 기온 / 습도 결측값의 경우 계절성 패턴이 있다고 판단 후 보간 처리
    + 풍속 결측값의 경우 특정 패턴이 보이지 않아 평균으로 처리
- 파생변수 생성
    + 실효습도
    + 최근 7일 강수량 합계
    + 최근 7일 최대 풍속
    + 강수 여부
    + 일 강수량이 0 인 날의 연속된 일 수

In [23]:
weather_days = weather_days.merge(weather_stations, on='stnId')

In [24]:
weather_days = weather_days.drop(['stnNm', 'minTaHrmt', 'maxTaHrmt', 'mi10MaxRn', 'mi10MaxRnHrmt', 'hr1MaxRn', 'hr1MaxRnHrmt', 
                   'sumRnDur', 'hr24SumRws', 'maxWd', 'avgTd', 'avgPv', 'avgPa', 'maxPs', 'maxPsHrmt',
                   'minPs', 'minPsHrmt', 'avgPs', 'ssDur', 'sumSsHr', 'hr1MaxIcsrHrmt', 'hr1MaxIcsr',
                   'sumGsr', 'ddMefs', 'ddMefsHrmt', 'ddMes', 'ddMesHrmt', 'sumDpthFhsc', 'avgTs', 'minTg',
                   'avgCm5Te', 'avgCm10Te', 'avgCm20Te', 'avgCm30Te', 'avgM05Te', 'avgM10Te', 'avgM15Te',
                   'avgM30Te', 'avgM50Te', 'sumLrgEv', 'sumSmlEv', 'n99Rn', 'iscs', 'sumFogDur', 
                   'maxInsWsWd', 'maxInsWsHrmt', 'maxWsWd', 'maxWsHrmt', 'minRhmHrmt', 'avgTca', 'avgLmac'], axis=1)

In [25]:
# 강수 결측값 0 처리
weather_days['sumRn'].fillna(0, inplace=True)

# 기온 결측값 선형 보간 처리 : 계절성 패턴이 있다고 판단
weather_days["avgTa"] = weather_days["avgTa"].interpolate(method="linear")
weather_days["minTa"] = weather_days["minTa"].interpolate(method="linear")
weather_days["maxTa"] = weather_days["maxTa"].interpolate(method="linear")

# 풍속 결측값 평균 처리
weather_days["maxInsWs"] = weather_days["maxInsWs"].fillna(weather_days["maxInsWs"].mean())
weather_days["maxWs"] = weather_days["maxWs"].fillna(weather_days["maxWs"].mean())
weather_days["avgWs"] = weather_days["avgWs"].fillna(weather_days["avgWs"].mean())

# 상대습도 결측값 선형 보간 처리 : 계절성 패턴이 있다고 판단
weather_days["minRhm"] = weather_days["minRhm"].interpolate(method="linear")
weather_days["avgRhm"] = weather_days["avgRhm"].interpolate(method="linear")

In [26]:
# stnId 별로 데이터프레임 분할
dfs = []
for stn_id, group in weather_days.groupby("stnId"):
    # Shift된 열에 처음 값을 추가
    group["h1"] = group["avgRhm"].shift(1)
    group.loc[group.index[0], "h1"] = group["avgRhm"].iloc[0]
    
    group["h2"] = group["h1"].shift(1)
    group.loc[group.index[0], "h2"] = group["avgRhm"].iloc[0]
    
    group["h3"] = group["h2"].shift(1)
    group.loc[group.index[0], "h3"] = group["avgRhm"].iloc[0]
    
    group["h4"] = group["h3"].shift(1)
    group.loc[group.index[0], "h4"] = group["avgRhm"].iloc[0]
    
    # 실효습도
    r = 0.7
    group["effRhm"] = ((group["avgRhm"]) + (r**1)*(group["h1"]) + (r**2)*(group["h2"]) + (r**3)*(group["h3"]) + (r**4)*(group["h4"])) * (1-r)
    
    # 6일전부터 기준일까지 7일간 강수량(mm)
    window_size = 7
    group['sumRn7'] = group['sumRn'].rolling(window_size, min_periods=1).sum()
    
    # 강수 여부, 비 옴 1 / 비 안옴 0
    group['Rntf'] = group['sumRn'].apply(lambda x: 1 if x > 0 else 0)

    # 6일전부터 기준일까지 7일간 최대풍속
    group['maxwind7'] = group['maxWs'].rolling(window_size, min_periods=1).max()

    # 비가 오지 않은 날의 일수를 저장할 새로운 칼럼을 추가
    group['noRn'] = 0

    # 일강수량이 0인 날의 연속된 일수를 계산하여 noRn 칼럼에 저장
    count = 0
    for i, value in enumerate(group['sumRn']):
        if value == 0:
            count += 1
        else:
            group.loc[group.index[i], 'noRn'] = count
            count = 0

    dfs.append(group)

# 데이터프레임 합치기
weather_days = pd.concat(dfs)
weather_days

Unnamed: 0,stnId,tm,avgTa,minTa,maxTa,sumRn,maxInsWs,maxWs,avgWs,minRhm,...,w_regions,h1,h2,h3,h4,effRhm,sumRn7,Rntf,maxwind7,noRn
0,90,2013-01-01,-2.0,-6.7,2.5,0.0,11.1,6.2,2.5,38.0,...,강원북부해안,52.3,52.3,52.3,52.3,43.509939,0.0,0,6.2,0
1,90,2013-01-02,-7.0,-9.8,-1.3,0.0,14.2,7.3,3.9,19.0,...,강원북부해안,52.3,52.3,52.3,52.3,36.249939,0.0,0,7.3,0
2,90,2013-01-03,-7.6,-11.5,-3.1,0.0,9.3,5.0,2.4,18.0,...,강원북부해안,28.1,52.3,52.3,52.3,31.857939,0.0,0,7.3,0
3,90,2013-01-04,-5.2,-10.4,0.3,0.0,7.2,4.9,2.2,25.0,...,강원북부해안,30.4,28.1,52.3,52.3,30.553539,0.0,0,7.3,0
4,90,2013-01-05,-1.8,-7.4,1.9,0.0,10.2,5.8,2.3,25.0,...,강원북부해안,36.3,30.4,28.1,52.3,29.850459,0.0,0,7.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51896,217,2023-05-29,19.5,16.6,25.6,19.4,4.9,2.4,0.5,60.0,...,강원남부산지,93.5,81.9,51.4,53.4,66.909762,39.1,1,5.6,0
51897,217,2023-05-30,18.7,12.7,24.3,0.0,7.7,3.6,1.5,49.0,...,강원남부산지,87.0,93.5,81.9,51.4,65.444352,38.4,0,5.6,0
51898,217,2023-05-31,18.4,9.9,27.4,0.0,9.2,5.7,1.0,33.0,...,강원남부산지,71.0,87.0,93.5,81.9,62.419407,38.4,0,5.7,0
51899,217,2023-06-01,19.6,13.2,26.6,0.0,7.3,4.8,0.7,46.0,...,강원남부산지,64.0,71.0,87.0,93.5,60.864105,38.4,0,5.7,0


In [27]:
save_dataframe_to_bigquery(weather_days, "PREPROCESSING_DATA", "weather_days", key_path)

Data inserted into table weather_days successfully.


#### 산불 발생 데이터 전처리
- 행정안전부 산불 발생 데이터(2013~2021) 과 산림청 산불 발생 데이터(2022) 결합
- 강원도 지역 9 분할 관련 파생 변수 "w_regions" 생성
- 불필요한 컬럼 제거
- 주소 정보를 통해 위도, 경도 위치 정보를 google geocode 를 통해 적재

In [28]:
forestfire_occurs = pd.concat([forestfire_occurs, forestfire_occurs_add], axis=0)
forestfire_occurs = forestfire_occurs[forestfire_occurs["adres"].str.contains("강원도")].reset_index(drop=True)
forestfire_occurs["occu_date"] = pd.to_datetime(forestfire_occurs['occu_date'], format = "%Y-%m-%d").dt.date

forestfire_occurs = forestfire_occurs.drop([
    'occu_year', 'occu_mt', 'occu_de', 'occu_tm', 'occu_day',
    'end_year', 'end_mt', 'end_de', 'end_tm', 'rn_adres', 'resn',
    'ctprvn_cd', 'sgg_cd', 'emd_cd', "x", "y"], axis=1)

add_lat_lng_to_dataframe(forestfire_occurs, 'adres', get_service_key(servicekey_path, "google_serviceKey"))

forestfire_occurs["w_regions"] = ""

for i in range(len(forestfire_occurs)):
    if "철원군" in forestfire_occurs["adres"][i] or "화천군" in forestfire_occurs["adres"][i]:
        forestfire_occurs["w_regions"][i] = "강원북부내륙"

    elif "춘천시" in forestfire_occurs["adres"][i] or ("홍천군" in forestfire_occurs["adres"][i] and "내면" not in forestfire_occurs["adres"][i]):
        forestfire_occurs["w_regions"][i] = "강원중부내륙"

    elif "원주시" in forestfire_occurs["adres"][i] or "횡성군" in forestfire_occurs["adres"][i]:
        forestfire_occurs["w_regions"][i] = "강원남부내륙"

    elif "양구군" in forestfire_occurs["adres"][i] or "인제군" in forestfire_occurs["adres"][i]:
        forestfire_occurs["w_regions"][i] = "강원북부산지"

    elif ("홍천군" in forestfire_occurs["adres"][i] and "내면" in forestfire_occurs["adres"][i]) or ("평창군" in forestfire_occurs["adres"][i] and "대관령면" in forestfire_occurs["adres"][i]) or ("평창군" in forestfire_occurs["adres"][i] and "진부면" in forestfire_occurs["adres"][i]):
        forestfire_occurs["w_regions"][i] = "강원중부산지"

    elif "영월군" in forestfire_occurs["adres"][i] or "정선군" in forestfire_occurs["adres"][i] or ("평창군" in forestfire_occurs["adres"][i] and "대관령면" not in forestfire_occurs["adres"][i] and "진부면" not in forestfire_occurs["adres"][i]):
        forestfire_occurs["w_regions"][i] = "강원남부산지"

    elif "고성군" in forestfire_occurs["adres"][i] or "속초시" in forestfire_occurs["adres"][i] or "양양군" in forestfire_occurs["adres"][i]:
        forestfire_occurs["w_regions"][i] = "강원북부해안"

    elif "강릉시" in forestfire_occurs["adres"][i]:
        forestfire_occurs["w_regions"][i] = "강원중부해안"

    elif "동해시" in forestfire_occurs["adres"][i] or "삼척시" in forestfire_occurs["adres"][i] or "태백시" in forestfire_occurs["adres"][i]:
        forestfire_occurs["w_regions"][i] = "강원남부해안"
        
forestfire_occurs = forestfire_occurs.sort_values(["occu_date"])

In [29]:
save_dataframe_to_bigquery(forestfire_occurs, "PREPROCESSING_DATA", "forestfire_occurs", key_path)

Data inserted into table forestfire_occurs successfully.


# ANALSIS_DATA 처리

#### 분석 데이터 생성
- 전처리 된 weather_days 와 forestfire_occurs 를 통해 날짜와 지역이 겹치면 해당 지역에 산불 발생 처리
- 이 후 9 개의 데이터프레임 (테이블) 로 분석 데이터 관리

In [30]:
weather_days = get_dataframe_from_bigquery("PREPROCESSING_DATA", "weather_days", key_path).sort_values(["stnId", "tm"])
forestfire_occurs = get_dataframe_from_bigquery("PREPROCESSING_DATA", "forestfire_occurs", key_path).sort_values(["objt_id", "occu_date"])

In [31]:
weather_days

Unnamed: 0,stnId,tm,avgTa,minTa,maxTa,sumRn,maxInsWs,maxWs,avgWs,minRhm,...,w_regions,h1,h2,h3,h4,effRhm,sumRn7,Rntf,maxwind7,noRn
6644,90,2013-01-01,-2.0,-6.7,2.5,0.0,11.1,6.2,2.5,38.0,...,강원북부해안,52.3,52.3,52.3,52.3,43.509939,0.0,0,6.2,0
6645,90,2013-01-02,-7.0,-9.8,-1.3,0.0,14.2,7.3,3.9,19.0,...,강원북부해안,52.3,52.3,52.3,52.3,36.249939,0.0,0,7.3,0
6646,90,2013-01-03,-7.6,-11.5,-3.1,0.0,9.3,5.0,2.4,18.0,...,강원북부해안,28.1,52.3,52.3,52.3,31.857939,0.0,0,7.3,0
6647,90,2013-01-04,-5.2,-10.4,0.3,0.0,7.2,4.9,2.2,25.0,...,강원북부해안,30.4,28.1,52.3,52.3,30.553539,0.0,0,7.3,0
6648,90,2013-01-05,-1.8,-7.4,1.9,0.0,10.2,5.8,2.3,25.0,...,강원북부해안,36.3,30.4,28.1,52.3,29.850459,0.0,0,7.3,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6643,217,2023-05-29,19.5,16.6,25.6,19.4,4.9,2.4,0.5,60.0,...,강원남부산지,93.5,81.9,51.4,53.4,66.909762,39.1,1,5.6,0
43428,217,2023-05-30,18.7,12.7,24.3,0.0,7.7,3.6,1.5,49.0,...,강원남부산지,87.0,93.5,81.9,51.4,65.444352,38.4,0,5.6,0
43429,217,2023-05-31,18.4,9.9,27.4,0.0,9.2,5.7,1.0,33.0,...,강원남부산지,71.0,87.0,93.5,81.9,62.419407,38.4,0,5.7,0
43430,217,2023-06-01,19.6,13.2,26.6,0.0,7.3,4.8,0.7,46.0,...,강원남부산지,64.0,71.0,87.0,93.5,60.864105,38.4,0,5.7,0


In [32]:
forestfire_occurs

Unnamed: 0,objt_id,occu_date,adres,ar,amount,latitude,longitude,w_regions
803,1017,2014-04-16,강원도 춘천시 동면 만천리 산31-15,0.09,0,37.875054,127.788101,강원중부내륙
664,1019,2014-04-17,강원도 춘천시 동면 만천리 산30,0.03,0,37.874114,127.788125,강원중부내륙
156,102,2011-03-14,강원도 영월군 영월읍 정양리 산80,0.1,64,37.150817,128.520017,강원남부산지
311,1023,2014-04-20,강원도 홍천군 서면 동막리 산110-1,0.2,0,37.659113,127.561334,강원중부내륙
160,1028,2014-04-21,강원도 영월군 한반도면 광전리 산57,0.1,0,37.271407,128.350243,강원남부산지
...,...,...,...,...,...,...,...,...
470,,2022-12-04,강원도 고성군 거진 송강리 산40-8,0.9,20218,38.438462,128.388452,강원북부해안
636,,2022-12-05,강원도 강릉시 주문진읍 향호리 산216,0.02,1732,37.898943,128.797257,강원중부해안
34,,2022-12-14,강원도 삼척시 근덕 동막리 산526-1,5.2,141544,37.328813,129.231968,강원남부해안
369,,2022-12-17,강원도 고성군 죽왕 구성리 산34,0.3,9936,38.326925,128.472262,강원북부해안


In [33]:
weather_days["tm"] = pd.to_datetime(weather_days["tm"])
forestfire_occurs["occu_date"] =  pd.to_datetime(forestfire_occurs["occu_date"])

merged_df = pd.merge(weather_days, forestfire_occurs, left_on=["tm", "w_regions"], right_on=["occu_date", "w_regions"], how="left")
merged_df["fire_occur"] = merged_df["occu_date"].notnull().astype(int)
merged_df = merged_df.drop(["objt_id", "occu_date", "adres", "amount", "latitude", "longitude"], axis=1)

merged_df

Unnamed: 0,stnId,tm,avgTa,minTa,maxTa,sumRn,maxInsWs,maxWs,avgWs,minRhm,...,h2,h3,h4,effRhm,sumRn7,Rntf,maxwind7,noRn,ar,fire_occur
0,90,2013-01-01,-2.0,-6.7,2.5,0.0,11.1,6.2,2.5,38.0,...,52.3,52.3,52.3,43.509939,0.0,0,6.2,0,,0
1,90,2013-01-02,-7.0,-9.8,-1.3,0.0,14.2,7.3,3.9,19.0,...,52.3,52.3,52.3,36.249939,0.0,0,7.3,0,,0
2,90,2013-01-03,-7.6,-11.5,-3.1,0.0,9.3,5.0,2.4,18.0,...,52.3,52.3,52.3,31.857939,0.0,0,7.3,0,,0
3,90,2013-01-04,-5.2,-10.4,0.3,0.0,7.2,4.9,2.2,25.0,...,28.1,52.3,52.3,30.553539,0.0,0,7.3,0,,0
4,90,2013-01-05,-1.8,-7.4,1.9,0.0,10.2,5.8,2.3,25.0,...,30.4,28.1,52.3,29.850459,0.0,0,7.3,0,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52015,217,2023-05-29,19.5,16.6,25.6,19.4,4.9,2.4,0.5,60.0,...,81.9,51.4,53.4,66.909762,39.1,1,5.6,0,,0
52016,217,2023-05-30,18.7,12.7,24.3,0.0,7.7,3.6,1.5,49.0,...,93.5,81.9,51.4,65.444352,38.4,0,5.6,0,,0
52017,217,2023-05-31,18.4,9.9,27.4,0.0,9.2,5.7,1.0,33.0,...,87.0,93.5,81.9,62.419407,38.4,0,5.7,0,,0
52018,217,2023-06-01,19.6,13.2,26.6,0.0,7.3,4.8,0.7,46.0,...,71.0,87.0,93.5,60.864105,38.4,0,5.7,0,,0


In [34]:
merged_df[merged_df["fire_occur"] == 1]

Unnamed: 0,stnId,tm,avgTa,minTa,maxTa,sumRn,maxInsWs,maxWs,avgWs,minRhm,...,h2,h3,h4,effRhm,sumRn7,Rntf,maxwind7,noRn,ar,fire_occur
11,90,2013-01-12,3.5,-2.1,5.9,0.0,12.4,6.8,3.5,39.0,...,29.8,28.0,33.4,34.957602,0.0,0,6.8,0,0.1,1
66,90,2013-03-08,13.9,9.1,18.4,0.0,17.4,9.1,4.1,26.0,...,51.8,32.9,59.3,37.177389,0.4,0,10.0,0,1,1
74,90,2013-03-16,8.8,3.3,15.2,0.0,12.5,6.8,2.9,18.0,...,78.9,92.8,50.4,49.338732,18.9,0,8.0,0,0.2,1
116,90,2013-04-27,12.2,8.5,16.6,0.0,7.9,4.9,2.3,42.0,...,50.4,60.1,56.4,49.209582,1.5,0,9.0,0,0.3,1
127,90,2013-05-08,16.3,9.6,24.7,0.0,8.6,5.8,2.3,27.0,...,60.6,29.8,54.1,55.687443,5.0,0,6.7,0,0.01,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51641,217,2022-05-20,19.3,12.0,27.9,0.0,9.9,5.2,1.5,20.0,...,48.9,54.0,49.1,36.621573,0.0,0,9.1,0,1.2,1
51645,217,2022-05-24,22.3,13.8,32.6,0.0,6.8,3.4,0.8,15.0,...,63.4,57.1,43.3,45.293289,0.0,0,8.0,0,0.1,1
51654,217,2022-06-02,20.2,9.2,30.6,0.0,12.3,7.6,1.7,30.0,...,35.4,44.5,38.0,31.884990,0.0,0,8.2,0,0.32,1
51796,217,2022-10-22,12.6,7.3,20.4,0.0,7.3,3.3,1.3,25.0,...,68.1,68.4,66.0,54.338040,0.0,0,5.8,0,0.1,1


In [35]:
merged_df = merged_df.drop(['stnId', 'stnLatitude', 'stnLongitude', 'h1', 'h2', 'h3', 'h4'], axis=1)

averaged_df = merged_df.groupby(["w_regions", "tm"]).agg({
    "avgTa": "mean",
    "minTa": "mean",
    "maxTa": "mean",
    "sumRn": "mean",
    "maxInsWs": "mean",
    "maxWs": "mean",
    "avgWs": "mean",
    "minRhm": "mean",
    "avgRhm": "mean",
    "effRhm": "mean",
    "sumRn7": "mean",
    "Rntf": lambda x: int(np.any(x == 1)),
    "maxwind7": "mean",
    "noRn": "mean",
    "fire_occur": lambda x: int(np.any(x == 1))
}).reset_index()

# 소수점 2자리까지 출력
averaged_df = averaged_df.round({"avgTa": 2, "minTa": 2, "maxTa": 2, "sumRn": 2, "maxInsWs": 2, "maxWs": 2, "avgWs": 2, "minRhm": 2, "avgRhm": 2, "effRhm": 2, "sumRn7": 2})

# 컬럼 타입 설정
averaged_df["Rntf"] = averaged_df["Rntf"].astype(int)
averaged_df["fire_occur"] = averaged_df["fire_occur"].astype(int)

averaged_df

Unnamed: 0,w_regions,tm,avgTa,minTa,maxTa,sumRn,maxInsWs,maxWs,avgWs,minRhm,avgRhm,effRhm,sumRn7,Rntf,maxwind7,noRn,fire_occur
0,강원남부내륙,2013-01-01,-7.00,-13.80,-0.80,2.00,4.50,2.60,0.90,66.0,78.80,65.56,2.00,1,2.60,0.0,0
1,강원남부내륙,2013-01-02,-9.80,-15.80,-1.60,0.10,10.30,5.80,2.80,22.0,39.30,53.71,2.10,1,5.80,0.0,0
2,강원남부내륙,2013-01-03,-14.10,-19.20,-8.60,0.00,5.30,2.90,1.10,29.0,51.80,49.16,2.10,0,5.80,0.0,0
3,강원남부내륙,2013-01-04,-14.20,-20.30,-6.40,0.00,3.70,1.90,0.80,33.0,61.40,48.86,2.10,0,5.80,0.0,0
4,강원남부내륙,2013-01-05,-10.70,-19.10,-2.90,0.00,2.70,1.50,0.70,33.0,65.90,50.00,2.10,0,5.80,0.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34240,강원중부해안,2023-05-29,19.20,16.85,23.10,6.95,3.15,1.85,0.60,71.0,91.60,66.93,15.85,1,5.25,0.0,0
34241,강원중부해안,2023-05-30,17.65,14.45,21.70,0.30,5.70,3.50,1.30,69.0,87.35,70.70,16.15,1,5.25,0.0,0
34242,강원중부해안,2023-05-31,19.45,12.20,25.05,0.00,8.30,4.20,1.35,37.5,65.55,66.33,16.15,0,4.50,0.0,0
34243,강원중부해안,2023-06-01,23.05,18.65,28.20,0.00,5.90,2.90,1.35,42.0,59.95,60.83,16.15,0,4.50,0.0,0


In [36]:
averaged_df[averaged_df["fire_occur"] == 1]

Unnamed: 0,w_regions,tm,avgTa,minTa,maxTa,sumRn,maxInsWs,maxWs,avgWs,minRhm,avgRhm,effRhm,sumRn7,Rntf,maxwind7,noRn,fire_occur
81,강원남부내륙,2013-03-23,6.00,-1.30,14.2,0.0,6.20,3.80,1.30,24.0,61.50,51.32,18.00,0,6.10,0.0,1
127,강원남부내륙,2013-05-08,19.50,11.70,27.7,0.0,6.40,3.70,1.30,23.0,54.10,45.06,2.50,0,4.80,0.0,1
383,강원남부내륙,2014-01-19,-3.90,-10.70,2.4,0.0,4.50,2.50,1.00,31.0,54.60,51.38,0.00,0,5.40,0.0,1
422,강원남부내륙,2014-02-27,6.90,-1.10,15.1,0.0,7.10,4.20,1.60,35.0,61.90,53.38,0.00,0,4.20,0.0,1
426,강원남부내륙,2014-03-03,4.00,-3.00,11.7,0.0,6.40,4.00,1.00,18.0,46.60,45.86,0.00,0,4.80,0.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33872,강원중부해안,2022-05-26,21.30,15.40,25.8,0.0,11.35,5.15,2.65,25.0,43.35,39.08,0.00,0,6.20,0.0,1
33881,강원중부해안,2022-06-04,19.50,15.70,22.8,0.0,6.45,3.35,1.40,64.0,78.70,53.31,0.00,0,6.15,0.0,1
34055,강원중부해안,2022-11-25,15.50,8.25,20.7,0.0,21.15,10.95,3.85,20.5,36.30,48.95,19.55,0,10.95,0.0,1
34065,강원중부해안,2022-12-05,0.80,-1.80,6.1,0.0,6.30,3.70,1.70,17.5,31.00,28.77,2.45,0,6.75,0.0,1


In [37]:
save_dataframe_to_bigquery(averaged_df, "ANALSIS_DATA", "ANALSIS_DATA", key_path)

Data inserted into table ANALSIS_DATA successfully.


In [38]:
# "w_regions" 별로 데이터프레임 분할
dfs = []
for w_region, group in averaged_df.groupby("w_regions"):
    dfs.append(group)

In [39]:
region_dfs = {}  # 각각의 데이터프레임을 저장할 딕셔너리

region_mapping = {
    "강원북부내륙": "GangwonNorthInland",
    "강원중부내륙": "GangwonCentralInland",
    "강원남부내륙": "GangwonSouthInland",
    "강원북부산지": "GangwonNorthMount",
    "강원중부산지": "GangwonCentralMount",
    "강원남부산지": "GangwonSouthMount",
    "강원북부해안": "GangwonNorthCoast",
    "강원중부해안": "GangwonCentralCoast",
    "강원남부해안": "GangwonSouthCoast"
}

# "w_regions" 별로 데이터프레임 분할
for w_region, group in averaged_df.groupby("w_regions"):
    english_name = region_mapping[w_region]
    region_dfs[english_name] = group

dfs = {}

for region, df in region_dfs.items():
    dfs[region] = pd.DataFrame(df).reset_index(drop=True)
    save_dataframe_to_bigquery(dfs[region], "ANALSIS_DATA", region, key_path)

Data inserted into table GangwonSouthInland successfully.
Data inserted into table GangwonSouthMount successfully.
Data inserted into table GangwonSouthCoast successfully.
Data inserted into table GangwonNorthInland successfully.
Data inserted into table GangwonNorthMount successfully.
Data inserted into table GangwonNorthCoast successfully.
Data inserted into table GangwonCentralInland successfully.
Data inserted into table GangwonCentralMount successfully.
Data inserted into table GangwonCentralCoast successfully.
