## **1. Total imports**

In [1]:
## data
import pandas as pd
import numpy as np

## model
from sklearn.model_selection import train_test_split, KFold, GridSearchCV

## visualization
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio

## utilities
import os
import pickle
import warnings
import json

## Rendering settings
## Use the 'plotly_white' template as the default for figures created after this point.
pio.templates.default = 'plotly_white'
## Display figures through the 'vscode' renderer.
pio.renderers.default = "vscode"

## Warning handling
## Silence warnings of category UserWarning for the rest of the session.
warnings.filterwarnings("ignore", category=UserWarning)

## **2. Data**

### **A. 데이터 불러오기**

In [2]:
## Raw data as read from the CSV file.
housing_price = pd.read_csv("kc_house_data.csv")

## Preprocessing: drop `id`, parse `date` into datetimes, and re-append
## `date` and `price` so that they become the last two columns.
df_preprocessed = (
    housing_price
    .drop(columns=["id", "date", "price"])
    .assign(
        date=pd.to_datetime(housing_price["date"]),
        price=housing_price["price"],
    )
)

## Score recording: starts empty (presumably filled with model scores later).
scoring_dict = {}

### **B. 자료 분할**

In [3]:
def month_days_split(df_train : pd.DataFrame, df_test : pd.DataFrame, origin : "pd.Timestamp | None" = None) :
    """
    Split the `date` column of the train/test sets into a month part and a
    day-offset part so the two can be analysed separately.

    Parameters
    ----------
    df_train, df_test : pd.DataFrame
        Frames that contain a datetime `date` column.
    origin : pd.Timestamp, optional
        Reference date from which the day offsets are counted. When omitted,
        the earliest date of the module-level `df_preprocessed` is used, which
        is the behaviour this function always had.

    Returns
    -------
    tuple of two lists
        `([train_month, train_days], [test_month, test_days])`, where each
        `*_month` is the month number as a string Series and each `*_days`
        is the number of whole days elapsed since `origin`.
    """
    ## Resolve the reference date once; evaluating `.min()` inside a per-row
    ## lambda rescans the whole column for every element.
    if origin is None :
        origin = df_preprocessed.date.min()
    else :
        origin = pd.Timestamp(origin)

    def _month_and_days(df : pd.DataFrame) :
        ## Month as a string so it can be treated as a categorical label.
        month = df.date.dt.month.astype(str)
        ## Vectorised subtraction gives the same timedeltas as the row-wise map.
        days = (df.date - origin).dt.days
        return [month, days]

    return _month_and_days(df_train), _month_and_days(df_test)

## 70/30 shuffled split with a fixed seed; each part gets a fresh 0..n-1 index
## (the old index is discarded).
df_train, df_test = (
    part.reset_index(drop=True)
    for part in train_test_split(
        df_preprocessed,
        test_size=0.3,
        shuffle=True,
        random_state=14107,
    )
)

## Month / day-offset views of the `date` column for both parts.
train_date, test_date = month_days_split(df_train, df_test)

## 코로플레스

In [None]:
## Only the coordinates and the price are needed for the density map.
df_feature = df_train.loc[:, ["lat", "long", "price"]]

## GeoJSON is JSON text, which is expected to be UTF-8; decode it explicitly
## instead of relying on the platform's locale-dependent default encoding.
with open('/root/ML2024/hw/기말 발표/graphics/King County.geojson', 'r', encoding = 'utf-8') as f:
    king_county_boundary = json.load(f)

fig = px.density_mapbox(
    data_frame = df_feature,
    lat = 'lat',
    lon = 'long',
    radius = 9,  ## radius of influence in pixels, so it does not scale with the zoom level
    center = {'lat' : 47.4421, 'lon' : -121.8089},
    z = 'price',  ## variable shown through the colour
    #---#
    mapbox_style = 'open-street-map',
    zoom = 8.9,
    width = 1200,
    height = 900
)

# Add the King County boundary on top of the density layer
fig.add_trace(
    go.Choroplethmapbox(
        geojson=king_county_boundary,
        locations=[feature['id'] for feature in king_county_boundary['features']], # matched against the geojson feature ids
        z=[1] * len(king_county_boundary['features']), # same value for every feature
        # NOTE(review): the alpha component 100 is outside the usual 0-1 range - confirm the intended transparency
        colorscale=[[0, "rgba(255,0,0,100)"], [1, "red"]], # transparency + red boundary
        showscale=False, # hide the colour bar
        marker_opacity=0.2, # opacity of the boundary area
        marker_line_width=2 # width of the boundary line
    )
)

## Disable scroll-wheel zooming in the rendered figure.
fig.show(config = {'scrollZoom' : False})