In [22]:
import platform
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

In [23]:
# Choose the matplotlib font family from the operating system name.
# Platforms without an entry in the table keep matplotlib's current font.
korean_font = {
    "Darwin": 'AppleGothic',      # macOS
    "Windows": 'Malgun Gothic',   # Windows
}.get(platform.system())
if korean_font is not None:
    mpl.rcParams['font.family'] = korean_font

# Keep the minus sign from rendering as a broken glyph.
mpl.rcParams['axes.unicode_minus'] = False

In [24]:
# Text encoding shared by the CSV read below and the CSV written at the end.
file_encoding = "UTF-8"
# Location of the input CSV, relative to the notebook's working directory.
file_url = "../data/(최종)_서울열선_광진도로.csv"

In [25]:
# Read the input CSV into a DataFrame. low_memory=False turns off pandas'
# chunked parsing, which can otherwise infer mixed dtypes within a column.
df = pd.read_csv(
    file_url,
    encoding=file_encoding,
    low_memory=False,
)

In [26]:
# Show the column labels of the loaded DataFrame.
df.columns

Index(['도로명', '열선', '도로 종류', '시작점_위도', '시작점_경도', '종료점_위도', '종료점_경도', '중앙점_위도',
       '중앙점_경도', '도로_길이', '도로폭', '도로규모', '행정구역', '고도의_차이', '경사각', '최근접_시설들_거리',
       '최근접_시설의_평균거리', '2019_평균_온도', '2020_평균_온도', '2021_평균_온도', '2022_평균_온도',
       '2023_평균_온도', '2024_평균_온도', '종합_평균_기온', '행정동', '생활인구', '최근접_시설들_최소거리',
       '최근접_시설들_최대거리'],
      dtype='object')

In [27]:
# Road names whose rows are removed from the ranking before it is saved.
# Each name appears once: the earlier list repeated '용마산로22길' and
# '아차산로78길', which had no effect on membership tests but made the list
# harder to audit.
blackList = [
    '긴고랑로',
    '용마산로22길',
    '아차산로78길',
    '초원로',
    '용마산로24길',
    '워커힐로',
    '긴고랑로46길',
    '자양강변길',
    '능동로4길',
]
# Left commented out in the original (currently NOT excluded): '광장로1가', '광장로1가길'

In [28]:
# Work on a copy so the frame loaded from disk is not modified by this cell.
data = df.copy()

# Column used as the classification label.
target = '열선'

# Columns fed to the model. Order matters: it fixes the column order of X and
# therefore the order of the importance weights computed later.
features = [
    '도로 종류',
    '도로_길이',
    '도로폭',
    '도로규모',
    '경사각',
    '최근접_시설의_평균거리',
    '최근접_시설들_최소거리',
    '종합_평균_기온',
    '2019_평균_온도',
    '2020_평균_온도',
    '2021_평균_온도',
    '2022_평균_온도',
    '2023_평균_온도',
    '2024_평균_온도',
    '생활인구',
]

y = data[target]
X = data[features]

# Detach X from `data` so the encoded values written below do not touch `data`.
X = X.copy()

# Replace every object-dtype column with integer codes; each column gets its
# own freshly fitted LabelEncoder.
object_columns = X.select_dtypes(include=['object']).columns
for col in object_columns:
    encoder = LabelEncoder()
    X[col] = encoder.fit_transform(X[col])

# Min-max scale every feature (the scaler's default output range is 0 to 1).
# The result is rebuilt as a DataFrame with the same column names and a fresh
# default index.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(X)
X_scaled = pd.DataFrame(scaled_values, columns=X.columns)

# Hold out 20% of the rows; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit an XGBoost classifier on the training portion only.
xgb_params = dict(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=5,
    random_state=42,
)
model = xgb.XGBClassifier(**xgb_params)
model.fit(X_train, y_train)

# Turn the fitted model's feature importances into weights that sum to 1.
feature_importance = model.feature_importances_
importance_total = feature_importance.sum()
weights = feature_importance / importance_total

# Score each row as the weighted sum of its scaled feature values. The result
# is a plain array, so it is assigned to `data` by position.
scaled_matrix = X_scaled.to_numpy()
data["설치_점수"] = scaled_matrix.dot(weights)

# Remove rows whose road name is on the exclusion list.
blacklisted_rows = data.index[data["도로명"].isin(blackList)]
data = data.drop(index=blacklisted_rows)

# Rank the remaining rows from highest to lowest score.
ranked = data.sort_values(by="설치_점수", ascending=False)

# Drop rows whose '열선' value equals 1 (presumably roads that already have
# heating installed; confirm against the dataset), then renumber from 0.
target_positive_rows = ranked.index[ranked["열선"] == 1]
sorted_data = ranked.drop(index=target_positive_rows).reset_index(drop=True)

# Write the ranked table to CSV.
# DataFrame.to_csv does not create missing parent directories, so the output
# folder is created first; without this, a fresh checkout that lacks
# ./make_file would only print an OSError and leave no file behind.
try:
    save_url = "./make_file/점수제_결과.csv"
    Path(save_url).parent.mkdir(parents=True, exist_ok=True)
    sorted_data.to_csv(save_url, encoding=file_encoding, index=False)
    print(f"{save_url} 파일 저장 완료.")
except OSError as e:
    # Best-effort save: report the failure and let the notebook continue.
    print(e)

./make_file/점수제_결과.csv 파일 저장 완료.
