In [5]:
import pandas as pd
import streamlit as st
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, cross_val_score
import plotly.express as px
import numpy as np

In [8]:
# Load and preprocess the data
@st.cache_data
def load_data(path: str = "../data/pm25_pm10_merged.csv") -> pd.DataFrame:
    """Read the merged PM2.5/PM10 CSV and add the derived date columns.

    Args:
        path: Location of the CSV file. Defaults to the path the notebook
            originally hard-coded, so existing ``load_data()`` calls behave
            the same; pass a different path instead of editing the function.

    Returns:
        The loaded frame with ``Date`` converted to datetime and a ``Month``
        column (1-12) extracted from it.

    Raises:
        KeyError: If the file has no ``Date`` column.
    """
    data = pd.read_csv(path)
    data['Date'] = pd.to_datetime(data['Date'])
    data['Month'] = data['Date'].dt.month  # month number, used later as a model feature
    return data



In [22]:
# Train XGBoost models (one model per target city)
def train_model(data):
    """Fit one XGBoost regressor per target city, predicting its PM2.5 from Beijing's.

    The long-format frame is pivoted to one row per date and one column per
    city. Each target city gets its own model with ``Beijing`` and ``Month``
    as features.

    Missing readings are NOT used for training: a city's model is fitted only
    on dates where both Beijing and that city have an observed value. Filling
    gaps with 0 and training on them would teach the model that a missing
    measurement means perfectly clean air. When the data has no gaps the
    training rows, splits and models are the same as with the zero-filled frame.

    Args:
        data: Long-format frame with ``Date``, ``City``, ``Month`` and
            ``PM2.5 (µg/m³)`` columns, one row per (date, city).

    Returns:
        A tuple ``(models, X_tests, y_tests, pivot_data)``. The first three are
        dicts keyed by city name holding the fitted model and its held-out
        features/targets; a city with five or fewer usable rows is omitted.
        ``pivot_data`` is the wide frame with missing readings filled with 0
        and a ``Month`` column, unchanged from the previous behaviour.
    """
    readings = data.pivot(index='Date', columns='City', values='PM2.5 (µg/m³)').reset_index()

    # Zero-filled copy kept for callers that consume the returned wide frame.
    pivot_data = readings.fillna(0)
    pivot_data['Month'] = data.groupby('Date')['Month'].first().values

    target_cities = ['Seoul', 'Tokyo', 'Delhi', 'Bangkok']
    beijing_observed = readings['Beijing'].notna()

    # Per-city results: {city: model}, {city: X_test}, {city: y_test}
    models = {}
    X_tests = {}
    y_tests = {}

    for city in target_cities:
        # Use only dates where both the feature and the target were really measured.
        observed = beijing_observed & readings[city].notna()
        X = pivot_data.loc[observed, ['Beijing', 'Month']]
        y = pivot_data.loc[observed, city]

        # Too few rows to hold out a test set; leave this city without a model.
        if len(X) > 5:
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
            model = XGBRegressor(n_estimators=200, learning_rate=0.05, max_depth=3, random_state=42, objective='reg:squarederror')
            model.fit(X_train, y_train)
            models[city] = model
            X_tests[city] = X_test
            y_tests[city] = y_test

    return models, X_tests, y_tests, pivot_data
    

In [24]:
# Prediction helper
def predict_pm25(models, beijing_pm25, month):
    """Predict each modelled city's PM2.5 from a Beijing reading and a month.

    Args:
        models: Dict mapping city name to a fitted regressor exposing
            ``predict``. Only the cities present here are predicted, so a
            city that could not be trained is simply absent from the result
            instead of raising ``KeyError``.
        beijing_pm25: Beijing PM2.5 value used as the first feature.
        month: Month number used as the second feature.

    Returns:
        Dict mapping each city in ``models`` to its predicted value, plus a
        ``'Beijing'`` entry echoing the input reading.
    """
    # Use the same column names the models are fitted with ('Beijing', 'Month')
    # so the input lines up with the training features by name, not just by position.
    features = pd.DataFrame([[beijing_pm25, month]], columns=['Beijing', 'Month'])

    predictions = {city: model.predict(features)[0] for city, model in models.items()}
    predictions['Beijing'] = beijing_pm25
    return predictions


In [25]:
# Model scoring (hold-out score plus cross-validation)
def score_model(models, X_tests, y_tests, cv=5):
    """Score every city model on its held-out split and by cross-validation.

    Args:
        models: Dict mapping city name to a fitted regressor.
        X_tests: Dict mapping city name to that city's held-out features.
        y_tests: Dict mapping city name to that city's held-out targets.
        cv: Maximum number of cross-validation folds (default 5, the value
            previously hard-coded).

    Returns:
        A tuple ``(scores, cv_scores)`` of dicts keyed by city. ``scores``
        holds ``model.score`` on the held-out split. ``cv_scores`` holds the
        mean R² from ``cross_val_score`` on that same split, or NaN when the
        split is too small to form at least two folds of two samples each.
    """
    scores = {}
    cv_scores = {}

    for city, model in models.items():
        X_test = X_tests[city]
        y_test = y_tests[city]
        scores[city] = model.score(X_test, y_test)

        # R² needs at least two samples per validation fold, and the splitter
        # rejects more folds than samples, so shrink the fold count for small
        # splits instead of crashing or averaging undefined scores.
        n_folds = min(cv, len(y_test) // 2)
        if n_folds < 2:
            cv_scores[city] = float('nan')
        else:
            # NOTE(review): this cross-validates on the held-out split only,
            # refitting clones on small subsets; confirm that is intended
            # rather than cross-validating on the full data set.
            cv_scores[city] = cross_val_score(model, X_test, y_test, cv=n_folds, scoring='r2').mean()

    return scores, cv_scores

In [26]:
# Air-quality grade lookup
def get_grade(pm25):
    """Return the ``(grade label, colour name)`` pair for a PM2.5 value.

    Bands are checked in ascending order with inclusive upper bounds of
    15, 35 and 75; anything that falls in none of them gets the worst grade.
    The labels are the Korean display strings.
    """
    bands = (
        (15, "좋음", "green"),
        (35, "보통", "blue"),
        (75, "나쁨", "orange"),
    )
    for upper_bound, label, colour in bands:
        if pm25 <= upper_bound:
            return label, colour
    return "매우 나쁨", "red"


In [27]:
# City coordinates keyed by city name; each value appears to be a
# (latitude, longitude) pair in decimal degrees.
city_coords = dict(
    Seoul=(37.5665, 126.978),
    Tokyo=(35.6895, 139.6917),
    Beijing=(39.9042, 116.4074),
    Delhi=(28.7041, 77.1025),
    Bangkok=(13.7563, 100.5018),
)

SyntaxError: incomplete input (67702666.py, line 71)