## 0. 데이터 준비

In [None]:
# 임포트
# 데이터 전처리 및 기본 라이브러리
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 머신러닝 모델 임포트
from sklearn.tree import DecisionTreeClassifier  # 결정 트리
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier   # 랜덤 포레스트 , 그라디언트 부스팅
from xgboost import XGBClassifier  # XGBoost
from lightgbm import LGBMClassifier  # LightGBM
from sklearn.linear_model import LogisticRegression  # 로지스틱 회귀
from sklearn.neighbors import KNeighborsClassifier  # KNN

# 평가 지표
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix

In [None]:
# 1. Load the merged dataset.
df = pd.read_csv('../data/final_merge_data.csv')

# 2. Split features (X) from the target (y).
# Optional explicit feature-column lists; both are empty placeholders here.
x_col_min = []
x_col_sum = []
# The previous `X = df[]` was a syntax error. Use the listed columns when any
# are given; otherwise fall back to every column except the target.
# NOTE(review): whether x_col_min, x_col_sum, or both were meant to be selected
# is not visible here, and the fallback assumes all remaining columns are
# usable model inputs -- confirm before relying on the results.
feature_cols = list(dict.fromkeys(x_col_min + x_col_sum))  # de-duplicate, keep order
if feature_cols:
    X = df[feature_cols]
else:
    X = df.drop(columns=['final_result'])
y = df['final_result']

# 3. Hold out 20% as the test set, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


In [None]:
# 정확도 확인 함수

def evaluate_clf(y_true, y_pred):
    """Print accuracy plus weighted F1, precision and recall for predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels to compare against ``y_true``.

    Returns:
        None. The scores are written to standard output only.
    """
    print('정확도 평가')
    print('Accuracy :', accuracy_score(y_true, y_pred))

    # The remaining metrics all use the same 'weighted' class averaging; each
    # one is computed right before it is printed, in the listed order.
    weighted_metrics = (
        ('F1 Score :', f1_score),
        ('Precision :', precision_score),
        ('Recall :', recall_score),
    )
    for label, metric in weighted_metrics:
        print(label, metric(y_true, y_pred, average='weighted'))
    

## 1. 랜덤포레스트 (영서)

In [None]:
# Build two candidate random-forest configurations.
rf_model_1 = RandomForestClassifier(
    random_state=42,
    n_estimators=124,
    max_depth=30,
    min_samples_split=11,
    min_samples_leaf=1,
    max_features=None,
    criterion='entropy',
)

rf_model_2 = RandomForestClassifier(
    random_state=42,
    n_estimators=300,
    max_depth=20,
    min_samples_split=10,
    min_samples_leaf=2,
    max_features='sqrt',
    bootstrap=False,
)

# Fit each model and score it on its own predictions. Previously only
# rf_model_1 was fitted while predictions came from the unfitted rf_model_2,
# which fails at predict time.
for rf_name, rf_model in (('rf_model_1', rf_model_1), ('rf_model_2', rf_model_2)):
    rf_model.fit(X_train, y_train)
    y_pred = rf_model.predict(X_test)

    # Label the output so the two reports can be told apart.
    print(rf_name)
    evaluate_clf(y_test, y_pred)

## 2. 그라디언트 부스팅 (영서-민훈)

In [None]:
# Create the gradient boosting classifier and train it in one step;
# fit() returns the estimator itself, so gbm_model is the fitted model.
gbm_model = GradientBoostingClassifier(
    n_estimators=150,
    learning_rate=0.15,
    max_depth=10,
    min_samples_split=5,
    min_samples_leaf=2,
    random_state=42,
).fit(X_train, y_train)

# Predict on the held-out set and print the evaluation metrics.
y_pred = gbm_model.predict(X_test)
evaluate_clf(y_test, y_pred)

## 3. 결정트리 (환-영서)

In [None]:
# Create and train the decision tree model.
# random_state is fixed so repeated runs give the same tree, matching the
# other seeded models in this notebook.
dt = DecisionTreeClassifier(
    max_depth=None,
    max_features=None,
    min_samples_leaf=1,
    min_samples_split=2,
    random_state=42,
)
dt.fit(X_train, y_train)

# Predict with the decision tree itself. The earlier code called
# gbm_model.predict here, so the scores reported in this section were the
# gradient boosting model's, not the tree's.
y_pred = dt.predict(X_test)

# Print the evaluation metrics for the predictions.
evaluate_clf(y_test, y_pred)

## 4. KNN (영서)

In [None]:
# Build a k-nearest-neighbours classifier with its default settings and train
# it; fit() returns the estimator, so `model` is the fitted classifier.
model = KNeighborsClassifier().fit(X_train, y_train)

# Predict on the held-out set and print the evaluation metrics.
y_pred = model.predict(X_test)
evaluate_clf(y_test, y_pred)

## 5. XGBoost (도연)

In [None]:
# Build the XGBoost classifier (multiclass log-loss as the eval metric) and
# train it; fit() returns the estimator, so xgb_model is the fitted model.
xgb_model = XGBClassifier(eval_metric='mlogloss').fit(X_train, y_train)

# Predict on the held-out set and print the evaluation metrics.
y_pred = xgb_model.predict(X_test)
evaluate_clf(y_test, y_pred)

## 6. LightGBM (도연)

In [None]:
from lightgbm.basic import LightGBMError

# Hyperparameters shared by the GPU attempt and the CPU fallback.
lgbm_params = dict(
    num_leaves=40,
    n_estimators=200,
    min_data_in_leaf=20,
    max_depth=10,
    learning_rate=0.2,
    feature_fraction=0.9,
    random_state=42,
)

# Use the GPU when it is available. The device used to be hard-coded to "gpu",
# which makes training fail on a LightGBM build or machine without GPU
# support, so fall back to the CPU in that case.
try:
    lgbm = LGBMClassifier(**lgbm_params, device="gpu")
    lgbm.fit(X_train, y_train)
except LightGBMError:
    lgbm = LGBMClassifier(**lgbm_params, device="cpu")
    lgbm.fit(X_train, y_train)

y_pred = lgbm.predict(X_test)

# Print the evaluation metrics for the predictions.
evaluate_clf(y_test, y_pred)

## 7. 로지스틱 회귀 (영서)

In [None]:
# Build the logistic regression classifier (liblinear solver, C=0.01, up to
# 1000 iterations, fixed seed).
model = LogisticRegression(
    solver='liblinear',
    C=0.01,
    max_iter=1000,
    random_state=42,
)

# Train on the training split, then predict on the held-out set.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Print the evaluation metrics for the predictions.
evaluate_clf(y_test, y_pred)