# Decision_Tree

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier 
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

# 데이터 로드 
final_merged_data = pd.read_csv('./data/final_merge_data.csv')


sum_clicked_features = ['highest_education', 'imd_band', 'log_sum_click', 'log_studied_credits', 'scaled_score']
mean_clicked_features = ['highest_education', 'imd_band', 'log_mean_click', 'log_studied_credits', 'scaled_score']

sum_x = final_merged_data[sum_clicked_features]
mean_x = final_merged_data[mean_clicked_features]
y = final_merged_data['final_result']

smote = SMOTE(sampling_strategy='auto', random_state=42)

sum_x_resampled, y_resampled = smote.fit_resample(sum_x, y)
mean_x_resampled, y_resampled = smote.fit_resample(mean_x, y)

sum_x_train, sum_x_test, y_train, y_test = train_test_split(sum_x_resampled, y_resampled, test_size=0.1, random_state=42, stratify=y_resampled)
mean_x_train, mean_x_test, y_train, y_test = train_test_split(mean_x_resampled, y_resampled, test_size=0.1, random_state=42, stratify=y_resampled)

# 출력 함수
def evaluate_clf(y_true, y_pred):
    print('정확도 평가')
    print('Accuracy :', accuracy_score(y_true, y_pred))
    print('F1 Score :', f1_score(y_true, y_pred, average='weighted'))
    print('Precision :', precision_score(y_true, y_pred, average='weighted'))
    print('Recall :', recall_score(y_true, y_pred, average='weighted'))

def evaluate_model(model_name, y_true, y_pred):
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred, average='weighted')
    print(f"\n=== {model_name} 성능 ===")
    print(f"Accuracy: {acc:.4f}")
    print(f"F1 Score: {f1:.4f}")
    print("Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred))
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

# Decision Tree(sum_click)

In [3]:
dt_1 = DecisionTreeClassifier(max_depth= None, 
                            max_features= None,
                            min_samples_leaf= 1,
                            min_samples_split=2
                            )
dt_1.fit(sum_x_train, y_train)
y_pred = dt_1.predict(sum_x_test)

evaluate_clf(y_test, y_pred)

정확도 평가
Accuracy : 0.8920908183632734
F1 Score : 0.8920654053905375
Precision : 0.8924604342904996
Recall : 0.8920908183632734


# Decision Tree(mean_click)

In [4]:
dt_2 = DecisionTreeClassifier(max_depth= None, 
                            max_features= None,
                            min_samples_leaf= 1,
                            min_samples_split=2
                            )
dt_2.fit(mean_x_train, y_train)
y_pred = dt_2.predict(mean_x_test)

evaluate_clf(y_test, y_pred)

정확도 평가
Accuracy : 0.8825162175648703
F1 Score : 0.8825004868490066
Precision : 0.8827211708564138
Recall : 0.8825162175648703
