In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 데이터 로드
df = pd.read_csv('C:\\workspace\\WASSUP4\\data\\mini.csv', sep=',')
basic_users = df[df['subscription_type'] == 'Basic']

# 주요 변수 선택 및 타겟 변수 설정
X = basic_users[['customer_inquiry_history', 'subscription_duration', 
                 'average_time_per_learning_session', 'monthly_active_learning_days', 
                 'community_engagement_level', 'total_completed_courses']]
y = basic_users['target']

# 데이터 스케일링
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [4]:
rf_model = RandomForestClassifier(random_state=42)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}

# GridSearchCV를 사용하여 최적의 하이퍼파라미터 찾기
grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_scaled, y)

# 최적의 모델로 예측
best_rf_model = grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_scaled)


In [5]:
# 성능 평가
accuracy_rf = accuracy_score(y, y_pred_rf)
classification_rf = classification_report(y, y_pred_rf)
conf_matrix_rf = confusion_matrix(y, y_pred_rf)

print("Best Parameters:", grid_search.best_params_)
print("Accuracy:", accuracy_rf)
print("Classification Report:\n", classification_rf)
print("Confusion Matrix:\n", conf_matrix_rf)


Best Parameters: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}
Accuracy: 0.6394045827061382
Classification Report:
               precision    recall  f1-score   support

           0       1.00      0.06      0.11      2296
           1       0.63      1.00      0.77      3683

    accuracy                           0.64      5979
   macro avg       0.82      0.53      0.44      5979
weighted avg       0.77      0.64      0.52      5979

Confusion Matrix:
 [[ 140 2156]
 [   0 3683]]
