## Hyperparameter tuning for Decision Tree

In [4]:
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# 1. Read the processed churn table and separate the target from the features
df = pd.read_csv('../data/telco_churn_processed.csv')
y = df['Churn']
X = df.drop(columns='Churn')

# Keep 20% of the rows aside for the final check; the fixed seed makes the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# 2. Candidate settings: 5 depths x 3 split thresholds x 2 criteria = 30 combos
param_grid = dict(
    max_depth=[3, 5, 7, 10, None],
    min_samples_split=[2, 5, 10],
    criterion=['gini', 'entropy'],
)

# 3. Exhaustive search over the grid, scored by accuracy with 5-fold CV
grid_search = GridSearchCV(
    DecisionTreeClassifier(random_state=42),
    param_grid,
    scoring='accuracy',
    cv=5,
    verbose=1,
)

# 4. Fit every candidate on the training split only
print("Starting Hyperparameter Tuning...")
grid_search.fit(X_train, y_train)

# 5. Report the winning combination and its mean cross-validated score
best_params, best_score = grid_search.best_params_, grid_search.best_score_

print(f"\n✅ Best Parameters Found: {best_params}")
print(f"✅ Best Cross-Validation Accuracy: {best_score:.4f}")

# 6. Score the best estimator on the held-out test rows
#    (best_estimator_ is available because GridSearchCV refits by default)
best_model = grid_search.best_estimator_
test_acc = accuracy_score(y_true=y_test, y_pred=best_model.predict(X_test))
print(f"Test Set Accuracy of Optimized Tree: {test_acc:.4f}")

Starting Hyperparameter Tuning...
Fitting 5 folds for each of 30 candidates, totalling 150 fits

✅ Best Parameters Found: {'criterion': 'entropy', 'max_depth': 7, 'min_samples_split': 2}
✅ Best Cross-Validation Accuracy: 0.7931
Test Set Accuracy of Optimized Tree: 0.7765
