# Notebook 03 – Decision Tree Model & Hyperparameter Tuning

In [18]:
import os

import joblib
import pandas as pd
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier


Load dataset

In [19]:
# Location of the raw Telco churn CSV. Set the TELCO_CHURN_CSV environment
# variable to point at a copy elsewhere; when it is unset the original
# machine-specific path is used, so existing setups keep working unchanged.
data_path = os.environ.get(
    "TELCO_CHURN_CSV",
    r"C:/Users/USER/PycharmProjects/ML/data/Telco-Customer-Churn.csv",
)
df = pd.read_csv(data_path)

Clean data

In [20]:
# Drop the identifier column. errors="ignore" keeps this cell re-runnable:
# on a second execution the column is already gone and a plain drop would
# raise KeyError.
df = df.drop(columns="customerID", errors="ignore")

# Coerce TotalCharges to numeric (unparseable entries become NaN), then
# fill those NaN values with the column median.
df["TotalCharges"] = pd.to_numeric(df["TotalCharges"], errors="coerce")
df["TotalCharges"] = df["TotalCharges"].fillna(df["TotalCharges"].median())

# Encode the target as 1 ("Yes") / 0 ("No"). Skip the mapping when the column
# is already fully encoded: mapping 0/1 through a {"Yes", "No"} dict would
# silently replace every label with NaN on a re-run.
if not df["Churn"].isin([0, 1]).all():
    df["Churn"] = df["Churn"].map({"Yes": 1, "No": 0})


Load Preprocessor

In [21]:
# Restore the previously saved preprocessing object from the results folder.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
preprocessor_path = "results/preprocessor.pkl"
preprocessor = joblib.load(preprocessor_path)
print("Preprocessor loaded successfully.")


Preprocessor loaded successfully.


Separate Features and Target

In [23]:
# Split the cleaned frame into the target column (Churn) and the remaining
# predictor columns.
target_col = "Churn"
y = df[target_col]
X = df.drop(columns=[target_col])


Transform Features

In [24]:
# Run the feature frame through the preprocessor loaded from disk; only
# transform() is called here, so the object is used as it was saved.
# NOTE(review): how and on which rows the preprocessor was fitted is not
# visible in this notebook — confirm it was not fitted on the test rows.
X_pre = preprocessor.transform(X)

Train-Test Split

In [25]:
# Hold out 20% of the rows for evaluation. The split is stratified on y and
# uses a fixed random_state so it is the same on every run.
split_parts = train_test_split(
    X_pre,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
X_train, X_test, y_train, y_test = split_parts


Train Decision Tree Model

In [26]:
# Hyperparameters for the tree, gathered in one place: Gini impurity as the
# split criterion, limits on depth and on split/leaf sample counts, and a
# fixed random_state.
dt_params = {
    "criterion": "gini",
    "max_depth": 6,
    "min_samples_split": 20,
    "min_samples_leaf": 10,
    "random_state": 42,
}

# Build the classifier from those settings and fit it on the training split.
dt_clf = DecisionTreeClassifier(**dt_params)
dt_clf.fit(X_train, y_train)

print("Decision Tree trained successfully.")


Decision Tree trained successfully.


Evaluate Model

In [27]:
# Predict on the held-out split and summarise the results.
y_pred = dt_clf.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Decision Tree Accuracy:", test_accuracy)
print("\nClassification Report:\n", report_text)

# Keep the confusion matrix as the cell's final expression so the notebook
# displays it.
conf_matrix = confusion_matrix(y_test, y_pred)
conf_matrix


Decision Tree Accuracy: 0.7970191625266146

Classification Report:
               precision    recall  f1-score   support

           0       0.83      0.92      0.87      1035
           1       0.67      0.47      0.55       374

    accuracy                           0.80      1409
   macro avg       0.75      0.69      0.71      1409
weighted avg       0.78      0.80      0.78      1409



array([[948,  87],
       [199, 175]])

Save the Model

In [29]:
# Persist the fitted classifier to the results folder with joblib.
model_path = "results/decision_tree_model.pkl"
joblib.dump(dt_clf, model_path)
print("Decision Tree Model saved as decision_tree_model.pkl")


Decision Tree Model saved as decision_tree_model.pkl
