In [None]:
# Desicion Tree is used for both classification and regression tasks.
"""Decision Trees work by splitting the data into subsets based on feature values, creating a tree-like structure of decisions.
Decision tree means it makes decisions based on certain conditions to classify data or predict values.
Each internal node represents a decision based on a feature, each branch represents the outcome of that decision, and each leaf node represents a final prediction or class label.
For classification, the tree predicts discrete class labels, while for regression, it predicts continuous values.
Decision Trees are easy to interpret and visualize, making them popular for various applications."""

In [None]:
"""A Decision Tree is a supervised ML algorithm that makes predictions by:

Asking a sequence of yes/no questions (rules)

It looks like a tree:

Root → first question

Branches → answers

Leaves → final prediction

useful:Very easy to understand

Works with numbers + categories

No scaling required

Captures non-linear relationships"""

In [None]:
"""Tries all features

Chooses the best question to split data

Repeats until:

Data is pure OR

Tree is too deep"""

In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import numpy as np

In [2]:
df = pd.read_csv(r"C:\Users\USER\Downloads\Titanic-Dataset.csv") # Load Titanic dataset

In [3]:
# Handle missing values
df["Age"].fillna(df["Age"].median(), inplace=True)

# Encode categorical variables
df["Sex"] = df["Sex"].map({"male": 0, "female": 1})
df = pd.get_dummies(df, columns=["Embarked"], drop_first=True)

# Drop unnecessary columns
df.drop(columns=["Name", "Ticket", "Cabin"], inplace=True, errors="ignore")


In [4]:
X = df.drop("Survived", axis=1)
y = df["Survived"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [5]:
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)


In [6]:
y_pred = dt.predict(X_test)


In [7]:
accuracy_score(y_test, y_pred)
confusion_matrix(y_test, y_pred)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.81      0.84      0.82       134
           1       0.75      0.70      0.72        89

    accuracy                           0.78       223
   macro avg       0.78      0.77      0.77       223
weighted avg       0.78      0.78      0.78       223



In [9]:
#control overfitting with max_depth
dt = DecisionTreeClassifier(
    max_depth=4,
    min_samples_split=10,
    random_state=42
)
dt.fit(X_train, y_train)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.81      0.84      0.82       134
           1       0.75      0.70      0.72        89

    accuracy                           0.78       223
   macro avg       0.78      0.77      0.77       223
weighted avg       0.78      0.78      0.78       223

