In [1]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier, export_text

# Toy dataset, one (hours studied, outcome) pair per student, where the outcome
# is 1 for a pass and 0 for a fail. Unzipping the pairs yields two index-aligned
# arrays: the feature values and their labels.
study_hours, results = (
    np.array(column)
    for column in zip((2, 0), (4, 0), (6, 1), (8, 1), (10, 1))
)

def gini_impurity(values):
    """Return the Gini impurity of a group of binary pass/fail labels.

    Labels equal to 1 count as "pass"; every other label counts as "fail".
    The impurity is ``1 - (p_pass**2 + p_fail**2)``, which is 0 for a pure
    group and 0.5 for an even pass/fail mix.

    Parameters
    ----------
    values : array-like
        Outcome labels. NumPy arrays, lists and tuples are all accepted.

    Returns
    -------
    float
        Impurity of the group. An empty group is treated as pure (0.0).
    """
    # Coerce to an array first: on a plain list ``values == 1`` evaluates to a
    # single ``False`` instead of an element-wise mask, which would silently
    # report every list as perfectly pure.
    labels = np.asarray(values)
    if labels.size == 0:
        return 0.0
    p_pass = np.mean(labels == 1)
    p_fail = 1 - p_pass
    return 1 - (p_pass**2 + p_fail**2)

# Candidate thresholds: the midpoints between consecutive study-hour values.
split_points = [3, 5, 7, 9]
print("Checking possible splits...\n")

n_total = len(results)
for split in split_points:
    # Partition the outcomes by which side of the threshold each student is on.
    on_left = study_hours <= split
    left_group, right_group = results[on_left], results[~on_left]

    gini_left, gini_right = gini_impurity(left_group), gini_impurity(right_group)

    # Weight each side's impurity by its share of the samples.
    left_share = len(left_group) / n_total
    right_share = len(right_group) / n_total
    weighted_gini = left_share * gini_left + right_share * gini_right

    print(f"Split at {split} hours → Weighted Gini = {weighted_gini:.3f}")

# Fit a one-level tree (max_depth=1 permits a single split) with the Gini
# criterion, so its chosen threshold can be compared with the manual scores.
feature_matrix = study_hours.reshape(-1, 1)  # single feature column, shape (n_samples, 1)
model = DecisionTreeClassifier(criterion="gini", max_depth=1).fit(
    feature_matrix, results
)

print("\n✅ Best split found by model:")
tree_rules = export_text(model, feature_names=["Study Hours"])
print(tree_rules)


Checking possible splits...

Split at 3 hours → Weighted Gini = 0.300
Split at 5 hours → Weighted Gini = 0.000
Split at 7 hours → Weighted Gini = 0.267
Split at 9 hours → Weighted Gini = 0.400

✅ Best split found by model:
|--- Study Hours <= 5.00
|   |--- class: 0
|--- Study Hours >  5.00
|   |--- class: 1

