In [None]:

import pandas as pd
import numpy as np

# Read the raw table from disk.
df = pd.read_csv("credit_simple.csv")

# "defaut" is the target column; every other column is a predictor.
y = df["defaut"]
X = df.drop(columns="defaut")

# Expand categorical predictors into indicator (dummy) columns.
X_encoded = pd.get_dummies(X)

# Binary target: "oui" -> 1, "non" -> 0.
# NOTE(review): Series.map leaves NaN for any label missing from the mapping;
# confirm the file only contains these two values.
y_encoded = y.map({"oui": 1, "non": 0})


In [None]:

class Node:
    """One node of a binary decision tree.

    A node with ``value`` set acts as a leaf and carries the prediction.
    A node with ``value`` left as ``None`` acts as an internal split: samples
    whose ``feature`` is ``<= threshold`` are routed to ``left``, the others
    to ``right``.

    Attributes:
        feature: Name of the column tested at this node (``None`` on leaves).
        threshold: Cut-off the feature is compared against (``None`` on leaves).
        left: Child node for the ``<= threshold`` branch.
        right: Child node for the ``> threshold`` branch.
        value: Predicted label for a leaf, ``None`` for an internal node.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, value=None):
        # Split rule.
        self.feature, self.threshold = feature, threshold
        # Sub-trees.
        self.left, self.right = left, right
        # Leaf payload.
        self.value = value


In [None]:

def gini(counts):
    """Return the Gini impurity ``1 - sum(p_k ** 2)`` of a class-count list.

    Args:
        counts: Iterable of non-negative per-class sample counts.

    Returns:
        The impurity as a float: 0 for a pure node, approaching 1 as the
        samples spread over more classes. An empty or all-zero ``counts``
        yields 0.0.
    """
    # Materialise once so generators can be summed and then iterated again.
    counts = list(counts)
    total = sum(counts)
    if total <= 0:
        # No samples means no class mix; reporting maximal impurity here
        # (as a bare ``1 - 0`` would) is misleading, so treat it as pure.
        return 0.0
    return 1 - sum((c / total) ** 2 for c in counts)


def _class_counts(labels):
    """Return the number of samples per distinct label (missing labels ignored)."""
    return pd.Series(labels).value_counts().tolist()


def best_split(X, y):
    """Find the single-feature threshold split with the largest Gini gain.

    Every distinct value of every column of ``X`` is tried as a threshold
    ``t``; samples with ``X[feature] <= t`` go left and those with
    ``X[feature] > t`` go right. Splits that leave one side empty are skipped.
    Class counts are taken from whatever labels occur in ``y``, so the labels
    do not have to be encoded as 0/1.

    Args:
        X: DataFrame of features.
        y: Labels for the rows of ``X``. It is indexed with boolean masks
            built from ``X``, so it must be aligned with ``X``.

    Returns:
        ``(feature, threshold)`` of the best split, or ``(None, None)`` when
        no split has a strictly positive gain.
    """
    best_gain = 0
    best_feature = None
    best_threshold = None

    n_samples = len(y)
    parent_impurity = gini(_class_counts(y))

    for feature in X.columns:
        column = X[feature]
        for t in column.unique():
            left = y[column <= t]
            right = y[column > t]
            n_left, n_right = len(left), len(right)
            if n_left == 0 or n_right == 0:
                continue

            # Size-weighted average impurity of the two children.
            impurity = (
                (n_left / n_samples) * gini(_class_counts(left))
                + (n_right / n_samples) * gini(_class_counts(right))
            )

            gain = parent_impurity - impurity

            # Strict comparison: on ties the first candidate seen wins.
            if gain > best_gain:
                best_gain = gain
                best_feature = feature
                best_threshold = t

    return best_feature, best_threshold


In [None]:

def build_tree(X, y, depth=0, max_depth=3):
    """Recursively grow a binary decision tree and return its root node.

    Growth stops, and a leaf holding the most frequent label of ``y`` is
    returned, when ``y`` contains a single distinct label, when ``depth``
    equals ``max_depth``, or when ``best_split`` finds no usable split.

    Args:
        X: DataFrame of features for the samples reaching this node.
        y: Series of labels aligned with ``X``.
        depth: Depth of the node being built (0 for the root).
        max_depth: Depth at which nodes are forced to be leaves.

    Returns:
        A ``Node``: a leaf with ``value`` set, or an internal node with
        ``feature``, ``threshold``, ``left`` and ``right`` set.
    """
    is_pure = len(set(y)) == 1
    if is_pure or depth == max_depth:
        return Node(value=y.mode()[0])

    split_feature, split_value = best_split(X, y)
    if split_feature is None:
        # No split improves impurity: fall back to a majority-label leaf.
        return Node(value=y.mode()[0])

    column = X[split_feature]
    goes_left = column <= split_value
    goes_right = column > split_value

    # Left subtree is built before the right one.
    children = [
        build_tree(X[mask], y[mask], depth + 1, max_depth)
        for mask in (goes_left, goes_right)
    ]

    return Node(split_feature, split_value, children[0], children[1])


In [None]:

def predict_one(x, node):
    """Return the prediction for one sample by walking the tree from ``node``.

    Args:
        x: A single sample, indexable by feature name (``x[node.feature]``).
        node: Root of the (sub)tree to evaluate.

    Returns:
        The ``value`` of the first node reached whose ``value`` is not ``None``.
    """
    current = node
    # Descend until a node carrying a prediction is reached.
    while current.value is None:
        if x[current.feature] <= current.threshold:
            current = current.left
        else:
            current = current.right
    return current.value
