In [None]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def gini(counts):
    """Return the Gini impurity of a node from its per-class sample counts.

    Computes ``1 - sum(p_i ** 2)`` with ``p_i = counts[i] / sum(counts)``.

    Args:
        counts: Sequence of class counts (iterated twice, so not a one-shot
            generator).

    Returns:
        The Gini impurity. ``0.0`` when the counts are empty or their total
        is not positive: an empty node has no class mixture to measure.
    """
    total = sum(counts)
    # Guard before dividing; without it an empty node would score 1, the
    # worst possible impurity, instead of 0.
    if total <= 0:
        return 0.0
    return 1 - sum((c / total) ** 2 for c in counts)

def entropy(counts):
    """Return the Shannon entropy (in bits) of a node from its class counts.

    Computes ``-sum(p_i * log2(p_i))`` over the classes with a strictly
    positive count, where ``p_i = counts[i] / sum(counts)``.

    Args:
        counts: Sequence of class counts (iterated twice, so not a one-shot
            generator).

    Returns:
        The entropy as a float. ``0.0`` when the counts are empty or their
        total is not positive.
    """
    total = sum(counts)
    if total <= 0:
        return 0.0
    # Zero counts are skipped: log2(0) is undefined and such classes
    # contribute nothing to the entropy.
    weighted_logs = sum((c / total) * np.log2(c / total) for c in counts if c > 0)
    # Subtract from 0.0 rather than negating: negating a zero sum (pure node)
    # would yield -0.0, which prints as "-0.0" in tables.
    return 0.0 - weighted_logs

def classification_error(counts):
    """Return the misclassification error of a node from its class counts.

    Computes ``1 - max(counts) / sum(counts)``, i.e. the fraction of samples
    that do not belong to the majority class.

    Args:
        counts: Sequence of class counts (iterated twice, so not a one-shot
            generator).

    Returns:
        The classification error. ``0.0`` when the counts are empty or their
        total is not positive, instead of raising on ``max([])`` or on a
        division by zero.
    """
    total = sum(counts)
    # Checked first so that neither max() on an empty sequence nor the
    # division below is reached for an empty node.
    if total <= 0:
        return 0.0
    return 1 - max(counts) / total


In [None]:

# Two-class count distributions to compare, keyed by a display label.
cases = {
    "10/10": [10, 10],
    "18/2": [18, 2],
    "9/1": [9, 1],
    "5/5": [5, 5],
    "1/9": [1, 9],
}

# One row per case: the label followed by each impurity measure, evaluated
# in the same order as the table columns below.
data = []
for name, counts in cases.items():
    data.append(
        [name, *(measure(counts) for measure in (gini, entropy, classification_error))]
    )

df = pd.DataFrame(data=data, columns=["Cas", "Gini", "Entropie", "Erreur"])
df


In [None]:

# Grouped bar chart: one group per case, one bar per impurity measure.
# The title and y-label are set on the Axes returned by pandas.
df.set_index("Cas").plot.bar().set(
    title="Comparaison des mesures d'impureté",
    ylabel="Valeur",
)
plt.show()
