In [2]:
import sys
import statistic_lib as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib tk

In [46]:
def scale(df):
    """Standardize every column of ``df`` except the first one, in place.

    Each processed column is replaced by ``(column - mean) / std``, where the
    mean and standard deviation come from ``st.mean`` and ``st.std``.

    A column whose standard deviation is 0 is left unchanged: dividing by
    zero is impossible and a constant column has nothing to rescale.

    NOTE(review): the loop iterates over ``df.columns[1:]``, so the first
    column is never scaled. If the caller has already removed its index
    column, the first real feature stays on its raw scale - confirm that
    this is intended.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame of numeric columns. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after modification.
    """
    for col in df.columns[1:]:
        values = df[col]
        spread = st.std(values)
        if spread == 0:
            # Constant column: skip it instead of reusing a stale result
            # computed for a previous column.
            continue
        df[col] = (values - st.mean(values)) / spread
    return df

In [47]:
# Load the training set and discard every row that contains a missing value.
df = pd.read_csv('../datasets/dataset_train.csv').dropna()

# Keep the numeric columns only, minus the row index and four excluded courses.
df_numeric = (
    df.select_dtypes(include='number')
      .drop(columns=[
          'Index',
          'Care of Magical Creatures',
          'Potions',
          'Arithmancy',
          'Astronomy',
      ])
)

In [48]:
# Rows of df_numeric are training examples, columns are features.
nb_exemples, nb_features = df_numeric.shape
# Number of target classes.
nb_classes = 4

In [49]:
# One-hot encode the house labels: row i gets a 1 in the column of its house.
houses = ['Gryffindor', 'Slytherin', 'Ravenclaw', 'Hufflepuff']
house_to_index = dict(zip(houses, range(len(houses))))

Y = np.zeros((nb_exemples, nb_classes))
class_ids = np.array([house_to_index[house] for house in df['Hogwarts House']], dtype=int)
Y[np.arange(len(class_ids)), class_ids] = 1
Y

array([[0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       ...,
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.]])

In [50]:
# Standardize the feature columns and convert the frame to a NumPy array.
# scale() assigns into the frame it receives, so df_numeric itself is
# modified by this line as well.
X = np.array(scale(df_numeric))

In [51]:
# Display the frame after the scale() call above has modified it in place.
df_numeric

Unnamed: 0,Herbology,Defense Against the Dark Arts,Divination,Muggle Studies,Ancient Runes,History of Magic,Transfiguration,Charms,Flying
0,5.727180,1.026164,0.367009,1.020932,0.339678,0.505583,0.225756,1.198566,-0.511677
1,-5.987446,1.149402,-2.147745,-0.545465,-1.204590,0.253318,0.661470,-1.008153,-1.397510
2,7.725017,0.792246,0.712076,1.831295,0.996868,0.129509,1.324351,1.818968,0.074819
3,-6.497214,-1.250661,0.197639,-0.647902,0.259972,-1.748129,-2.489951,-1.538773,1.816807
5,-4.289197,1.267747,-2.386225,-0.449871,-0.937954,0.539078,0.522737,-0.525757,-0.591500
...,...,...,...,...,...,...,...,...,...
1595,-4.541837,-0.591091,0.605490,-0.565874,1.143443,-1.842869,-1.493064,-0.804413,1.665245
1596,6.061064,-0.616538,-0.354516,-0.867109,-0.472718,-0.166207,0.597847,-0.352965,0.221980
1597,-3.203269,-0.955457,0.693825,-0.334673,1.302735,-2.011781,-1.728549,-0.945848,1.794600
1598,3.442831,-0.781967,0.857598,-1.255874,-1.057548,0.187314,1.321186,-0.326054,-1.022546


In [52]:
# Prepend a constant column of ones to the feature matrix.
ones = np.ones(X.shape[0])
X = np.hstack((ones[:, np.newaxis], X))

# Show the new shape, then the matrix itself.
print(X.shape, X, sep='\n')

(1251, 10)
[[ 1.          5.7271803   1.02616421 ...  0.22575562  1.19856638
  -0.51167711]
 [ 1.         -5.98744578  1.1494017  ...  0.66147001 -1.0081526
  -1.39751008]
 [ 1.          7.72501661  0.79224618 ...  1.32435095  1.81896781
   0.07481882]
 ...
 [ 1.         -3.20326932 -0.95545673 ... -1.7285494  -0.94584842
   1.79459968]
 [ 1.          3.44283088 -0.78196693 ...  1.3211861  -0.32605358
  -1.02254566]
 [ 1.          5.42104564 -1.23370243 ...  0.11520759 -0.07908455
  -0.79699392]]


### Fonction Softmax

In [53]:
def softmax(Z):
    """Row-wise softmax of a 2-D score matrix.

    The maximum of each row is subtracted before exponentiating, which keeps
    ``np.exp`` from overflowing without changing the result.

    Parameters
    ----------
    Z : array-like of shape (rows, columns)
        Raw scores; normalisation is done along axis 1.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``Z`` in which every row sums to 1.
    """
    shifted = Z - np.max(Z, axis=1, keepdims=True)
    weights = np.exp(shifted)
    return weights / weights.sum(axis=1, keepdims=True)

On soustrait le maximum de chaque ligne de Z avant d'appliquer l'exponentielle.
Cela aide à stabiliser les calculs numériques et à éviter les débordements.
<b>On divise l'exponentielle de chaque élément de Z par la somme des exponentielles de sa ligne.</b> Cela transforme chaque ligne de Z en une distribution de probabilité (la somme des éléments de chaque ligne vaut 1).

### Fonction Coût : entropie croisée

In [54]:
def cross_entropy(Y, Y_hat):
    """Cross-entropy of each row of ``Y_hat`` against the matching row of ``Y``.

    Parameters
    ----------
    Y : numpy.ndarray of shape (rows, classes)
        Target weights for each class (one-hot rows in this notebook).
    Y_hat : numpy.ndarray of shape (rows, classes)
        Predicted probabilities.

    Returns
    -------
    numpy.ndarray of shape (rows,)
        One loss value per row: ``-sum(Y * log(Y_hat + 1e-9))`` along axis 1.
    """
    # The 1e-9 offset keeps log() finite when a predicted probability is 0.
    log_probs = np.log(Y_hat + 1e-9)
    per_example = -(Y * log_probs).sum(axis=1)
    return per_example

def cost_function(Y, Y_hat):
    """Mean cross-entropy over all rows of ``Y`` / ``Y_hat``.

    Parameters
    ----------
    Y : numpy.ndarray of shape (rows, classes)
        Target weights for each class.
    Y_hat : numpy.ndarray of shape (rows, classes)
        Predicted probabilities.

    Returns
    -------
    float
        Sum of the per-row cross-entropies divided by the number of rows.
    """
    losses = cross_entropy(Y, Y_hat)
    # Average over the rows actually received, so the result is also correct
    # for a batch whose size differs from the training set.
    return np.sum(losses) / losses.shape[0]

### Descente de gradient

In [91]:
def compute_gradients(X, Y, Y_hat):
    """Gradients of the mean cross-entropy for a softmax linear model.

    Parameters
    ----------
    X : numpy.ndarray of shape (rows, inputs)
        Input matrix.
    Y : numpy.ndarray of shape (rows, classes)
        Target weights for each class.
    Y_hat : numpy.ndarray of shape (rows, classes)
        Predicted probabilities.

    Returns
    -------
    dW : numpy.ndarray of shape (inputs, classes)
        ``X.T @ (Y_hat - Y)`` divided by the number of rows.
    db : numpy.ndarray of shape (1, classes)
        Column sums of ``Y_hat - Y`` divided by the number of rows.
    """
    # Use the size of the batch that was passed in, not a notebook global.
    n_rows = X.shape[0]
    error = Y_hat - Y
    dW = np.dot(X.T, error) / n_rows
    db = np.sum(error, axis=0, keepdims=True) / n_rows
    return dW, db

In [101]:
def gradient_descent(X, Y, learning_rate=0.01, num_iterations=1000, log_every=None):
    """Fit a softmax linear model with batch gradient descent.

    Parameters
    ----------
    X : numpy.ndarray of shape (rows, 1 + features)
        Input matrix. The weight matrix is built with one extra leading row,
        so ``X`` is expected to already carry a leading constant column.
    Y : numpy.ndarray of shape (rows, classes)
        Target weights for each class.
    learning_rate : float
        Step size applied to both gradients.
    num_iterations : int
        Number of update steps.
    log_every : int or None
        When set to a positive integer, print the cost every ``log_every``
        iterations. Nothing is printed by default.

    Returns
    -------
    W : numpy.ndarray of shape (1 + features, classes)
    biais : numpy.ndarray of shape (1, classes)
    Y_hat : numpy.ndarray of shape (rows, classes)
        Predictions computed with the returned ``W`` and ``biais``.
    """
    # Sizes come from the arguments so the function does not rely on
    # notebook-level variables.
    n_classes = Y.shape[1]
    n_feature_rows = X.shape[1] - 1

    biais = np.zeros((1, n_classes))

    # Deterministic start: every feature row is [0.1, 0.2, ..., 0.1 * n_classes].
    # Built from an integer range because a float-stepped np.arange has a
    # length that depends on rounding.
    class_seed = 0.1 * np.arange(1, n_classes + 1)
    W = np.full((n_feature_rows, n_classes), class_seed)
    # The first row of W starts at zero and multiplies X's first column.
    # np.vstack replaces the deprecated np.row_stack alias.
    W = np.vstack((biais, W))

    # NOTE(review): when X's first column is all ones, W[0] and biais receive
    # the same gradient, so the model carries two intercepts. Kept as is to
    # preserve the returned parameter shapes - consider dropping one.
    for i in range(num_iterations):
        Y_hat = softmax(np.dot(X, W) + biais)  # linear scores -> probabilities

        if log_every and i % log_every == 0:
            print(f"Iteration {i}, Cost: {cost_function(Y, Y_hat)}")

        dW, db = compute_gradients(X, Y, Y_hat)

        W -= learning_rate * dW
        biais -= learning_rate * db

    # Recompute the predictions so they match the returned parameters; this
    # also defines Y_hat when num_iterations is 0.
    Y_hat = softmax(np.dot(X, W) + biais)
    return W, biais, Y_hat

In [102]:
W, biais, Y_hat = gradient_descent(X, Y, learning_rate=0.01, num_iterations=10000)

In [103]:
W

array([[-0.00995087, -0.09950956,  0.09561384,  0.01384659],
       [ 0.14315542,  0.02556527,  0.31809022,  0.51318908],
       [-0.17780475,  0.85679427,  0.99846769, -0.67745721],
       [ 0.76419351, -0.65591798,  0.20302191,  0.68870256],
       [ 0.27302604,  0.11601094,  1.05054984, -0.43958682],
       [ 0.73818493, -0.09992253,  0.87913564, -0.51739804],
       [-0.33920665,  0.45161788,  0.30598265,  0.58160612],
       [-0.37812987,  0.71501229,  0.24947101,  0.41364657],
       [ 0.24789843,  0.27512497,  0.76241555, -0.28543895],
       [ 0.73157974, -0.10883588,  0.20291573,  0.1743404 ]])

In [104]:
Y

array([[0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       ...,
       [1., 0., 0., 0.],
       [0., 0., 0., 1.],
       [0., 0., 0., 1.]])

In [105]:
Y_hat

array([[1.15067088e-02, 1.63643983e-02, 9.47459669e-01, 2.46692239e-02],
       [3.58362838e-04, 9.95738465e-01, 2.45809181e-03, 1.44508069e-03],
       [4.16650782e-03, 1.75377466e-03, 9.89673281e-01, 4.40643693e-03],
       ...,
       [9.93536345e-01, 3.95090084e-04, 3.13933405e-03, 2.92923057e-03],
       [2.89518743e-03, 7.23233914e-03, 1.95948984e-03, 9.87912984e-01],
       [2.67082717e-03, 1.64741138e-03, 2.91483393e-03, 9.92766928e-01]])