In [347]:
import numpy as np
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
import random

# Projet: Modèles linéaires:  Adaline et Regression Logistique

Nous allons nous intéresser à l'implémentation d'un algorithme de descente de gradient pour trouver le meilleur paramètre d'un module Adaline ou de regression logistique.

Pout cela, on implémentera un algorithme de descente de gradient stochastique que nous avons vu au TP précédent et dont le pseudo-code peut être résumé comme suit:

```input: Train, eta, m, MaxEp, modele
init : w
epoque=0
while epoque<=MaxEp
    choisir un exemple (x,y) de Train de façon aléatoire
    calculer h = w*x
    calculer Loss(h, y)
    w <- w - eta*"gradient de Loss(h, y) par rapport à w"
    epoque <- epoque+1
output: w
```
où "eta" est le pas de la descente de gradient (exemple: eta=0.01).

Si on veut imprimer l'erreur tous les "m" pas de gradient:
```input: Train, eta, m, MaxEp, modele
init : w
epoque=0
while epoque<=MaxEp
    err = 0
    for i in range(m):
        choisir un exemple (x,y) de Train de façon aléatoire
        calculer h = w*x
        err += Loss(h, y)
        w <- w - eta*"gradient de Loss(h, y) par rapport à w"
    epoque <- epoque+1
    print(err)
output: w
```

Pour un poids $w$, on définit $h_\mathbf{w}(\mathbf{x})=w_0x_0+w_1x_1+...w_dx_d$. Pour chacun des deux modèles, et pour un exemple $(\mathbf{x},y)$, la prédiction $\hat{y}(\mathbf{w}, \mathbf{x})$ et la fonction de coût  $\mathcal{L}(\mathbf{w}, \mathbf{x})$ sont: 
- Adaline: $\hat{y}(\mathbf{w}, \mathbf{x}) = h_\mathbf{w}(x)$ et $$\mathcal{L}(\mathbf{w})=(y-\hat{y}(\mathbf{w},\mathbf{x}))^2=(y-h_\mathbf{w}(\mathbf{x}))^2,$$
- Régression logistique: $\hat{y}(w, x) = 1/(1+e^{-h_{\mathbf{w}}(\mathbf{x})})$ et $$\mathcal{L}(\mathbf{w}, x) = - y \log \hat{y}(\mathbf{w},\mathbf{x}) - (1-y)\log(1-\hat{y}(\mathbf{w},\mathbf{x})) = \log(1+e^{h_{\mathbf{w}}(\mathbf{x})})-yh_\mathbf{w}(\mathbf{x}),$$



In [348]:
#  Coder Struture Adaline et Regression Logistique # 
class Model:
    model = ""
    
    def __init__(self, model):
        self.model = model    
    
    def prediction(self, w, x):
        """
        w : vecteur de poids
        x : vecteur d'entrée
        Retourne la prediction du modèle
        """
        if self.model == "Adaline":
            return w@x
        elif self.model == "Regression Logistique":
            return 1/(1 + np.exp(-w@x))
        else:
            raise ValueError("Modèle non reconnu")
    
    def loss(self, w, x, y):
        """
        p : prédiction
        y : valeur réelle
        Retourne la perte entre p et ye
        """
        if self.model == "Adaline":
            return (self.prediction(w,x)-y)**2
        elif self.model == "Regression Logistique":
            p = self.prediction(w,x)
            p = np.clip(p, 1e-10, 1-1e-10)
            return -y*np.log(p) - (1-y)*np.log(1-p)
        else:
            raise ValueError("Modèle non reconnu")

    def gradient(self, w, x, y):
        """
        w : vecteur de poids
        x : vecteur d'entrée
        y : valeur réelle
        Retourne le gradient de la fonction loss
        """
        if self.model == "Adaline":
            return 2*(self.prediction(w,x)-y)*x
        elif self.model == "Regression Logistique":
            p = self.prediction(w,x)
            return (p-y)*x
        else:
            raise ValueError("Modèle non reconnu")
    
    def __str__(self):
        return self.model

Nous avons vu les gradients de ces fonctions en TD.

## Partie 1: implémentation de l'algorithme et exemple du "ET logique"

<font color='red'><b>Question 1:</b> le "ET logique".</font> Créer une liste de 4 éléments où chaque élément est un couple de la forme `[x,y]`, avec `x=[1,x1,x2]` et `y = x1 and x2`. Il y a 4 éléments car `x1` et `x2` peuvent chacun prendre la valeur `0` ou `1` (chacun de ces 4 éléments est une liste dont le premier élément est les attributs de l'exemple et le deuxième élément est la classe de l'exemple).

In [349]:
# Creation de liste ET logique #
liste = [0] * 4
for x1 in range(2):
    for x2 in range(2):
        liste[x1*1+x2*2] = [[1,x1,x2], x1 and x2] 

x_ET_logique = np.array([ei[0] for ei in liste])
y_ET_logique = np.array([ei[1] for ei in liste])

x_ET_logique, y_ET_logique

(array([[1, 0, 0],
        [1, 1, 0],
        [1, 0, 1],
        [1, 1, 1]]),
 array([0, 0, 0, 1]))


Codage d'une structure pour stocker les listes de feature et la liste de label 

In [350]:
import random as rd

class collection:
    # Structure de données pour stocker les données
    def __init__(self, x, y):
        if (len(x) != len(y)):
            raise ValueError("Les données x et y n'ont pas la même longueur")
        self.x = x
        self.y = y
    
    def nombre_de_feature(self):
        return len(self.x[0])
    
    def nombre_de_data(self):
        return len(self.x)
    
    def __str__(self):
        return f"x : {self.x}, y : {self.y}"

<font color='red'><b>Question 2:</b></font> Coder un algorithme de descente de gradient stochastique pour les modèles Adaline et le modèle de régression logistique. 


In [351]:
import random as rd

def SGD(model, collection, MaxEp, eta, m = 0):
    """
    Entree: 
    model : model Adaline ou Regression Logistique
    collection : collection de données
    eta : pas d'apprentissage
    
    Sortie:
    w : vecteur de poids
    """
    if m == 0: 
        m = 1
        err_printing = False
    else: 
        err_printing = True
        
    w = np.random.rand(collection.nombre_de_feature()) * 0.01
    for epoque in range(MaxEp):
        err = 0
        for _ in range(m):
            i = rd.randint(0, collection.nombre_de_data()-1)
            err += model.loss(w, collection.x[i], collection.y[i])
            w = w - eta(epoque)*model.gradient(w, collection.x[i], collection.y[i])
        if err_printing:
            print(f"Loss : {err}")
    return w
    
    
def SGD_with_error_printing(model, collection, MaxEp, m, eta):
    return SGD(model, collection, MaxEp, eta, m)


# Learning rate #   
def eta_001(t):
    return 0.01

def eta_01(t):
    return 0.1
#################

# Test de la fonction SGD
adaline = Model("Adaline")
regression_logistique = Model("Regression Logistique")
collection_et_logique = collection(x_ET_logique, y_ET_logique)
w = SGD(adaline, collection_et_logique, 100, eta_01)
w

array([-0.2922714 ,  0.42566849,  0.65849419])

Tourner les 2 modèles sur la collection de "ET logique".

In [352]:
# Training Adaline on "ET logique"
w_adaline = SGD(adaline, collection_et_logique, 100, eta_001)
print("Final weights for Adaline on 'ET logique':", w_adaline)

# Training Logistic Regression on "ET logique"
w_logistic = SGD_with_error_printing(regression_logistique, collection_et_logique, 100, 3, eta_001)
print("Final weights for Logistic Regression on 'ET logique':", w_logistic)

Final weights for Adaline on 'ET logique': [0.05376666 0.18279177 0.18097205]
Loss : 2.075848867120881
Loss : 2.097394701161613
Loss : 2.08179514723291
Loss : 2.0767976867242037
Loss : 2.071824819698938
Loss : 2.062951920113816
Loss : 2.0657660211472404
Loss : 2.052002858681609
Loss : 2.0883885955382873
Loss : 2.0564060045376893
Loss : 2.0288660564634795
Loss : 1.9938458636892509
Loss : 2.0461323829420666
Loss : 2.054949220713589
Loss : 1.9661941427518153
Loss : 2.0360138073707112
Loss : 1.9350743810653646
Loss : 2.0390923426037095
Loss : 1.904836335971243
Loss : 2.021688351567139
Loss : 2.017907628799755
Loss : 2.0134183224294393
Loss : 2.0306947812984273
Loss : 2.1525643453400254
Loss : 2.0127545162242306
Loss : 1.8594788163763627
Loss : 1.9894789395118186
Loss : 2.0058257090709466
Loss : 1.9910599897242507
Loss : 2.1113149307239665
Loss : 1.8575505006739803
Loss : 1.8528626978344072
Loss : 1.8204329084261954
Loss : 2.007934417012048
Loss : 1.8023869517714406
Loss : 2.163694799604449

Calculer le taux d'erreur de votre algorithme sur cette base (où une erreur est comptabilisé si la prédiction est plus proche de la fausse classe que de la vraie classe). 

In [353]:

def calculate_error_rate(model, collection, w, rate_printing=False):
    """
    Entree:
    model : model Adaline ou Regression Logistique
    collection : collection de données
    w : vecteur de poids
    
    Sortie:
    errors : taux d'erreur
    """
    errors = 0
    n = collection.nombre_de_data()
    for i in range(n):
        pred = model.prediction(w, collection.x[i])
        if rate_printing:
            print(f"P: {pred}, Y: {collection.y[i]}")
        if abs(pred - (1 - collection.y[i])) < abs(pred - collection.y[i]):
            errors += 1
    return errors / n

# Function to calculate error rate multiple times
def calculate_multiple_error_rates(model, train_collection, test_collection, num_iterations, eta, Maxep, error_rate_printing=False):
    """
    Calculer le taux d'erreur plusieurs fois
    
    Objectif:
    - Entrainer le modèle plusieurs fois
    - Calculer le taux d'erreur à chaque itération
    - Verifier la stabilité et la convergence de l'algorithme
    """

    if str(model) == 'Adaline':
        print("Training Adaline...") 
    elif str(model) == 'Regression Logistique':
        print("Training Regression Logistique...")
        
    error_rates = np.zeros(num_iterations)
    for _ in range(num_iterations):
        w = SGD(model, train_collection, Maxep, eta)
        error_rate = calculate_error_rate(model, test_collection, w)
        error_rates[_] = (error_rate)
        if error_rate_printing:
            print(f"Error rate: {error_rate}")
    print(error_rates)
    mean_error_rate = np.mean(error_rates)
    return mean_error_rate
    # return str(round((mean_error_rate * 100), 2)) + "%" 


# Calculate error rates for Adaline and Logistic Regression
num_iterations = 25

#####################################
# Calculate error rates for Adaline #
# calculate_multiple_error_rates(adaline, collection_et_logique, collection_et_logique, num_iterations, eta_001, 100, True)
# calculate_multiple_error_rates(adaline, collection_et_logique, collection_et_logique, num_iterations, eta_01, 100, True)

#################################################
# Calculate error rates for Logistic Regression #
# calculate_multiple_error_rates(regression_logistique, collection_et_logique, collection_et_logique, num_iterations, eta_001, 100, True)
# calculate_multiple_error_rates(regression_logistique, collection_et_logique, collection_et_logique, num_iterations, eta_01, 100, True)
# calculate_multiple_error_rates(regression_logistique, collection_et_logique, collection_et_logique, num_iterations, eta_01, 200, True)
# calculate_multiple_error_rates(regression_logistique, collection_et_logique, collection_et_logique, num_iterations, eta_01, 250, True)
# calculate_multiple_error_rates(regression_logistique, collection_et_logique, collection_et_logique, num_iterations, eta_01, 300, True)


## Partie 2: premiers tests avec une base de donnée réelle

<font color='red'><b>Question 3:</b></font> Nous allons maintenant nous intéresser au comportement de ces modèles sur la base SONAR de la collection UCI (http://archive.ics.uci.edu/ml/index.php). Cette base contient 208 exemples en dimension 60 séparés par `,` et la dernière élément correspond à la classe de l'exemple.

    1. Télécharger la collection avec la fonction read_table de la librairie pandas (https://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_table.html). Les options nécessaires sont `sep=','` et `header=None`  
    2. Créer une liste de listes correspondant à la collection; pour cela initialiser la première liste et en parcourant chaque ligne de la matrice de données; créer une liste associée en remplaçant le dernier élément par `0` ou `+1` et insérer la dans la première liste. 
    Indication: Utiliser la fonction `loc`. 
    3. Écrire une fonction qui génère deux listes de données `x_train` (75%) and `x_test` (25%) en la mélangeant aléatoirement au préalable (indication: on pourra utiliser les fonctions `shuffle` de la librairie `random` et `train_test_split` de la librairie `sklearn.model_selection`)
    

Codage la Structure dataFrame

In [354]:
class dataFrame:
    def __init__(self, df, target_column, class_type):
        if not isinstance(df, pd.DataFrame):
            raise ValueError("L'objet n'est pas un DataFrame")
        self.df = df
        self.target_column = target_column
        self.class_type = class_type
        self.num_data = self.df.shape[0]
        self.num_features = self.df.shape[1]
    
    def produce_collection(self):
        x = np.zeros((self.num_data, self.num_features - 1))
        y = np.zeros(self.num_data)
        
        features_idx = 0
        for i in range(self.num_features):
            if i == self.target_column:
                continue
            x[:, features_idx] = self.df[i]
            features_idx += 1
            
        if self.class_type == None:
            for i in range(self.num_data):
                y[i] = self.df[self.target_column][i]
        else:
            for i in range(self.num_data):
                if self.df[self.target_column][i] == self.class_type[0]: y[i] = 0
                elif self.df[self.target_column][i] == self.class_type[1]: y[i] = 1
                else: raise ValueError("Class type a plus de 2 classes")
        
        return collection(x, y)

Charger data frame

In [355]:
df = pd.read_table('Data/Sonar/sonar.all-data', sep = ',', header = None)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.0140,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.2280,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.0180,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.0100,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.0150,0.0085,0.0073,0.0050,0.0044,0.0040,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.0590,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.0110,0.0015,0.0072,0.0048,0.0107,0.0094,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,0.0187,0.0346,0.0168,0.0177,0.0393,0.1630,0.2028,0.1694,0.2328,0.2684,...,0.0116,0.0098,0.0199,0.0033,0.0101,0.0065,0.0115,0.0193,0.0157,M
204,0.0323,0.0101,0.0298,0.0564,0.0760,0.0958,0.0990,0.1018,0.1030,0.2154,...,0.0061,0.0093,0.0135,0.0063,0.0063,0.0034,0.0032,0.0062,0.0067,M
205,0.0522,0.0437,0.0180,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.0160,0.0029,0.0051,0.0062,0.0089,0.0140,0.0138,0.0077,0.0031,M
206,0.0303,0.0353,0.0490,0.0608,0.0167,0.1354,0.1465,0.1123,0.1945,0.2354,...,0.0086,0.0046,0.0126,0.0036,0.0035,0.0034,0.0079,0.0036,0.0048,M


Fonction qui génère deux listes de données `x_train` (75%) and `x_test` (25%) en la mélangeant aléatoirement au préalable

In [356]:
# Fonction pour generer train et test random
def generate_train_test_random(self, test_size=0.25):
    data = list(zip(self.x, self.y))
    rd.shuffle(data)
    new_X = np.array([i for i, j in data])
    new_Y = np.array([j for i, j in data])
    x_train, x_test, y_train, y_test = train_test_split(new_X, new_Y, test_size=test_size)
    return collection(x_train, y_train), collection(x_test, y_test)

# Ajout de la fonction à la classe collection
collection.generate_train_test_random = generate_train_test_random

<font color='red'><b>Question 4:</b></font> Appliquer ces modèles sur cette base (on pourra prendre $MaxEp\approx1000$ et le pas d'apprentissage $\eta\approx0.1$) et en choisissant les bases Train et Test de façon aléatoire. Reporter l'erreur moyenne de ces modèles obtenues sur les exemples de donnés de "test"? 


In [357]:
MaxEp = 1000
sonar_data_frame = dataFrame(df, 60, ['R', 'M'])
sonar_collection = sonar_data_frame.produce_collection()

sonar_train, sonar_test = sonar_collection.generate_train_test_random()


calculate_multiple_error_rates(adaline, sonar_train, sonar_test, 1, eta_01, MaxEp, False),calculate_multiple_error_rates(regression_logistique, sonar_train, sonar_test, 1, eta_01, MaxEp, False)

Training Adaline...
[0.42307692]
Training Regression Logistique...
[0.25]


(0.4230769230769231, 0.25)

Refaire l'opération 3 fois avec trois randomisations différentes. 

In [358]:
for _ in range(3):
    print("\nIteration:", _+1)
    sonar_train, sonar_test = sonar_collection.generate_train_test_random()
    round(calculate_multiple_error_rates(adaline, sonar_train, sonar_test, 1, eta_01, MaxEp, True) * 100, 2) 
    round(calculate_multiple_error_rates(regression_logistique, sonar_train, sonar_test, 1, eta_01, MaxEp, True) * 100, 2)


Iteration: 1
Training Adaline...
Error rate: 0.38461538461538464
[0.38461538]
Training Regression Logistique...
Error rate: 0.28846153846153844
[0.28846154]

Iteration: 2
Training Adaline...
Error rate: 0.34615384615384615
[0.34615385]
Training Regression Logistique...
Error rate: 0.19230769230769232
[0.19230769]

Iteration: 3
Training Adaline...
Error rate: 0.4807692307692308
[0.48076923]
Training Regression Logistique...
Error rate: 0.21153846153846154
[0.21153846]


  | Collection | Adaline     | Régression Logistique |
  |------------|-------------|-----------------------|
  |   SONAR (réplica 1)   |$\approx45$%|$\approx27$%|
  |   SONAR (réplica 2)   |$\approx45$%|$\approx27$%|
  |   SONAR (réplica 3)   |$\approx45$%|$\approx27$%|


## Partie 3: normalisation

Nous allons étudier l'impact de la nomralisation sur les prédictions. Pour cela nous considérons deux stratégies de normalisation communément utilisées dans la littérature:
* Stratégie <i>max</i>: consiste à normaliser chaque caractéristique du vecteur réprésentatif d'une observation par la valeur maximale de cette caractéristiques
* Stratégie <i>norme</i>: consiste à normaliser chaque caractéristique du vecteur réprésentatif d'une observation par la norme de ce vecteur.

Nous considérons ces trois autres collections de la base UCI:

        * https://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+%28Diagnostic%29
        * https://archive.ics.uci.edu/ml/datasets/spambase
        * https://archive.ics.uci.edu/ml/datasets/ionosphere

        

In [359]:
# Importer dataset #
breast_cancer = dataFrame(pd.read_table('Data/BreastCancer/wdbc.data', header=None, sep=','), 1, ['B', 'M'])
ionosphere = dataFrame(pd.read_csv('Data/Ionosphere/ionosphere.data', header=None, sep=','), 34, ['g', 'b'])
spambase = dataFrame(pd.read_csv('Data/Spambase/spambase.data', header=None, sep=','), 57, None)


<font color='red'><b>Question 5:</b></font> Ecrire une fonction qui prend en entrée la collection des données et qui retourne la collections normalisée suivant les stratégies <i>max</i> et <i>norme</i>. 

- Choisir tous les colonnes contenant des données numériques et les convertir en type `float` pour é viter les problème de division.
-  <i>Max</i>: Diviser par la valeur maximale en valeur absolu
-  <i>Norme</i>: Norme L2 

In [360]:
def produce_normalized_collection(self, strategies):
    normalized_collection = self.df.copy()
    
    numeric_cols = normalized_collection.select_dtypes(include=[np.number]).columns
    
    numeric_df = normalized_collection[numeric_cols].astype(float).copy()
    
    if strategies == 'max':
        max_value = np.max(np.abs(numeric_df), axis=0)
        max_value[max_value == 0] = 1 
        numeric_df = numeric_df / max_value
    
    elif strategies == 'norme': 
        l2_norm = np.linalg.norm(numeric_df, ord=2, axis=0)
        l2_norm[l2_norm == 0] = 1  
        numeric_df = numeric_df / l2_norm
        
    else:
        raise ValueError("Invalid normalization strategy")
    
    normalized_collection[numeric_cols] = numeric_df
    normalized_df = dataFrame(normalized_collection, self.target_column, self.class_type)
    return normalized_df.produce_collection() 


dataFrame.produce_normalized_collection = produce_normalized_collection

<font color='red'><b>Question 6:</b></font> Compléter les tableaux comparatifs suivants en repertant les erreurs moyennes sur 20 lancements des modèles de l'Adaline et de la Régression Logistique et pour les trois cas:




In [361]:
num_iterations = 20
def mean_error(num_iterations, collection, debug=DEBUG):
    ada_err = np.zeros(num_iterations)  
    rel_err = np.zeros(num_iterations)
    for _ in range(num_iterations):
        if debug:
            print("\nIteration:", _+1)
        trains, tests = collection.generate_train_test_random()
        ada_err[_] = (calculate_multiple_error_rates(adaline, trains, tests, 1, eta_01, MaxEp, False))
        rel_err[_] = (calculate_multiple_error_rates(regression_logistique, trains, tests, 1, eta_01, MaxEp, False))
    return round(np.mean(ada_err) * 100, 2), round(np.mean(rel_err) * 100, 2)


print("Breast Cancer")
print(mean_error(num_iterations, breast_cancer.produce_collection()))
print("Ionosphere")
print(mean_error(num_iterations, ionosphere.produce_collection()))
print("Sonar")
print(mean_error(num_iterations, sonar_data_frame.produce_collection()))
print("Spambase")
print(mean_error(num_iterations, spambase.produce_collection()))

Breast Cancer

Iteration: 1
Training Adaline...
[0.]
Training Regression Logistique...
[0.34265734]

Iteration: 2
Training Adaline...
[0.]
Training Regression Logistique...
[0.63636364]

Iteration: 3
Training Adaline...
[0.]
Training Regression Logistique...
[0.37762238]

Iteration: 4
Training Adaline...
[0.]
Training Regression Logistique...
[0.66433566]

Iteration: 5
Training Adaline...
[0.]
Training Regression Logistique...
[0.38461538]

Iteration: 6
Training Adaline...
[0.]
Training Regression Logistique...


  return (self.prediction(w,x)-y)**2
  return w@x
  w = w - eta(epoque)*model.gradient(w, collection.x[i], collection.y[i])
  return 1/(1 + np.exp(-w@x))
  return 2*(self.prediction(w,x)-y)*x


[0.3986014]

Iteration: 7
Training Adaline...
[0.]
Training Regression Logistique...
[0.40559441]

Iteration: 8
Training Adaline...
[0.]
Training Regression Logistique...
[0.59440559]

Iteration: 9
Training Adaline...
[0.]
Training Regression Logistique...
[0.34965035]

Iteration: 10
Training Adaline...
[0.]
Training Regression Logistique...
[0.43356643]

Iteration: 11
Training Adaline...
[0.]
Training Regression Logistique...
[0.58741259]

Iteration: 12
Training Adaline...
[0.]
Training Regression Logistique...
[0.3986014]

Iteration: 13
Training Adaline...
[0.]
Training Regression Logistique...
[0.60839161]

Iteration: 14
Training Adaline...
[0.]
Training Regression Logistique...
[0.34965035]

Iteration: 15
Training Adaline...
[0.]
Training Regression Logistique...
[0.37762238]

Iteration: 16
Training Adaline...
[0.]
Training Regression Logistique...
[0.65034965]

Iteration: 17
Training Adaline...
[0.]
Training Regression Logistique...
[0.32867133]

Iteration: 18
Training Adaline...


  return 2*(self.prediction(w,x)-y)*x
  return w@x


[0.43874891]

Iteration: 6
Training Adaline...
[0.]
Training Regression Logistique...
[0.56559513]

Iteration: 7
Training Adaline...
[0.]
Training Regression Logistique...
[0.55864466]

Iteration: 8
Training Adaline...
[0.]
Training Regression Logistique...
[0.37880104]

Iteration: 9
Training Adaline...
[0.]
Training Regression Logistique...
[0.54126846]

Iteration: 10
Training Adaline...
[0.]
Training Regression Logistique...
[0.5534318]

Iteration: 11
Training Adaline...
[0.]
Training Regression Logistique...
[0.36663771]

Iteration: 12
Training Adaline...
[0.]
Training Regression Logistique...
[0.59165943]

Iteration: 13
Training Adaline...
[0.]
Training Regression Logistique...
[0.47958297]

Iteration: 14
Training Adaline...
[0.]
Training Regression Logistique...


  return 2*(self.prediction(w,x)-y)*x


[0.4378801]

Iteration: 15
Training Adaline...
[0.]
Training Regression Logistique...
[0.58992181]

Iteration: 16
Training Adaline...
[0.]
Training Regression Logistique...
[0.57775847]

Iteration: 17
Training Adaline...
[0.]
Training Regression Logistique...
[0.47697654]

Iteration: 18
Training Adaline...
[0.]
Training Regression Logistique...
[0.41268462]

Iteration: 19
Training Adaline...
[0.]
Training Regression Logistique...
[0.37271937]

Iteration: 20
Training Adaline...
[0.]
Training Regression Logistique...
[0.43266725]
(0.0, 49.42)



 '*' Les vecteurs ne sont pas normalisés
     
  | Collection |   Adaline   |  Régression Logistique |
  |------------|-------------|------------------------|
  |   BREAST   |             |                        |
  |   IONO     |             |                        |
  |   SONAR    |             |                        |
  |   SPAM     |             |                        |


In [368]:
# Normalisation avec strategie norme #
# print("Breast Cancer")
# print(mean_error(num_iterations, breast_cancer.produce_normalized_collection('norme')))
print("Ionosphere")
print(mean_error(num_iterations, ionosphere.produce_normalized_collection('max')))
print("Sonar")
print(mean_error(num_iterations, sonar_data_frame.produce_normalized_collection('max')))
# print("Spambase")
# print(mean_error(num_iterations, spambase.produce_normalized_collection('max')))

Ionosphere

Iteration: 1
Training Adaline...
[0.]
Training Regression Logistique...
[0.17045455]

Iteration: 2
Training Adaline...
[0.]
Training Regression Logistique...
[0.125]

Iteration: 3
Training Adaline...
[0.]
Training Regression Logistique...
[0.18181818]

Iteration: 4
Training Adaline...
[0.]
Training Regression Logistique...
[0.125]

Iteration: 5
Training Adaline...
[0.]
Training Regression Logistique...
[0.13636364]

Iteration: 6
Training Adaline...
[0.]
Training Regression Logistique...
[0.18181818]

Iteration: 7
Training Adaline...
[0.]
Training Regression Logistique...
[0.20454545]

Iteration: 8
Training Adaline...
[0.]
Training Regression Logistique...
[0.18181818]

Iteration: 9
Training Adaline...
[0.]
Training Regression Logistique...
[0.15909091]

Iteration: 10
Training Adaline...
[0.]
Training Regression Logistique...
[0.15909091]

Iteration: 11
Training Adaline...
[0.]
Training Regression Logistique...
[0.14772727]

Iteration: 12
Training Adaline...
[0.]
Training Re

 
 $^n$ Normalisation suivant la stratégie <i>norme</i>
     
  | Collection |   Adaline   |  Régression Logistique |
  |------------|-------------|------------------------|
  |   BREAST   |             |                        |
  |   IONO     |             |                        |
  |   SONAR    |             |                        |
  |   SPAM     |             |                        |

  


In [372]:
# print("Breast Cancer")
# print(mean_error(num_iterations, breast_cancer.produce_normalized_collection('max')))
# print("Ionosphere")
print(mean_error(num_iterations, ionosphere.produce_normalized_collection('max')))
# print("Sonar")
print(mean_error(num_iterations, sonar_data_frame.produce_normalized_collection('max')))
# print("Spambase")
# print(mean_error(num_iterations, spambase.produce_normalized_collection('max')))


Iteration: 1
Training Adaline...
[0.]
Training Regression Logistique...
[0.14772727]

Iteration: 2
Training Adaline...
[0.]
Training Regression Logistique...
[0.13636364]

Iteration: 3
Training Adaline...
[0.]
Training Regression Logistique...
[0.19318182]

Iteration: 4
Training Adaline...
[0.]
Training Regression Logistique...
[0.19318182]

Iteration: 5
Training Adaline...
[0.]
Training Regression Logistique...
[0.23863636]

Iteration: 6
Training Adaline...
[0.]
Training Regression Logistique...
[0.125]

Iteration: 7
Training Adaline...
[0.]
Training Regression Logistique...
[0.21590909]

Iteration: 8
Training Adaline...
[0.]
Training Regression Logistique...
[0.22727273]

Iteration: 9
Training Adaline...
[0.]
Training Regression Logistique...
[0.20454545]

Iteration: 10
Training Adaline...
[0.]
Training Regression Logistique...
[0.10227273]

Iteration: 11
Training Adaline...
[0.]
Training Regression Logistique...
[0.15909091]

Iteration: 12
Training Adaline...
[0.]
Training Regressi

 $^m$ Normalisation suivant la stratégie <i>max</i>
    
  | Collection |   Adaline   |  Régression Logistique |
  |------------|-------------|------------------------|
  |   BREAST   |             |                        |
  |   IONO     |             |                        |
  |   SONAR    |             |                        |
  |   SPAM     |             |                        |
