<a href="https://colab.research.google.com/github/Tianarandr/python-for-datascience/blob/master/LDA_QDA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#FONCTION MATRICE DE CONFUSION, PRECISION, RAPPEL, F-MESURE
import numpy as np
# Build the confusion matrix from the true labels and the predicted labels.
def comp_confmat(actuel, predicted):
    """Return the confusion matrix of `predicted` against `actuel`.

    Rows are indexed by the true class and columns by the predicted class,
    both following the sorted order of the distinct labels.

    Parameters
    ----------
    actuel : array-like
        True labels, one per sample.
    predicted : array-like
        Predicted labels, one per sample, in the same order as `actuel`.

    Returns
    -------
    numpy.ndarray
        Float matrix of shape (n_classes, n_classes) where entry (i, j)
        counts the samples of true class i that were predicted as class j.

    Raises
    ------
    ValueError
        If the two label sequences do not have the same number of elements.
    """
    # Convert first: comparing a plain Python list to a scalar yields a single
    # False instead of an element-wise mask, which would silently count zero.
    actuel = np.asarray(actuel).ravel()
    predicted = np.asarray(predicted).ravel()
    if actuel.shape != predicted.shape:
        raise ValueError(
            "actuel and predicted must have the same number of elements"
        )

    # Use the labels seen on either side so that a sample predicted as a class
    # absent from `actuel` is still counted rather than dropped.
    classes = np.unique(np.concatenate([actuel, predicted]))
    matrix = np.zeros((len(classes), len(classes)))

    for i, classe_reelle in enumerate(classes):
        masque_reel = actuel == classe_reelle  # hoisted out of the inner loop
        for j, classe_predite in enumerate(classes):
            matrix[i, j] = np.sum(masque_reel & (predicted == classe_predite))

    return matrix

# Compute the global precision from a confusion matrix.
def precision_globale(matriceDeConfusion):
    """Return the global precision: diagonal sum divided by the total count.

    Parameters
    ----------
    matriceDeConfusion : array-like
        Two-dimensional confusion matrix.

    Returns
    -------
    float
        Sum of the diagonal entries divided by the sum of all entries, or
        0.0 when the matrix contains no samples (total count of zero).
    """
    matrice = np.asarray(matriceDeConfusion, dtype=float)
    total = matrice.sum()
    if total == 0:
        # No sample at all: report 0.0 rather than propagating a 0/0 NaN.
        return 0.0
    # Only the diagonal (correct predictions) and the grand total are needed;
    # no per-class ratio is computed, so an empty class cannot trigger a
    # division by zero.
    return np.trace(matrice) / total

# Compute the global recall from a confusion matrix.
def rappel_globale(matriceDeConfusion):
    """Return the global recall: diagonal sum divided by the total count.

    Parameters
    ----------
    matriceDeConfusion : array-like
        Two-dimensional confusion matrix.

    Returns
    -------
    float
        Sum of the diagonal entries divided by the sum of all entries, or
        0.0 when the matrix contains no samples (total count of zero).
    """
    matrice = np.asarray(matriceDeConfusion, dtype=float)
    total = matrice.sum()
    if total == 0:
        # No sample at all: report 0.0 rather than propagating a 0/0 NaN.
        return 0.0
    # The returned value only depends on the diagonal and the grand total, so
    # no per-column ratio is computed and an empty column cannot cause 0/0.
    return np.trace(matrice) / total

# Compute the global F-measure from a confusion matrix.
def f_mesure_globale(matrice_conf):
    """Return the harmonic mean of the global precision and global recall.

    Parameters
    ----------
    matrice_conf : numpy.ndarray
        Confusion matrix, passed unchanged to `precision_globale` and
        `rappel_globale`.

    Returns
    -------
    float
        2 * precision * recall / (precision + recall), or 0.0 when both
        precision and recall are zero.
    """
    precision = precision_globale(matrice_conf)
    rappel = rappel_globale(matrice_conf)
    denominateur = precision + rappel
    if denominateur == 0:
        # Every prediction is wrong: the F-measure is 0, not a 0/0 NaN.
        return 0.0
    return 2 * ((precision * rappel) / denominateur)



In [None]:
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn import metrics
from sklearn.metrics import classification_report, confusion_matrix, precision_score, f1_score, recall_score, accuracy_score
from sklearn import datasets
from numpy.random import randint
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from numpy.linalg import multi_dot, inv, det



def LDA_ALGO(X, y_train, X_test=None):
    """Fit a linear discriminant analysis classifier and return predictions.

    The class priors, the class means and a single covariance matrix shared
    by all classes are estimated from `X` / `y_train`. Each sample to
    classify is assigned the class k maximising the linear discriminant

        delta_k(x) = x^T S^-1 mu_k - 0.5 * mu_k^T S^-1 mu_k + log(pi_k)

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples used to estimate the model.
    y_train : array-like of shape (n_samples,)
        Class label of each row of `X`.
    X_test : array-like of shape (n_eval, n_features), optional
        Samples to classify. When omitted, the rows of `X` themselves are
        classified (the historical behaviour of this function).

    Returns
    -------
    numpy.ndarray
        Predicted label for each classified sample.

    Raises
    ------
    ValueError
        If the shapes are inconsistent or if there are not more samples
        than classes (the pooled covariance is then undefined).
    numpy.linalg.LinAlgError
        If the pooled covariance matrix is singular.
    """
    X = np.asarray(X, dtype=float)
    y_train = np.asarray(y_train).ravel()
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array (n_samples, n_features)")
    if X.shape[0] != y_train.shape[0]:
        raise ValueError("X and y_train must have the same number of samples")

    X_eval = X if X_test is None else np.asarray(X_test, dtype=float)
    if X_eval.ndim != 2 or X_eval.shape[1] != X.shape[1]:
        raise ValueError("X_test must be 2-D with the same features as X")

    classes = np.unique(y_train)
    n_samples, n_features = X.shape
    n_classes = len(classes)
    degres_liberte = n_samples - n_classes
    if degres_liberte <= 0:
        raise ValueError("LDA needs more samples than classes")

    priors = np.empty(n_classes)
    means = np.empty((n_classes, n_features))
    scatter = np.zeros((n_features, n_features))
    for k, classe in enumerate(classes):
        X_classe = X[y_train == classe]
        priors[k] = X_classe.shape[0] / n_samples
        means[k] = X_classe.mean(axis=0)
        centre = X_classe - means[k]
        # Accumulate the raw within-class scatter; the normalisation is done
        # once on the pooled total, not per class.
        scatter += centre.T @ centre

    # Pooled covariance with N - K degrees of freedom. Its entries keep their
    # sign: the off-diagonal covariances may legitimately be negative.
    sigma_inv = inv(scatter / degres_liberte)

    # Row k of `projection` is (S^-1 mu_k)^T, since S^-1 is symmetric.
    projection = means @ sigma_inv
    scores = (
        X_eval @ projection.T
        - 0.5 * np.sum(projection * means, axis=1)
        + np.log(priors)
    )
    return classes[np.argmax(scores, axis=1)]

def QDA_ALGO(X, y_train, X_test=None):
    """Fit a quadratic discriminant analysis classifier and return predictions.

    A prior, a mean and a covariance matrix are estimated separately for
    every class from `X` / `y_train`. Each sample to classify is assigned
    the class k maximising the quadratic discriminant

        delta_k(x) = -0.5 * (x - mu_k)^T S_k^-1 (x - mu_k)
                     - 0.5 * log(det(S_k)) + log(pi_k)

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Samples used to estimate the model.
    y_train : array-like of shape (n_samples,)
        Class label of each row of `X`.
    X_test : array-like of shape (n_eval, n_features), optional
        Samples to classify. When omitted, the rows of `X` themselves are
        classified (the historical behaviour of this function).

    Returns
    -------
    numpy.ndarray
        Predicted label for each classified sample.

    Raises
    ------
    ValueError
        If the shapes are inconsistent, if there is no training sample, or
        if a class has fewer than two samples.
    numpy.linalg.LinAlgError
        If the covariance matrix of a class is singular.
    """
    X = np.asarray(X, dtype=float)
    y_train = np.asarray(y_train).ravel()
    if X.ndim != 2:
        raise ValueError("X must be a 2-D array (n_samples, n_features)")
    if X.shape[0] != y_train.shape[0]:
        raise ValueError("X and y_train must have the same number of samples")

    X_eval = X if X_test is None else np.asarray(X_test, dtype=float)
    if X_eval.ndim != 2 or X_eval.shape[1] != X.shape[1]:
        raise ValueError("X_test must be 2-D with the same features as X")

    classes = np.unique(y_train)
    if classes.size == 0:
        raise ValueError("QDA needs at least one training sample")

    n_samples = X.shape[0]
    scores = np.empty((X_eval.shape[0], len(classes)))
    for k, classe in enumerate(classes):
        X_classe = X[y_train == classe]
        effectif = X_classe.shape[0]
        if effectif < 2:
            raise ValueError(
                "QDA needs at least two samples per class to estimate a covariance"
            )

        moyenne = X_classe.mean(axis=0)
        centre = X_classe - moyenne
        # Unbiased per-class covariance (n_k - 1 degrees of freedom), used
        # as-is: no absolute value and no element-wise logarithm.
        sigma = centre.T @ centre / (effectif - 1)

        # slogdet returns the sign and log|det| directly, avoiding the
        # overflow/underflow of computing det() and then taking its log.
        signe, log_det = np.linalg.slogdet(sigma)
        if signe <= 0:
            raise np.linalg.LinAlgError(
                "covariance matrix of a class is singular"
            )
        sigma_inv = inv(sigma)

        ecart = X_eval - moyenne
        # Squared Mahalanobis distance of every evaluated row, in one shot.
        mahalanobis = np.sum((ecart @ sigma_inv) * ecart, axis=1)
        scores[:, k] = (
            -0.5 * mahalanobis - 0.5 * log_det + np.log(effectif / n_samples)
        )

    return classes[np.argmax(scores, axis=1)]

In [None]:

def _afficher_rapport_sans_bibliotheque(titre, libelle_matrice, y_reel, y_predit):
    """Print the hand-written metrics for one from-scratch classifier."""
    matrice_conf = comp_confmat(y_reel, y_predit)
    precision = precision_globale(matrice_conf)
    recall = rappel_globale(matrice_conf)
    fscore = f_mesure_globale(matrice_conf)
    accuracy = accuracy_score(y_reel, y_predit)
    print(titre)
    print("Taux de bonne classification :", accuracy)
    print("La précision :", precision)
    print("La rappel :", recall)
    print("F-mesure :", fscore)
    print(libelle_matrice, matrice_conf)


def _afficher_rapport_bibliotheque(classifieur, titre, libelle_matrice,
                                   X_train, X_test, y_train, y_test):
    """Print 10-fold cross-validated scores and the test confusion matrix
    of a scikit-learn classifier."""
    print(titre)
    classifieur.fit(X_train, y_train)
    predictions = classifieur.predict(X_test)

    # 10-fold cross-validation on the training split; the fixed random_state
    # makes every cross_val_score call below use the same folds.
    cv = KFold(n_splits=10, random_state=1, shuffle=True)

    def moyenne_cv(scoring):
        return cross_val_score(
            classifieur, X_train, y_train, cv=cv, scoring=scoring
        ).mean()

    precision = moyenne_cv('precision_micro')
    recall = moyenne_cv('recall_micro')
    f1 = moyenne_cv('f1_micro')
    accuracy = moyenne_cv('accuracy')
    print("Taux de bonne classification :", accuracy)
    print("Precision :", precision)
    print("Rapelle :", recall)
    print("F-mesure :", f1)
    print(libelle_matrice, confusion_matrix(y_test, predictions))


def main():
    """Compare the from-scratch LDA/QDA with scikit-learn's on the iris data.

    Loads iris, holds out 20% of it as a test split (random_state=5), then
    prints accuracy, precision, recall, F-measure and the confusion matrix
    for the from-scratch LDA and QDA and for the scikit-learn estimators.
    """
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=5
    )

    # LDA without a library.
    # NOTE(review): LDA_ALGO / QDA_ALGO are called with the test split and its
    # labels, so the from-scratch scores are measured on the same samples that
    # were passed in as training data — confirm this is intended.
    results = LDA_ALGO(X_test, y_test)
    _afficher_rapport_sans_bibliotheque(
        "IMPLEMENTATION ALGORITHME LDA SANS BIBLIOTHEQUE:",
        "Matrice de confusion LDA: \n",
        y_test,
        results,
    )

    # QDA without a library (2.3: confusion matrix without a library).
    results = QDA_ALGO(X_test, y_test)
    print("\n")
    _afficher_rapport_sans_bibliotheque(
        "IMPLEMENTATION ALGORITHME QDA  SANS BIBLIOTHEQUE:",
        "Matrice de confusion QDA: \n",
        y_test,
        results,
    )

    # 2.5: redo 2.2 and 2.3 with an existing library (LDA).
    print("\n")
    _afficher_rapport_bibliotheque(
        LinearDiscriminantAnalysis(),
        "UTILISANT UNE BIBLIOTHEQUE LDA :",
        "Matrice de confusion : \n",
        X_train, X_test, y_train, y_test,
    )

    # 2.5: redo 2.2 and 2.3 with an existing library (QDA).
    print("\n")
    _afficher_rapport_bibliotheque(
        QuadraticDiscriminantAnalysis(),
        "UTILISANT UNE BIBLIOTHEQUE QDA :",
        "Matrice de confusion: \n",
        X_train, X_test, y_train, y_test,
    )


main()

IMPLEMENTATION ALGORITHME LDA SANS BIBLIOTHEQUE:
Taux de bonne classification : 0.9666666666666667
La précision : 0.9666666666666667
La rappel : 0.9666666666666667
F-mesure : 0.9666666666666667
Matrice de confusion LDA: 
 [[ 8.  0.  0.]
 [ 0. 10.  1.]
 [ 0.  0. 11.]]


IMPLEMENTATION ALGORITHME QDA  SANS BIBLIOTHEQUE:
Taux de bonne classification : 0.8666666666666667
La précision : 0.8666666666666667
La rappel : 0.8666666666666667
F-mesure : 0.8666666666666667
Matrice de confusion QDA: 
 [[ 8.  0.  0.]
 [ 0.  7.  4.]
 [ 0.  0. 11.]]


UTILISANT UNE BIBLIOTHEQUE LDA :
Taux de bonne classification : 0.9916666666666666
Precision : 0.9916666666666666
Rapelle : 0.9916666666666666
F-mesure : 0.9916666666666666
Matrice de confusion : 
 [[ 8  0  0]
 [ 0 10  1]
 [ 0  1 10]]


UTILISANT UNE BIBLIOTHEQUE QDA :
Taux de bonne classification : 0.9833333333333332
Precision : 0.9833333333333332
Rapelle : 0.9833333333333332
F-mesure : 0.9833333333333332
Matrice de confusion: 
 [[ 8  0  0]
 [ 0 10  1]
 