In [29]:
import tensorflow as tf
from tensorflow.keras import datasets
import numpy as np
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,accuracy_score
from sklearn.model_selection import train_test_split

## Chargez les données en utilisant tensorflow.keras

In [30]:
def get_cifar10_data():
    """Load CIFAR-10 through Keras and return it in a flat, scikit-learn friendly layout.

    Every image is flattened into a single feature row and divided by 255;
    the label arrays are flattened to one dimension.

    Returns:
        tuple: ``(xtrain, xtest, ytrain, ytest)`` where the ``x`` arrays are
        2-D (one row per image) and the ``y`` arrays are 1-D.
    """
    # Load the train / test splits shipped with Keras.
    (x_train, y_train), (x_test, y_test) = datasets.cifar10.load_data()

    # Flatten each image into one row. The row count comes from the array
    # itself and -1 lets NumPy infer the feature width, so no dataset size
    # is hard-coded here.
    xtrain = x_train.reshape(len(x_train), -1)
    xtest = x_test.reshape(len(x_test), -1)

    # Labels become plain 1-D vectors.
    ytrain = y_train.reshape(-1)
    ytest = y_test.reshape(-1)

    # Scale the features by the divisor 255 (true division, so the result is floating point).
    xtrain = xtrain/255.
    xtest = xtest/255.

    print("Dimension du l'ensemble d'apprentissage :",xtrain.shape,"\nDimension du l'ensemble de test :",xtest.shape)

    return xtrain,xtest, ytrain, ytest

In [31]:
# Load the flattened, scaled CIFAR-10 arrays once at notebook level; split_data() reads these as globals.
xtrain, xtest, ytrain, ytest = get_cifar10_data()


Dimension du l'ensemble d'apprentissage : (50000, 3072) 
Dimension du l'ensemble de test : (10000, 3072)


## Fonction pour choisir les classes

In [32]:
def choose_classe(indice, xtrain, xtest, ytrain, ytest):
    """Pool the samples of two classes from the train and test splits into one dataset.

    Args:
        indice: Indexable holding the two class labels to keep; only
            ``indice[0]`` and ``indice[1]`` are read.
        xtrain: Training features, indexed by row.
        xtest: Test features, indexed by row.
        ytrain: Training labels aligned with ``xtrain``.
        ytest: Test labels aligned with ``xtest``.

    Returns:
        tuple: ``(dataX, dataY)``. Rows are ordered as: train rows of the
        first class, train rows of the second class, test rows of the first
        class, test rows of the second class.
    """
    first_label, second_label = indice[0], indice[1]

    def _rows_of_both_classes(labels):
        # Positions of the first class followed by positions of the second class.
        return np.concatenate((
            np.nonzero(labels == first_label)[0],
            np.nonzero(labels == second_label)[0],
        ))

    train_rows = _rows_of_both_classes(ytrain)
    test_rows = _rows_of_both_classes(ytest)

    # Stack the filtered train part on top of the filtered test part.
    dataX = np.concatenate((xtrain[train_rows], xtest[test_rows]))
    dataY = np.concatenate((ytrain[train_rows], ytest[test_rows]))

    # Report the size of the pooled dataset.
    print("La dimension du dataset finale est : ",dataX.shape)
    return dataX, dataY

In [33]:
# Keep only the samples labelled 7 or 8, pooled from both the original train and test splits.
dataX, dataY= choose_classe(indice=(7,8),xtrain=xtrain, xtest=xtest, ytrain=ytrain,ytest=ytest)

La dimension du dataset finale est :  (12000, 3072)


## Fonction pour diviser les données

In [34]:
def split_data(dataX=None, dataY=None, indice=(7, 8), holdout_size=0.3, random_state=42):
    """Split the two-class dataset into train, validation and test partitions.

    Calling ``split_data()`` with no arguments still works: the dataset is
    then rebuilt with ``choose_classe`` from the notebook-level
    ``xtrain``/``xtest``/``ytrain``/``ytest`` arrays.

    Args:
        dataX: Optional feature matrix to split. When this or ``dataY`` is
            ``None`` the dataset is rebuilt from the notebook-level arrays.
        dataY: Optional labels aligned with ``dataX``.
        indice: The two class labels to keep when the dataset is rebuilt.
        holdout_size: Fraction of the data set aside for validation + test;
            that part is then cut in half, one half for each.
        random_state: Seed passed to both ``train_test_split`` calls so the
            partitions are identical from run to run. Pass ``None`` to get a
            different random split on every call.

    Returns:
        tuple: ``(X_train, Y_train, X_val, Y_val, X_test, Y_test)``.
    """
    if dataX is None or dataY is None:
        dataX, dataY = choose_classe(indice=indice, xtrain=xtrain, xtest=xtest, ytrain=ytrain, ytest=ytest)

    # First cut: with the default holdout_size, train keeps 70% of the data.
    X_train, X_holdout, Y_train, Y_holdout = train_test_split(
        dataX, dataY, test_size=holdout_size, random_state=random_state
    )

    # Second cut: the held-out part is halved, so with the defaults
    # validation and test each hold 15% of the data.
    X_val, X_test, Y_val, Y_test = train_test_split(
        X_holdout, Y_holdout, test_size=0.5, random_state=random_state
    )

    return X_train, Y_train, X_val, Y_val, X_test, Y_test

In [35]:
# Build the train / validation / test partitions used by every classifier below.
X_train, Y_train, X_val, Y_val, X_test, Y_test=split_data()

La dimension du dataset finale est :  (12000, 3072)


In [36]:
# Sanity check: size of the training partition.
X_train.shape

(8400, 3072)

In [37]:
# Sanity check: size of the test partition.
X_test.shape

(1800, 3072)

In [38]:
# Sanity check: size of the validation partition.
X_val.shape

(1800, 3072)

## Prédiction avec Random Forest

In [39]:
## Training
# Fit a random forest on the training partition. random_state is fixed so
# the forest's internal randomness, and therefore its reported scores, are
# the same on every run.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, Y_train)

RandomForestClassifier()

In [40]:
## Predicting
# Predict the test partition with the random forest and print the per-class precision / recall / F1 report.
y_pred_rf = rf.predict(X_test)
print(classification_report(Y_test,y_pred_rf))

              precision    recall  f1-score   support

           7       0.91      0.92      0.91       898
           8       0.92      0.91      0.91       902

    accuracy                           0.91      1800
   macro avg       0.91      0.91      0.91      1800
weighted avg       0.91      0.91      0.91      1800



## Prédiction avec KNN

In [41]:
## Training
# Fit a k-nearest-neighbours classifier (k = 5) on the training partition.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, Y_train)

KNeighborsClassifier()

In [42]:
## Predicting
# Predict the test partition with KNN and print the per-class precision / recall / F1 report.
y_pred_knn = knn.predict(X_test)
print(classification_report(Y_test,y_pred_knn))

              precision    recall  f1-score   support

           7       0.95      0.73      0.83       898
           8       0.78      0.96      0.86       902

    accuracy                           0.85      1800
   macro avg       0.87      0.85      0.84      1800
weighted avg       0.87      0.85      0.84      1800



## Prédiction avec Logistic Regression

In [43]:
## Training
# Fit a logistic regression on the training partition, allowing up to 1000 solver iterations.
lr = LogisticRegression(max_iter=1000)
lr.fit(X_train, Y_train)

LogisticRegression(max_iter=1000)

In [44]:
## Predicting
# Predict the test partition with logistic regression and print the per-class precision / recall / F1 report.
y_pred_lr = lr.predict(X_test)
print(classification_report(Y_test,y_pred_lr))

              precision    recall  f1-score   support

           7       0.90      0.86      0.88       898
           8       0.87      0.90      0.88       902

    accuracy                           0.88      1800
   macro avg       0.88      0.88      0.88      1800
weighted avg       0.88      0.88      0.88      1800



## Prédiction avec SVM

In [45]:
## Training
# Fit a support vector classifier with scikit-learn's default settings on the training partition.
svc = svm.SVC()
svc.fit(X_train, Y_train)

SVC()

In [46]:
## Predicting
# Predict the test partition with the SVC and print the per-class precision / recall / F1 report.
y_pred_svm = svc.predict(X_test)
print(classification_report(Y_test,y_pred_svm))

              precision    recall  f1-score   support

           7       0.95      0.93      0.94       898
           8       0.93      0.95      0.94       902

    accuracy                           0.94      1800
   macro avg       0.94      0.94      0.94      1800
weighted avg       0.94      0.94      0.94      1800



## Comparaison entre divers classificateurs

In [47]:
# Side-by-side test accuracy of the four classifiers, rounded to two decimals.
# Each entry pairs the padded display label with that model's test predictions.
predictions_by_model = (
    ("RandomForestClassifier Accuracy:    ", y_pred_rf),
    ("K Nearest Neighbors Accuracy:       ", y_pred_knn),
    ("Logistic Regression Accuracy:       ", y_pred_lr),
    ("Support Vector Classifier Accuracy: ", y_pred_svm),
)
for label, predicted in predictions_by_model:
    print(label, round(accuracy_score(Y_test, predicted), 2))

RandomForestClassifier Accuracy:     0.91
K Nearest Neighbors Accuracy:        0.85
Logistic Regression Accuracy:        0.88
Support Vector Classifier Accuracy:  0.94


## Appliquons le classificateur choisi sur l'ensemble de validation

In [48]:
# Final check of the selected model (SVC): mean accuracy on the validation
# partition, which was used neither for fitting nor for the comparison above.
round(svc.score(X_val,Y_val),2)

0.93