In [23]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import cv2 as cv

from skimage.feature import hog 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF, Matern
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from sklearn import metrics

import os
import shutil as sh

# Use seaborn's "darkgrid" theme for the figures drawn in this notebook.
sns.set_theme(style="darkgrid")

## Modèle 1 : binaire entre chaque dessin

### Préparation du jeu de données

In [79]:
def create_dataset1(source_dir, dest_dir):
    """Copy a drawing dataset into a new tree with drawing-prefixed class folders.

    The source is expected to be laid out as
    ``source_dir/<split>/<class>/<image>``. Every image is copied to
    ``dest_dir/<split>/<drawing>_<class>/<image>``, where ``<drawing>`` is the
    last path component of ``source_dir`` (so ``"spiral"``, ``"./spiral/"``
    and ``"data/spiral"`` all produce the prefix ``spiral``).

    Parameters
    ----------
    source_dir : str
        Root folder of one drawing type.
    dest_dir : str
        Root folder of the copy; missing folders are created on demand.

    Notes
    -----
    Entries that are not directories at the split/class level, and entries
    that are not regular files at the image level, are skipped. A class folder
    without any image does not get a destination folder.
    """
    # Only the folder name is used as prefix: a full path would inject
    # separators into the class folder name and nest/garble the destination.
    drawing = os.path.basename(os.path.normpath(source_dir))

    for split in os.listdir(source_dir):
        split_dir = os.path.join(source_dir, split)
        if not os.path.isdir(split_dir):
            continue  # stray file next to the split folders

        for classe in os.listdir(split_dir):
            class_dir = os.path.join(split_dir, classe)
            if not os.path.isdir(class_dir):
                continue  # stray file next to the class folders

            image_paths = [
                os.path.join(class_dir, image)
                for image in os.listdir(class_dir)
                if os.path.isfile(os.path.join(class_dir, image))
            ]
            if not image_paths:
                continue

            target_dir = os.path.join(dest_dir, split, drawing + "_" + classe)
            # Create the destination once per class if it does not exist yet.
            os.makedirs(target_dir, exist_ok=True)
            for image_path in image_paths:
                sh.copy(image_path, target_dir)

In [98]:
# Raw folders (one per drawing type) and the folders receiving their relabelled copies.
spiral_path, wave_path = "spiral", "wave"
dest_path_spiral, dest_path_wave = "Dataset_bin_spiral", "Dataset_bin_wave"

# Build the copy for the spiral drawings first, then for the wave drawings.
for raw_dir, copy_dir in ((spiral_path, dest_path_spiral), (wave_path, dest_path_wave)):
    create_dataset1(raw_dir, copy_dir)

### Chargement et binarisation des images

In [2]:
def loadimages(source_dir):
    """Load, binarise and resize every image found under ``source_dir/<class>/``.

    Each image is read with OpenCV, converted to grayscale and thresholded:
    the most frequent gray level of the image is looked up in its histogram,
    and every pixel strictly above ``most_frequent_level - 30`` becomes 255
    while all others become 0. The result is then resized with
    ``cv.resize(..., (227, 512))``.

    Parameters
    ----------
    source_dir : str
        Folder containing one sub-folder per class.

    Returns
    -------
    myimages : list
        One processed image array per readable image file.
    mylabels : list of str
        Name of the class sub-folder of each image, aligned with ``myimages``.

    Notes
    -----
    Entries of ``source_dir`` that are not directories and files that OpenCV
    cannot decode are skipped. Folders and files are visited in sorted order
    so the output order does not depend on the platform's directory listing.
    """
    myimages = []
    mylabels = []

    for classe in sorted(os.listdir(source_dir)):
        class_dir = os.path.join(source_dir, classe)
        if not os.path.isdir(class_dir):
            continue  # stray file next to the class folders

        for filename in sorted(os.listdir(class_dir)):
            raw = cv.imread(os.path.join(class_dir, filename))
            if raw is None:
                continue  # cv.imread returns None when the file cannot be decoded

            gray = cv.cvtColor(raw, cv.COLOR_BGR2GRAY)
            histogram = cv.calcHist([gray], channels=[0], mask=None,
                                    histSize=[256], ranges=[0, 256])
            # Threshold sits 30 gray levels below the dominant gray level.
            # It is kept as a NumPy integer so a negative value compares
            # safely against the uint8 image.
            threshold = histogram.argmax() - 30
            binary = np.where(gray > threshold, 255, 0).astype(np.uint8)

            # NOTE(review): resizing after thresholding with the default
            # interpolation can reintroduce intermediate gray values; left
            # unchanged so downstream features stay comparable.
            binary = cv.resize(binary, (227, 512))

            myimages.append(binary)
            mylabels.append(classe)

    return myimages, mylabels

## 1) Wave

In [3]:
# Training and testing splits of the wave dataset.
train_path, test_path = "./Dataset_bin_wave/training/", "./Dataset_bin_wave/testing/"

# Load the training split first, then the testing split.
(train_images, train_labels), (test_images, test_labels) = map(
    loadimages, (train_path, test_path)
)

### HoG

In [4]:
def hog_list (listimage) :
    """Compute HOG descriptors for a sequence of images.

    Every image is passed to ``hog`` with 9 orientations, 8x8-pixel cells,
    2x2-cell blocks and ``visualize=True``.

    Parameters
    ----------
    listimage : list
        Images to describe, processed in order.

    Returns
    -------
    pandas.DataFrame
        One row per image, built from the feature vectors returned by ``hog``.
    list
        The visualisation returned by ``hog`` for each image, in the same order.
    """
    results = [
        hog(
            picture,
            orientations=9,
            pixels_per_cell=(8, 8),
            cells_per_block=(2, 2),
            visualize=True,
        )
        for picture in listimage
    ]
    descriptors = [descriptor for descriptor, _ in results]
    renderings = [rendering for _, rendering in results]
    return pd.DataFrame(descriptors), renderings

In [5]:
# HOG descriptors and visualisations for the training split, then the testing split.
(features_train, hog_images_train), (features_test, hog_images_test) = [
    hog_list(split_images) for split_images in (train_images, test_images)
]

### Classifieur

In [26]:
def _print_report(banner, test_labels, predictions, best_params=None):
    """Print a banner, optional grid-search parameters, accuracy and the per-class report."""
    print(banner)
    if best_params is not None:
        print(best_params)
    print("Accuracy : ", metrics.accuracy_score(test_labels, predictions))
    print(metrics.classification_report(test_labels, predictions))


def allclassifier(features_train, train_labels, features_test, test_labels):
    """Train six classifiers on the training features and report their test scores.

    Models, in order: k-nearest neighbours (grid search over ``n_neighbors``
    1..19 and ``p`` 1..2), polynomial-kernel SVM (grid search over ``degree``
    1..11), random forest (73 trees, entropy criterion, ``random_state=123``),
    Gaussian process (grid search over an RBF-based and a Matern kernel),
    Gaussian naive Bayes and XGBoost. For each model the accuracy and the
    scikit-learn classification report on the test set are printed; the
    selected parameters are printed as well for the grid-searched models.

    Parameters
    ----------
    features_train, features_test : array-like
        Feature matrices of the training and testing samples.
    train_labels, test_labels : array-like
        Class labels aligned with the corresponding feature matrix.

    Returns
    -------
    tuple
        ``(yknn, ysvm, yforest, ygpc, ynbc, yxgb)``: the test-set predictions
        of each model, in the order listed above.
    """
    # Local import: only this function needs it.
    from sklearn.preprocessing import LabelEncoder

    # KNN
    knn_search = GridSearchCV(
        KNeighborsClassifier(),
        {'n_neighbors': np.arange(1, 20, 1), 'p': np.arange(1, 3, 1)},
    )
    knn_search.fit(features_train, train_labels)
    # GridSearchCV refits the best configuration on the full training set
    # (refit=True by default), so best_estimator_ is the final model.
    yknn = knn_search.best_estimator_.predict(features_test)
    _print_report(
        "======================================================KNN======================================================",
        test_labels, yknn, knn_search.best_params_,
    )

    # SVM
    svm_search = GridSearchCV(SVC(kernel='poly'), {'degree': np.arange(1, 12, 1)})
    svm_search.fit(features_train, train_labels)
    # Use the refitted polynomial model: rebuilding SVC from best_params_
    # alone would fall back to the default RBF kernel, which ignores `degree`.
    ysvm = svm_search.best_estimator_.predict(features_test)
    _print_report(
        "======================================================SVM======================================================",
        test_labels, ysvm, svm_search.best_params_,
    )

    # Random Forest
    forest = RandomForestClassifier(n_estimators=73, criterion='entropy', random_state=123)
    forest.fit(features_train, train_labels)
    yforest = forest.predict(features_test)
    _print_report(
        "=================================================Random Forest=================================================",
        test_labels, yforest,
    )

    # Gaussian Process
    rbf = 1.0 * RBF(1.3)
    matern = Matern()
    gpc_search = GridSearchCV(GaussianProcessClassifier(), {'kernel': [rbf, matern]})
    gpc_search.fit(features_train, train_labels)
    ygpc = gpc_search.best_estimator_.predict(features_test)
    _print_report(
        "======================================================GPC======================================================",
        test_labels, ygpc, gpc_search.best_params_,
    )

    # Naive Bayes
    nbc = GaussianNB()
    nbc.fit(features_train, train_labels)
    ynbc = nbc.predict(features_test)
    _print_report(
        "==================================================Naive Bayes==================================================",
        test_labels, ynbc,
    )

    # XGBoost
    # Recent xgboost releases only accept class labels encoded as 0..n-1, so
    # the labels are encoded for training and the predictions decoded back.
    encoder = LabelEncoder().fit(train_labels)
    xgb = XGBClassifier()
    xgb.fit(features_train, encoder.transform(train_labels))
    yxgb = encoder.inverse_transform(xgb.predict(features_test))
    # No grid search is run for XGBoost, so there are no best parameters to print.
    _print_report(
        "====================================================XGBoost====================================================",
        test_labels, yxgb,
    )

    return yknn, ysvm, yforest, ygpc, ynbc, yxgb

In [27]:
# Train and evaluate every classifier on the wave HOG features; keep each model's test predictions.
yknn, ysvm, yforest, ygpc, ynbc, yxgb = allclassifier(
    features_train=features_train,
    train_labels=train_labels,
    features_test=features_test,
    test_labels=test_labels,
)