In [1]:
#imporation des bibiotheques necessaires
import cv2
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
import imutils
import glob

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix,accuracy_score


from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

#ignore les messages d'erreurs
import warnings
warnings.filterwarnings('ignore')

In [4]:
#fonction qui extrait les 2 primeirs moments (mean et std) de chaque composante R,G,B d'une image

def getMoments(image):
    
    #extraction des composants RGB de l'image
    R=image[:,:,0]
    G=image[:,:,1]
    B=image[:,:,2]
    
    #calcule des moyennes et des ecart-type de chaque composants
    colorFeature=[np.round(R.mean()),np.round(R.std()),np.round(G.mean()),np.round(G.std()),
                  np.round(B.mean()),np.round(B.std())]
    
    #normalisation
    colorFeature=colorFeature/np.array(colorFeature).mean()
    
    return colorFeature

In [5]:
#fonction qui quantifie l'histogramme dans un espace couleur hsv en 8x2x2 cases identiques
#L'Histogramme dans l'espace de couleur HSV est obtenu utilisant une quantification par niveau:
# 8 pour H(hue), 2 pour S(saturation), et 2 pour V(Value).
# Le vecteur descripteur de taille 1x32 est calculé puis normalisé

def hsvHistogram(image):
    
    #convertir l'image RGB en HSV
    image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    
    

    bins=(8,2,2)
    
    hist = cv2.calcHist([image], [0, 1, 2],None, bins,[0, 256, 0, 256, 0, 256])
    
    #normalisation des histogrammes de couleur afin que chaque histogramme soit représenté par le nombre de pourcentages 
    #relatifs pour un groupe particulier et non par le nombre entier pour chaque groupe.La normalisation garantira que les images 
    #ayant un contenu similaire mais des dimensions radicalement différentes seront toujours «similaires» une fois que nous
    #aurons appliqué notre fonction de similarité
    
    if imutils.is_cv2():
        hist = cv2.normalize(hist).flatten()# otherwise handle for OpenCV 3+
    else:
        hist = cv2.normalize(hist, hist).flatten()
        # return the histogram

    return hist


    bins=(8,2,2)
    
    hist = cv2.calcHist([image], [0, 1, 2],None, bins,[0, 256, 0, 256, 0, 256])
    
    #normalisation des histogrammes de couleur afin que chaque histogramme soit représenté par le nombre de pourcentages 
    #relatifs pour un groupe particulier et non par le nombre entier pour chaque groupe.La normalisation garantira que les images 
    #ayant un contenu similaire mais des dimensions radicalement différentes seront toujours «similaires» une fois que nous
    #aurons appliqué notre fonction de similarité
    
    if imutils.is_cv2():
        hist = cv2.normalize(hist).flatten()# otherwise handle for OpenCV 3+
    else:
        hist = cv2.normalize(hist, hist).flatten()
        # return the histogram

    return hist


In [6]:


from skimage.feature import greycomatrix, greycoprops

#fonction qui extrait les caracteristique de la matrice GLCM
def textureFeature(image):
    
    lf = []
    
    img=cv2.cvtColor(image,cv2.COLOR_RGB2GRAY)
    
    props = ['contrast', 'correlation', 'homogeneity', 'energy',]

    # left nearest neighbor
    glcm = greycomatrix(img, [1], [0], 256, symmetric=True, normed=True)
    
    for f in props:
        lf.append( greycoprops(glcm, f)[0,0] )
        
    
    lf=np.asarray(lf)
   
    
    #amenons dans la même echelle les valeurs du vecteurs avec la formule : H(i)=-sign(H(i))*log(H(i))
    for i in range(lf.size):
        lf[i] = -1*np.copysign(1.0,lf[i]) * np.log10(abs(lf[i]))
        
    return lf

In [7]:
#fonction qui extrait les caracteristique de la forme c.a.d les moments de Hu

def shapeFeatures(image):
    
    #convertisse l'image en image de gris
    
    img=cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    
    
    #Calculer les moments de Hu
    moments = cv2.moments(img)
    
    huMoments = cv2.HuMoments(moments)
    
    #les moments de Hu peuvent etre dans des echelles differents,une transformation de log donnée ci-dessous peut
    #les amener dans la même echelle : H(i)=-sign(H(i))*log(H(i))
    
    
    for i in range(0,7):
        huMoments[i] = -1*np.copysign(1.0,huMoments[i]) * np.log10(abs(huMoments[i]))
    
    #redimension 
    huMoments.reshape(7,)
    return huMoments
    

In [47]:
#fonction qui cree une dataset contenant les caracteristiques des images

def createFeatures(Folder):
    
    dataframe=pd.DataFrame() #dataframe global qui va stocker les caracteristique de chaque images
    
    
    id=1 
    
    #lister les sous dossiers contenues dans le dossier Folder
    for class_fold in glob.glob(Folder+"/*"):
        
        #Pour chaque sous-dossiers on liste liste les images "jpg" qu'il contient
        
        for imagePath in glob.glob(class_fold+ "/*.jpg"):
                        #recupere le nom de l'image courant
            image = cv2.imread(imagePath) #lecture de l'image courant
            
            
            
            
            moments=getMoments(image) #recupere le moments de l'image
            hist=hsvHistogram(image) #recupere l'histogramme
            texture=textureFeature(image) 
            shape=shapeFeatures(image)
            
            #concatenation du vecteurs de moments avec l'histogramme dans un seul vecteur
            features= np.concatenate((moments,hist,texture,shape.reshape(7)))
            
            #creation d'un dataframe contenant les caracteristique de chaque image avec le nom et l'id de sa classe
            df=pd.DataFrame([features])
            df["class_name"]=class_fold.split("\\")[1]
            df["class_id"]=id
            
            #ajout du dataframe de l'image dans le dataframe globale
            dataframe=dataframe.append(df)
            
        id=id+1    
    
    dataframe.index=range(dataframe.index.size) #reorganise les index du dataframe
    
    return dataframe

In [48]:
train_data="train"
dataset=createFeatures(train_data)

In [49]:
dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,class_name,class_id
0,1.601036,0.746114,1.523316,0.357513,1.305699,0.466321,0.104579,0.042679,0.000151,0.0,...,1.527159,2.733494,7.587931,10.172389,10.923425,-21.585243,-14.722353,-21.665897,malade,1
1,1.601036,0.746114,1.507772,0.373057,1.305699,0.466321,0.106646,0.042232,0.000640,0.0,...,1.601736,2.732728,7.572273,10.170634,10.935458,-21.577404,-14.722313,-21.725365,malade,1
2,1.601036,0.746114,1.507772,0.373057,1.305699,0.466321,0.109835,0.042756,0.005768,0.0,...,1.619283,2.733400,7.594161,10.161211,10.920143,-21.564377,-14.724070,-21.671331,malade,1
3,1.358491,0.889488,1.342318,0.646900,1.115903,0.646900,0.342132,0.037188,0.040072,0.0,...,0.964392,2.740408,7.838716,11.069508,10.922884,-21.949046,-14.864704,-22.363948,malade,1
4,1.592677,0.508009,1.729977,0.425629,1.276888,0.466819,0.122409,0.352102,0.000000,0.0,...,1.616563,2.836578,7.892372,10.532612,11.956265,-23.202314,-15.944895,-24.266333,malade,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
177,1.538462,0.512821,1.555556,0.358974,1.521368,0.512821,0.015645,0.001663,0.000000,0.0,...,1.605943,2.703472,8.108920,12.042392,10.603558,22.066029,16.157354,-22.088656,saine,2
178,1.649351,0.571429,1.467532,0.480519,1.350649,0.480519,0.370251,0.881671,0.000677,0.0,...,1.690547,2.818554,8.196428,10.468326,10.288778,22.346113,-14.511306,20.667426,saine,2
179,1.358071,0.567718,1.435993,0.545455,1.480519,0.612245,0.004074,0.000161,0.000000,0.0,...,1.954551,2.854688,8.196802,11.107718,10.543157,21.509715,14.664706,-21.528930,saine,2
180,1.654737,0.581053,1.490526,0.442105,1.364211,0.467368,0.115904,0.841882,0.000386,0.0,...,1.717579,2.828522,8.440576,10.975463,10.425963,21.246944,15.628079,21.312343,saine,2


In [54]:
X_train=dataset.drop(["class_name","class_id"],axis=1) #recuperation des variables d'entrées
Y_train=dataset["class_name"] #recupere les variables de sortie


In [56]:
std_scaler=StandardScaler().fit(X_train)
X_train_std=std_scaler.transform(X_train)
print(X_train_std)

[[ 1.05179995  0.85771624  0.21444327 ... -1.1182459  -0.64162221
  -1.07946646]
 [ 1.05179995  0.85771624  0.08523044 ... -1.11788656 -0.64161943
  -1.08217611]
 [ 1.05179995  0.85771624  0.08523044 ... -1.11728942 -0.64174376
  -1.07971405]
 ...
 [-0.42012207 -0.55117915 -0.51144996 ...  0.85718651  1.43780518
  -1.0732256 ]
 [ 1.37712585 -0.4458679  -0.05812779 ...  0.84514134  1.50597344
   0.87881795]
 [ 0.84487012 -0.10507822 -0.16469077 ...  0.84218659  1.49260227
   0.87331744]]


In [58]:
from sklearn.svm import SVC

In [59]:
svc=SVC(kernel="rbf",#gaussian kernel
        decision_function_shape='ovo' #Train with SVM one-vs-one encoding scheme
        ,gamma=0.01
       )


svc.fit(X_train_std,Y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovo', degree=3, gamma=0.01, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [61]:
test_data=createFeatures('validation')

In [62]:
test_data

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,class_name,class_id
0,1.317391,0.652174,1.46087,0.521739,1.473913,0.573913,0.003602,0.000755,0.0,0.0,...,1.595639,2.817217,7.822616,12.8035,9.992819,-21.521056,14.007989,21.564057,malade,1
1,1.314534,0.663774,1.444685,0.533623,1.470716,0.572668,0.002866,0.000898,0.0,0.0,...,1.639517,2.817724,7.83113,12.815717,9.993433,-21.462551,14.029126,21.692932,malade,1
2,1.403042,0.684411,1.48289,0.479087,1.346008,0.604563,0.004259,0.014164,0.001387,0.0,...,1.720944,2.845988,8.236089,10.738558,11.199528,-22.330614,-15.317908,-22.308139,malade,1
3,1.190283,0.838057,1.311741,0.728745,1.153846,0.777328,0.325956,0.018897,0.031321,0.0,...,0.938527,2.851803,8.20841,10.716002,11.180145,23.604149,-15.302893,22.128461,malade,1
4,1.516981,0.486792,1.516981,0.486792,1.630189,0.362264,0.045753,0.851107,0.0,0.0,...,1.507953,2.878312,8.702381,11.508459,11.268103,22.713693,15.629068,22.973674,malade,1
5,1.508897,0.498221,1.743772,0.24911,1.66548,0.33452,0.000624,0.911075,0.0,0.0,...,0.250457,3.137617,10.217093,13.934501,12.913056,26.685244,18.53601,-26.385558,malade,1
6,1.399504,0.521092,1.60794,0.416873,1.593052,0.461538,3.2e-05,0.0,0.0,0.0,...,1.547907,2.801711,7.821746,10.326081,10.852726,-22.235463,-14.986992,-21.447828,malade,1
7,1.39604,0.519802,1.60396,0.430693,1.574257,0.475248,3.2e-05,0.0,0.0,0.0,...,1.606591,2.801947,7.815231,10.324081,10.856554,-22.308695,-15.026692,-21.451015,malade,1
8,1.39604,0.519802,1.60396,0.430693,1.574257,0.475248,3.2e-05,0.0,0.0,0.0,...,1.621747,2.802194,7.833245,10.326708,10.865856,-22.212734,-14.974234,-21.469096,malade,1
9,1.162791,0.666667,1.410853,0.713178,1.348837,0.697674,0.261422,0.000169,0.003382,0.0,...,0.919686,2.817602,7.974595,11.270804,11.075797,22.458369,-15.329488,22.353415,malade,1


In [86]:
X_test=test_data.drop(["class_name","class_id"],axis=1) #recuperation des variables d'entrées
Y_test=test_data["class_name"] #recupere les variables de sortie


In [87]:
X_test

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,39,40,41,42,43,44,45,46,47,48
0,1.317391,0.652174,1.46087,0.521739,1.473913,0.573913,0.003602,0.000755,0.0,0.0,...,0.010367,0.568307,1.595639,2.817217,7.822616,12.8035,9.992819,-21.521056,14.007989,21.564057
1,1.314534,0.663774,1.444685,0.533623,1.470716,0.572668,0.002866,0.000898,0.0,0.0,...,0.010092,0.625979,1.639517,2.817724,7.83113,12.815717,9.993433,-21.462551,14.029126,21.692932
2,1.403042,0.684411,1.48289,0.479087,1.346008,0.604563,0.004259,0.014164,0.001387,0.0,...,0.057111,0.753752,1.720944,2.845988,8.236089,10.738558,11.199528,-22.330614,-15.317908,-22.308139
3,1.190283,0.838057,1.311741,0.728745,1.153846,0.777328,0.325956,0.018897,0.031321,0.0,...,0.034009,0.587,0.938527,2.851803,8.20841,10.716002,11.180145,23.604149,-15.302893,22.128461
4,1.516981,0.486792,1.516981,0.486792,1.630189,0.362264,0.045753,0.851107,0.0,0.0,...,0.012898,0.549488,1.507953,2.878312,8.702381,11.508459,11.268103,22.713693,15.629068,22.973674
5,1.508897,0.498221,1.743772,0.24911,1.66548,0.33452,0.000624,0.911075,0.0,0.0,...,0.016304,0.168239,0.250457,3.137617,10.217093,13.934501,12.913056,26.685244,18.53601,-26.385558
6,1.399504,0.521092,1.60794,0.416873,1.593052,0.461538,3.2e-05,0.0,0.0,0.0,...,0.007641,0.481787,1.547907,2.801711,7.821746,10.326081,10.852726,-22.235463,-14.986992,-21.447828
7,1.39604,0.519802,1.60396,0.430693,1.574257,0.475248,3.2e-05,0.0,0.0,0.0,...,0.026671,0.563309,1.606591,2.801947,7.815231,10.324081,10.856554,-22.308695,-15.026692,-21.451015
8,1.39604,0.519802,1.60396,0.430693,1.574257,0.475248,3.2e-05,0.0,0.0,0.0,...,0.012298,0.586412,1.621747,2.802194,7.833245,10.326708,10.865856,-22.212734,-14.974234,-21.469096
9,1.162791,0.666667,1.410853,0.713178,1.348837,0.697674,0.261422,0.000169,0.003382,0.0,...,0.010345,0.428623,0.919686,2.817602,7.974595,11.270804,11.075797,22.458369,-15.329488,22.353415


In [88]:
X_test_std=std_scaler.transform(X_test)
X_test_std

array([[-0.66656257,  0.11581762, -0.30465523, ..., -1.11530362,
         1.39133588,  0.89028717],
       [-0.68387482,  0.20743334, -0.43918866, ..., -1.1126218 ,
         1.39283153,  0.89615931],
       [-0.14767923,  0.37040958, -0.12160835, ..., -1.152413  ,
        -0.68376369, -1.10872993],
       ...,
       [-0.50536321,  0.27239559, -0.21213135, ...,  0.84223829,
         1.47274025, -1.07043494],
       [ 1.49988301, -0.09879034, -0.18719328, ...,  0.83834845,
        -0.64449946,  0.91181163],
       [ 2.07178002, -0.93714137,  1.70712364, ...,  0.92658443,
         1.52314626,  0.96747126]])

In [89]:
y_pred_svm=svc.predict(X_test_std)
y_pred_svm

array(['malade', 'malade', 'malade', 'malade', 'malade', 'malade',
       'malade', 'malade', 'malade', 'malade', 'malade', 'malade',
       'malade', 'malade', 'malade', 'malade', 'malade', 'malade',
       'malade', 'malade', 'malade', 'malade', 'malade', 'malade',
       'malade', 'malade', 'malade', 'malade', 'malade', 'malade',
       'malade', 'malade', 'saine', 'saine', 'saine', 'saine', 'saine',
       'malade', 'saine', 'saine', 'saine', 'saine', 'saine', 'saine',
       'saine', 'saine', 'saine', 'saine', 'saine', 'saine', 'saine',
       'saine', 'saine'], dtype=object)

In [90]:
acc_score_svm=accuracy_score(Y_test,y_pred_svm) #accuracy
acc_score_svm


0.9622641509433962

In [91]:
matrix_conf_svm=confusion_matrix(y_pred_svm,Y_test) #matrice de confusion
print(matrix_conf_svm)

[[32  1]
 [ 1 19]]


In [75]:
def createFeatures2(Folder):
    
    dataframe=pd.DataFrame() #dataframe global qui va stocker les caracteristique de chaque images
    
    
    
    #Pour chaque sous-dossiers on liste liste les images "jpg" qu'il contient
    for imagePath in glob.glob(Folder+ "/*.jpg"):
        #recupere le nom de l'image courant
        image = cv2.imread(imagePath) #lecture de l'image courant
            
            
        moments=getMoments(image) #recupere le moments de l'image
        hist=hsvHistogram(image) #recupere l'histogramme
        texture=textureFeature(image) 
        shape=shapeFeatures(image)
            
        #concatenation du vecteurs de moments avec l'histogramme dans un seul vecteur
        features= np.concatenate((moments,hist,texture,shape.reshape(7)))
            
        #creation d'un dataframe contenant les caracteristique de chaque image avec son nom
        df=pd.DataFrame([features])
        df["name"]=imagePath.split("\\")[1]
            
            
        #ajout du dataframe de l'image dans le dataframe globale
        dataframe=dataframe.append(df)
        
    dataframe.index=range(dataframe.index.size) #reorganise les index du dataframe
    
    return dataframe

In [92]:
essaie=createFeatures2('test')

In [93]:
essaie

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,name
0,1.365854,0.670732,1.52439,0.47561,1.353659,0.609756,0.009679,0.008442,0.002329,0.0,...,1.061867,1.902711,2.818942,9.752062,12.181044,11.331027,23.287005,16.846782,23.197342,0ca16873-eeac-47e9-9a87-1859950daab7___JR_FrgE...
1,1.147253,0.817582,1.358242,0.751648,1.147253,0.778022,0.358153,0.009158,0.023095,0.0,...,0.701942,0.937475,2.825432,8.815378,12.112082,11.279975,23.329575,15.740462,-23.023449,0ca16873-eeac-47e9-9a87-1859950daab7___JR_FrgE...
2,1.383016,0.655113,1.372617,0.571924,1.403813,0.613518,0.010257,0.002803,0.0,0.0,...,0.976048,1.898489,2.878527,7.932341,10.87832,10.43956,21.274712,-14.966913,-21.226041,13575fc3-1674-4511-a124-2ed065f00cda___RS_HL 7...
3,1.700461,0.304147,1.700461,0.304147,1.769585,0.221198,0.378921,0.835821,0.0,0.0,...,0.418466,1.342576,2.862883,9.600777,11.373121,11.028077,-22.232372,-15.837952,23.115054,1a69060b-e45e-4d95-881c-f6d1960dffcd___FREC_C....
4,1.453027,0.51357,1.440501,0.51357,1.54071,0.538622,0.534072,0.564137,0.000328,0.0,...,0.559802,1.605397,2.836195,8.214251,10.827146,10.54666,-21.236308,-14.755685,-22.184059,1b567c7d-1041-4240-a56c-8ea94e6252b4___FREC_C....
5,1.429787,0.548936,1.417021,0.548936,1.544681,0.510638,0.464269,0.625786,3.2e-05,0.0,...,0.512467,1.565127,2.84281,7.894742,10.362516,10.162746,-20.627941,-14.110675,-20.533939,1cfc6e73-1d86-4fb9-bffb-010163531711___FREC_C....
6,1.585714,0.428571,1.575,0.428571,1.575,0.407143,0.16764,0.954856,0.000348,0.0,...,0.738573,1.639262,2.940407,9.002431,11.87211,10.97111,22.404284,-15.47621,23.035307,1d4dee76-8af5-4bce-ba44-ef87ffd664de___FREC_C....
7,1.317073,0.787992,1.418386,0.506567,1.317073,0.652908,0.019091,0.005468,0.001375,0.0,...,0.943244,1.844645,2.819928,9.441079,11.882559,10.928225,22.395127,-15.707106,22.637552,2f6a52cd-21f5-41d0-870e-f8a67d1f5c56___JR_FrgE...
8,1.303371,0.786517,1.41573,0.52809,1.303371,0.662921,0.018876,0.006468,0.00227,0.0,...,1.037599,1.901998,2.820049,9.164244,11.87908,10.914939,22.39531,-15.582382,22.560191,2f6a52cd-21f5-41d0-870e-f8a67d1f5c56___JR_FrgE...
9,1.383562,0.657534,1.410959,0.534247,1.356164,0.657534,0.024955,0.023432,0.001246,0.0,...,0.700851,1.593982,2.726058,8.372684,11.803892,9.940911,20.845475,-14.436258,21.243902,34960f92-a973-4d36-b388-f63b339b86bc___RS_HL 6...


In [94]:
X=essaie.drop(["name"],axis=1) #recuperation des variables d'entrées
Y=essaie["name"] #recupere les variables de sortie


In [95]:
X_std=std_scaler.transform(X)
X_std

array([[-0.37297056,  0.26237916,  0.22337261, ...,  0.93865583,
         1.59220883,  0.96470708],
       [-1.69728673,  1.42214337, -1.15776859, ...,  0.94060718,
         1.51392565, -1.14132274],
       [-0.26900104,  0.13902652, -1.03827176, ...,  0.8464142 ,
        -0.65892731, -1.05942461],
       ...,
       [ 0.36269719,  0.98879917,  0.12629384, ...,  0.86457328,
         1.49775361,  0.89376657],
       [ 0.36269719,  0.98879917,  0.12629384, ...,  0.86457328,
         1.49775361, -1.07830557],
       [-0.60409461,  0.48819144, -1.24312984, ..., -1.15376214,
         1.46863665, -1.09799032]])

In [96]:
y_svm=svc.predict(X_std)
y_svm

array(['malade', 'malade', 'saine', 'malade', 'malade', 'malade', 'saine',
       'malade', 'malade', 'saine', 'saine', 'saine', 'malade', 'malade',
       'malade', 'saine', 'saine', 'malade', 'saine', 'saine', 'saine',
       'malade', 'malade', 'malade', 'saine', 'saine', 'saine', 'saine'],
      dtype=object)

In [85]:
from sklearn.model_selection import KFold