In [1]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from random import shuffle
from sklearn.model_selection import train_test_split
from sklearn import metrics,svm
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import tree
from skimage.measure import regionprops
from skimage.filters import threshold_otsu
from sklearn.preprocessing import MaxAbsScaler

## Preparando o dataset 

In [2]:
def load_data(datadir, classes, img_size=100):
    training_data = []
    label = []
    for classe in range(len(classes)):
        path = os.path.join(datadir, classes[classe])
        shufled_list  = list(os.listdir(path))
        shuffle(shufled_list)
        for img in shufled_list:
            img_array = cv2.imread(os.path.join(path, img), cv2.IMREAD_GRAYSCALE)
            img_array = cv2.resize(img_array, (img_size, img_size))
            unique = np.unique(img_array)
            if len(unique) == 1:
                continue
            training_data.append(img_array)
            label.append(classe)
    return training_data , label

In [3]:
data , label = load_data('dataset/geometric',['circle','square','star','triangle'])

## Extração de atributos

In [4]:
def get_contours_param(contour):
    contour_area = contour[0].filled_area
    contour_perimeter = contour[0].perimeter
    contour_convex_area = contour[0].convex_area
    diameter = contour[0].equivalent_diameter
    return contour_area , contour_perimeter, contour_convex_area, diameter

In [5]:
def features_extraction(images):
    features_list = []
    for image in images:
        thresh = threshold_otsu(image)
        binary = np.array(image > thresh).astype(int)
        white_pixel = np.where(binary > 0)
        if len(white_pixel[0]) > 7000:
            binary = abs(1-binary) # ajuste de imagens negativas
        regions = regionprops(binary)
        contour_area , contour_perimeter, contour_convex_area, diameter = get_contours_param(regions)
        features_list.append([contour_area , contour_perimeter, contour_convex_area, diameter])
    norm =  MaxAbsScaler()
    norm.fit(features_list)
    norm_features = norm.transform(features_list)
    return norm_features

In [6]:
features = features_extraction(data)

## Treinamento e Teste dos Classificadores

In [7]:
def generate_svm_model(train_data,label_train_data,test_data):
    clf = svm.SVC(kernel='linear')
    clf.fit(train_data, label_train_data)
    predicted = clf.predict(test_data)
    return predicted
def generate_SGDC_model(train_data,label_train_data,test_data):
    clf = SGDClassifier(loss="hinge", penalty="l2", max_iter=200)
    clf.fit(train_data, label_train_data)
    predicted = clf.predict(test_data)
    return predicted
def generate_naive_bayes_model(train_data,label_train_data,test_data):
    gnb = GaussianNB()
    gnb.fit(train_data, label_train_data)
    predicted = gnb.predict(test_data)
    return predicted
def generate_decision_tree_model(train_data,label_train_data,test_data):
    clf = tree.DecisionTreeClassifier()
    clf = clf.fit(train_data, label_train_data)
    predicted = clf.predict(test_data)
    return predicted
def generate_random_forest_model(X_train, y_train,test_data):
    rfc = RandomForestClassifier(criterion= 'entropy', max_depth= 8, max_features='auto', n_estimators=200)
    rfc.fit(X_train,y_train)
    predicted = rfc.predict(test_data)
    return predicted
def generate_MLP_model(X_train, y_train,test_data):
    classifier = MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=300,activation = 'relu',solver='adam',random_state=1)
    classifier.fit(X_train, y_train)
    predicted = classifier.predict(test_data)
    return predicted
def generate_knn_model(train_data,label_train_data,test_data):
    knn = KNeighborsClassifier()
    knn.fit(train_data,label_train_data)
    predicted = knn.predict(test_data)
    return predicted

In [8]:
def gen_classifiers(train_data,label_train_data,test_data):
    return generate_knn_model(train_data,label_train_data,test_data),\
    generate_MLP_model(train_data,label_train_data,test_data),\
    generate_SGDC_model(train_data,label_train_data,test_data),\
    generate_svm_model(train_data,label_train_data,test_data),\
    generate_decision_tree_model(train_data,label_train_data,test_data),\
    generate_naive_bayes_model(train_data,label_train_data,test_data),\
    generate_random_forest_model(train_data,label_train_data,test_data),


In [9]:
X_train, X_test, y_train, y_test = train_test_split(features, label, test_size=0.3)

results = gen_classifiers(X_train, y_train, X_test)

  warn(


## Avaliação dos classificadores

In [10]:
Classificadores = ["kNN", "MLP", "SGDC", "SVM", "DT", "NB", "RF"]
len(Classificadores)

7

In [11]:
#Acurácia

Resultados_acc = []
for i in range(len(Classificadores)):
    acc = metrics.accuracy_score(y_test, results[i])
    Resultados_acc.append(acc)
resultados_acc = np.array(Resultados_acc)
resultados_acc

array([0.97445255, 0.98540146, 0.71715328, 0.65693431, 0.93978102,
       0.52737226, 0.91240876])

In [12]:
# Revogação

Resultados_rev = []
for i in range(len(Classificadores)):
    recall = metrics.recall_score(y_test,results[i],average=None)
    Resultados_rev.append(recall)
Resultados_rev

[array([0.92982456, 0.93333333, 1.        , 1.        ]),
 array([0.94736842, 0.97777778, 1.        , 1.        ]),
 array([0.69298246, 0.        , 1.        , 0.82857143]),
 array([0.48245614, 0.        , 0.76923077, 1.        ]),
 array([0.96491228, 0.76666667, 0.98224852, 0.97142857]),
 array([0.24561404, 0.25555556, 0.50887574, 0.86857143]),
 array([0.86842105, 0.63333333, 1.        , 1.        ])]

In [13]:
# criando um dataframe para visualização

import pandas as pd

    

df = pd.DataFrame([resultados_acc], columns = [Classificadores], index = ["accuracy"])

display(df)

Unnamed: 0,kNN,MLP,SGDC,SVM,DT,NB,RF
accuracy,0.974453,0.985401,0.717153,0.656934,0.939781,0.527372,0.912409


In [14]:
# Precisão

for i in range(len(Classificadores)):
    precision = metrics.precision_score(y_test,results[i],average=None)
    precisao = f" A precisão do {Classificadores[i]} é {precision}"
    print(precisao)

 A precisão do kNN é [0.96363636 0.91304348 1.         0.98870056]
 A precisão do MLP é [0.98181818 0.93617021 1.         1.        ]
 A precisão do SGDC é [0.60769231 0.         0.87564767 0.64444444]
 A precisão do SVM é [0.74324324 0.         1.         0.50872093]
 A precisão do DT é [0.90909091 0.88461538 1.         0.92896175]
 A precisão do NB é [0.65116279 0.32394366 1.         0.43678161]
 A precisão do RF é [0.91666667 0.96610169 1.         0.8254717 ]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# F1-Score

for i in range(len(Classificadores)):
    f1_score = metrics.f1_score(y_test, results[i], average=None)
    score = f" Os valores encontrado do {Classificadores[i]} é {f1_score}"
    print(score)

 Os valores encontrado do kNN é [0.94642857 0.92307692 1.         0.99431818]
 Os valores encontrado do MLP é [0.96428571 0.95652174 1.         1.        ]
 Os valores encontrado do SGDC é [0.64754098 0.         0.93370166 0.725     ]
 Os valores encontrado do SVM é [0.58510638 0.         0.86956522 0.6743738 ]
 Os valores encontrado do DT é [0.93617021 0.82142857 0.99104478 0.94972067]
 Os valores encontrado do NB é [0.3566879  0.28571429 0.6745098  0.58126195]
 Os valores encontrado do RF é [0.89189189 0.76510067 1.         0.90439276]
