In [12]:
import numpy as np
import pandas as pd
from classifiers import train_svm, train_rf
from extractors import extract_hog_features, extract_lda_features, extract_pca_features
from sklearn.model_selection import train_test_split


In [19]:
# Load the dataset: a whitespace-delimited text file with no header row.
# `sep=r'\s+'` is the replacement for `delim_whitespace=True`, which pandas
# deprecated in version 2.2.
data = pd.read_csv('ocr_car_numbers_rotulado.txt', header=None, sep=r'\s+')

# Split attributes from labels: every column except the last is treated as a
# feature, and the last column is treated as the label.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Print the dimensions as a quick sanity check that the file parsed as expected.
print(X.shape, y.shape)

# Hold out 20% of the samples for testing. The split is stratified on the
# labels and uses a fixed seed so it is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

(3352, 1225) (3352,)


In [3]:
from keras.models import Sequential
from keras.layers import Dense, Flatten

def create_deep_learning_model(input_shape, num_classes=10):
    """Build and compile a small fully connected softmax classifier.

    The network flattens its input, applies two ReLU hidden layers
    (128 and 64 units) and ends in a softmax layer with one unit per class.
    It is compiled with the Adam optimizer and sparse categorical
    cross-entropy, so training labels must be integer class indices.

    Args:
        input_shape: Shape of a single sample (without the batch dimension),
            forwarded to the ``Flatten`` input layer.
        num_classes: Number of output classes. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        The compiled ``Sequential`` model, ready for ``fit``.

    Raises:
        ValueError: If ``num_classes`` is not a positive integer.
    """
    # Reject a degenerate output layer before any Keras object is built.
    if isinstance(num_classes, bool) or not isinstance(num_classes, int) or num_classes < 1:
        raise ValueError("num_classes must be a positive integer")

    model = Sequential()
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(128, activation='relu'))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [20]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, confusion_matrix
import tensorflow as tf

def evaluate_model(X_train, y_train, X_test, y_test, extractor, classifier, n_splits=10):
    skf = StratifiedKFold(n_splits=n_splits)
    accuracies = []
    conf_matrices = []

    for train_index, val_index in skf.split(X_train, y_train):
        X_train_fold, X_val_fold = X_train[train_index], X_train[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        # Extrair atributos
        if extractor.__name__ == 'extract_lda_features':
            X_train_extracted = extractor(X_train_fold, y_train_fold)
            X_val_extracted = extractor(X_val_fold, y_val_fold)
            X_test_extracted = extractor(X_test, y_test)
        else:
            X_train_extracted = extractor(X_train_fold)
            X_val_extracted = extractor(X_val_fold)
            X_test_extracted = extractor(X_test)

        # Treinar o classificador
        if classifier == 'svm':
            model = train_svm(X_train_extracted, y_train_fold)
        elif classifier == 'rf':
            model = train_rf(X_train_extracted, y_train_fold)
        elif classifier == 'dl':
            model = create_deep_learning_model(X_train_extracted.shape[1:])
            model.fit(X_train_extracted, y_train_fold, epochs=10, batch_size=32, verbose=0)
            y_pred = np.argmax(model.predict(X_test_extracted), axis=1)
        else:
            raise ValueError("Classificador desconhecido")

        # Avaliar o modelo
        if classifier != 'dl':
            y_pred = model.predict(X_test_extracted)
        
        accuracies.append(accuracy_score(y_test, y_pred))
        conf_matrices.append(confusion_matrix(y_test, y_pred))

    mean_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)
    mean_conf_matrix = np.mean(conf_matrices, axis=0)

    return mean_accuracy, std_accuracy, mean_conf_matrix

# Evaluate every extractor/classifier pairing
extractors = [extract_pca_features, extract_lda_features, extract_hog_features]
classifiers = ['svm', 'rf', 'dl']

results = []
for extractor in extractors:
    for classifier in classifiers:
        scores = evaluate_model(X_train, y_train, X_test, y_test, extractor, classifier)
        # Record layout: (extractor name, classifier key, mean accuracy,
        # accuracy std, mean confusion matrix)
        results.append((extractor.__name__, classifier, *scores))

# Report a summary line followed by the averaged confusion matrix per pairing
for extractor_name, classifier_key, acc_mean, acc_std, cm_mean in results:
    print(f"Extractor: {extractor_name}, Classifier: {classifier_key}, Mean Accuracy: {acc_mean}, Std Accuracy: {acc_std}")
    print(f"Mean Confusion Matrix:\n{cm_mean}")

Extractor: extract_pca_features, Classifier: svm, Mean Accuracy: 0.811177347242921, Std Accuracy: 0.09487817393126204
Mean Confusion Matrix:
[[59.5  0.   0.   0.   0.1  0.   0.7  0.   0.4  0.3]
 [ 0.  79.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.  45.  18.   0.   0.   0.   0.   0.   0. ]
 [ 0.   1.   5.7 63.1  0.   0.   0.   0.   0.   0.2]
 [ 0.   0.1  0.   0.  82.9  0.   0.   0.   0.   0. ]
 [ 0.   0.   0.   0.   0.  48.2  0.7  0.   0.  16.1]
 [ 1.1  0.   0.   0.   0.   5.9 48.7  0.  11.1  6.2]
 [ 0.   0.   0.   0.   0.   0.   0.  55.   0.   0. ]
 [ 1.2  0.   0.   0.   0.   0.5 13.6  0.  40.3  7.4]
 [ 0.   0.   0.   0.1  0.   5.6 12.2  0.  18.5 22.6]]
Extractor: extract_pca_features, Classifier: rf, Mean Accuracy: 0.9132637853949328, Std Accuracy: 0.10407635567790818
Mean Confusion Matrix:
[[59.7  0.   0.   0.   0.   0.   0.9  0.   0.4  0. ]
 [ 0.  79.   0.   0.   0.   0.   0.   0.   0.   0. ]
 [ 0.   0.  57.4  5.6  0.   0.   0.   0.   0.   0. ]
 [ 0.   1.   6.7 62.3  0.   