# Avaliando os classificadores

## Constantes

In [4]:
rg_folder = '../RG-Dataset'
BID_folder = '../BID Dataset'

img_size = (150, 112)

## Importações

In [31]:
from image_preprocessing.filters import to_gray, decrease_noise
from image_preprocessing.rotations import rotate_90_if_vertical_rectangle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LogisticRegression
from joblib import dump, load

import os

## Funções

In [18]:
def load_image(path):
    img = cv2.imread(path)
    img_gray = to_gray(img)
    filtered_image = decrease_noise(img_gray)
    filtered_image = rotate_90_if_vertical_rectangle(filtered_image)
    resized_image = cv2.resize(filtered_image, img_size)
    return resized_image

def load_dataset():
    X = []
    y = []
    rg_files_path = f'{rg_folder}/files'
    rg_files = os.listdir(rg_files_path)

    for file_path in rg_files:
        if 'in.jpg' in file_path:
            X.append(load_image(f'{rg_files_path}/{file_path}').flatten())
            y.append(1)
    
    for folder in ['CNH_Aberta', 'CNH_Frente', 'CNH_Verso', 'CPF_Frente', 'CPF_Verso']:
        folder_path = f'{BID_folder}/{folder}'
        document_files = os.listdir(folder_path)
        for document_path in document_files:
            if 'in.jpg' in document_path:
                X.append(load_image(f'{folder_path}/{document_path}').flatten())
                y.append(0)
    
    return np.array(X), np.array(y)

def evaluate_model(y_test, y_pred):
    return {
        'acurácia': accuracy_score(y_test, y_pred),
        'precisão': precision_score(y_test, y_pred),
        'revocação': recall_score(y_test, y_pred),
        'f-medida': f1_score(y_test, y_pred)
    }


In [7]:
X, y = load_dataset()

X = np.array(X)
y = np.array(y)

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1, stratify=y)

In [13]:
X_train_only_rgs = X_train[y_train==1]

## Testando modelos

In [21]:
results = {}

### SVM One Class

In [16]:
model = OneClassSVM()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [19]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.8656438865643886,
 'precisão': 0.6148148148148148,
 'revocação': 0.47293447293447294,
 'f-medida': 0.5346215780998389}

In [22]:
results['SMV One Class'] = result

## Logistic regression

In [26]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [27]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 1.0, 'precisão': 1.0, 'revocação': 1.0, 'f-medida': 1.0}

In [29]:
results['Logistic regression'] = result

In [34]:
y_pred = model.predict(X_train)
result = evaluate_model(y_train, y_pred)
result
print(y_pred)

[0 0 1 ... 1 0 1]


In [33]:
dump(model, 'image_classification/lr_model.joblib')

['image_classification/lr_model.joblib']