# Avaliando os classificadores

## Constantes

In [5]:
rg_folder = '../RG-Dataset'
BID_folder = '../BID Dataset'

img_size = (150, 112)

## Importações

In [6]:
from image_preprocessing.filters import to_gray, decrease_noise
from image_preprocessing.rotations import rotate_90_if_vertical_rectangle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LogisticRegression, SGDOneClassSVM
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from joblib import dump, load

import os

## Funções

In [7]:
def load_image(path):
    img = cv2.imread(path)
    img_gray = to_gray(img)
    filtered_image = decrease_noise(img_gray)
    filtered_image = rotate_90_if_vertical_rectangle(filtered_image)
    resized_image = cv2.resize(filtered_image, img_size)
    return resized_image

def load_dataset():
    X = []
    y = []
    rg_files_path = f'{rg_folder}/files'
    rg_files = os.listdir(rg_files_path)

    for file_path in rg_files:
        if 'in.jpg' in file_path:
            X.append(load_image(f'{rg_files_path}/{file_path}').flatten())
            y.append(1)
    
    for folder in ['CNH_Aberta', 'CNH_Frente', 'CNH_Verso', 'CPF_Frente', 'CPF_Verso']:
        folder_path = f'{BID_folder}/{folder}'
        document_files = os.listdir(folder_path)
        for document_path in document_files:
            if 'in.jpg' in document_path:
                X.append(load_image(f'{folder_path}/{document_path}').flatten())
                y.append(0)
    
    return np.array(X), np.array(y)

def evaluate_model(y_test, y_pred):
    return {
        'acurácia': accuracy_score(y_test, y_pred),
        'precisão': precision_score(y_test, y_pred),
        'revocação': recall_score(y_test, y_pred),
        'f-medida': f1_score(y_test, y_pred)
    }


In [8]:
X, y = load_dataset()

X = np.array(X)
y = np.array(y)

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1, stratify=y)

In [10]:
X_train_only_rgs = X_train[y_train==1]

## Testando modelos

In [11]:
results = {}

### [SVM One Class](https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html#sklearn.svm.OneClassSVM)

In [12]:
model = OneClassSVM()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [13]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.8656438865643886,
 'precisão': 0.6148148148148148,
 'revocação': 0.47293447293447294,
 'f-medida': 0.5346215780998389}

In [14]:
results['SVM One Class'] = result

In [15]:
dump(model, 'image_classification/svm_model.joblib')

['image_classification/svm_model.joblib']

### [SGD One Class SVM](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDOneClassSVM.html#sklearn.linear_model.SGDOneClassSVM)

In [16]:
model = SGDOneClassSVM()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [17]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.16317991631799164,
 'precisão': 0.16317991631799164,
 'revocação': 1.0,
 'f-medida': 0.28057553956834536}

In [18]:
results['SGD SVM One Class'] = result

### [Isolation forest](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html#sklearn.ensemble.IsolationForest)

In [19]:
model = IsolationForest()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [20]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.7666201766620176,
 'precisão': 0.4007884362680683,
 'revocação': 0.8689458689458689,
 'f-medida': 0.5485611510791367}

In [21]:
results['Isolation Forest'] = result

### [Local Outlier Factor](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.LocalOutlierFactor.html#sklearn.neighbors.LocalOutlierFactor)

In [22]:
model = LocalOutlierFactor(novelty=True)
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [23]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.8075313807531381,
 'precisão': 0.4588235294117647,
 'revocação': 1.0,
 'f-medida': 0.6290322580645161}

In [None]:
dump(model, 'image_classification/lof_model.joblib')

In [24]:
results['Local Outlier Factor'] = result

### [Logistic regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)

In [25]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [26]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 1.0, 'precisão': 1.0, 'revocação': 1.0, 'f-medida': 1.0}

In [27]:
results['Logistic regression'] = result

In [28]:
y_pred = model.predict(X_train)
result = evaluate_model(y_train, y_pred)
result
print(y_pred)

[0 0 1 ... 1 0 1]


In [29]:
# dump(model, 'image_classification/lr_model.joblib')

## Agregando resultados

In [30]:
results_df = pd.DataFrame(results)
results_df

Unnamed: 0,SVM One Class,SGD SVM One Class,Isolation Forest,Local Outlier Factor,Logistic regression
acurácia,0.865644,0.16318,0.76662,0.807531,1.0
precisão,0.614815,0.16318,0.400788,0.458824,1.0
revocação,0.472934,1.0,0.868946,1.0,1.0
f-medida,0.534622,0.280576,0.548561,0.629032,1.0


In [31]:
with open('image_classification/results.tex', 'w') as results_file:
    results_file.write(results_df.to_latex())

  results_file.write(results_df.to_latex())
