# Avaliando os classificadores

## Constantes

In [1]:
rg_folder = '../RG-Dataset'
BID_folder = '../BID Dataset'

img_size = (150, 112)

## Importações

In [12]:
from image_preprocessing.filters import to_gray, decrease_noise
from image_preprocessing.rotations import rotate_90_if_vertical_rectangle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import OneClassSVM
from sklearn.linear_model import LogisticRegression, SGDOneClassSVM
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.covariance import EllipticEnvelope
from joblib import dump, load

import os

## Funções

In [3]:
def load_image(path):
    img = cv2.imread(path)
    img_gray = to_gray(img)
    filtered_image = decrease_noise(img_gray)
    filtered_image = rotate_90_if_vertical_rectangle(filtered_image)
    resized_image = cv2.resize(filtered_image, img_size)
    return resized_image

def load_dataset():
    X = []
    y = []
    rg_files_path = f'{rg_folder}/files'
    rg_files = os.listdir(rg_files_path)

    for file_path in rg_files:
        if 'in.jpg' in file_path:
            X.append(load_image(f'{rg_files_path}/{file_path}').flatten())
            y.append(1)
    
    for folder in ['CNH_Aberta', 'CNH_Frente', 'CNH_Verso', 'CPF_Frente', 'CPF_Verso']:
        folder_path = f'{BID_folder}/{folder}'
        document_files = os.listdir(folder_path)
        for document_path in document_files:
            if 'in.jpg' in document_path:
                X.append(load_image(f'{folder_path}/{document_path}').flatten())
                y.append(0)
    
    return np.array(X), np.array(y)

def evaluate_model(y_test, y_pred):
    return {
        'acurácia': accuracy_score(y_test, y_pred),
        'precisão': precision_score(y_test, y_pred),
        'revocação': recall_score(y_test, y_pred),
        'f-medida': f1_score(y_test, y_pred)
    }


In [4]:
X, y = load_dataset()

X = np.array(X)
y = np.array(y)

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1, stratify=y)

In [6]:
X_train_only_rgs = X_train[y_train==1]

## Testando modelos

In [7]:
results = {}

### [SVM One Class](https://scikit-learn.org/stable/modules/generated/sklearn.svm.OneClassSVM.html#sklearn.svm.OneClassSVM)

In [8]:
model = OneClassSVM()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [9]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.8656438865643886,
 'precisão': 0.6148148148148148,
 'revocação': 0.47293447293447294,
 'f-medida': 0.5346215780998389}

In [10]:
results['SVM One Class'] = result

### [SGD One Class SVM](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDOneClassSVM.html#sklearn.linear_model.SGDOneClassSVM)

In [13]:
model = SGDOneClassSVM()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [14]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.16317991631799164,
 'precisão': 0.16317991631799164,
 'revocação': 1.0,
 'f-medida': 0.28057553956834536}

In [15]:
results['SGD SVM One Class'] = result

### [Isolation forest](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html#sklearn.ensemble.IsolationForest)

In [16]:
model = IsolationForest()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [17]:
result = evaluate_model(y_test, y_pred)
result

{'acurácia': 0.7415155741515574,
 'precisão': 0.37454100367197063,
 'revocação': 0.8717948717948718,
 'f-medida': 0.523972602739726}

In [18]:
results['Isolation Forest'] = result

### [Elliptic Envelope](https://scikit-learn.org/stable/modules/generated/sklearn.covariance.EllipticEnvelope.html#sklearn.covariance.EllipticEnvelope)

In [19]:
model = EllipticEnvelope()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [None]:
result = evaluate_model(y_test, y_pred)
result

In [None]:
results['Elliptic Envelope'] = result

### [Local Outlier Factor](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.LocalOutlierFactor.html#sklearn.neighbors.LocalOutlierFactor)

In [None]:
model = LocalOutlierFactor()
model.fit(X_train_only_rgs)
y_pred = model.predict(X_test)
y_pred[y_pred==-1] = 0

In [None]:
result = evaluate_model(y_test, y_pred)
result

In [None]:
results['Local Outlier Factor'] = result

### [Logistic regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)

In [None]:
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
result = evaluate_model(y_test, y_pred)
result

In [None]:
results['Logistic regression'] = result

In [None]:
y_pred = model.predict(X_train)
result = evaluate_model(y_train, y_pred)
result
print(y_pred)

In [None]:
# dump(model, 'image_classification/lr_model.joblib')

## Agregando resultados

In [None]:
results_df = pd.DataFrame(results)
results_df

In [None]:
with open('image_classification', 'w') as results_file:
    results_file.write(results_df.to_latex())