# Lista 4
Arthur Pontes de Miranda Ramos Soares

In [None]:
import os
from pathlib import Path
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import pandas as pd

## Funções Auxiliares

In [None]:
def show_images(
    *images: np.ndarray, titles: list[str] | None = None, columns: int = 2, scale: int = 5
) -> None:
    """Draw several images side by side on a grid of subplots.

    Args:
        *images: Arrays to display, one per grid cell, filled row by row.
        titles: Caption for each image; defaults to 'Image 1', 'Image 2', ...
        columns: Number of grid columns.
        scale: Size in inches given to each grid cell (width and height).
    """
    count = len(images)

    if titles is None:
        titles = [f'Image {idx + 1}' for idx in range(count)]

    # Enough rows to hold every image (integer ceiling of count / columns).
    rows = (count + columns - 1) // columns

    figure, grid = plt.subplots(rows, columns, figsize=(scale * columns, scale * rows))
    # plt.subplots may return a scalar or a 1-D array; force a 2-D layout.
    grid = np.array(grid).reshape(rows, columns)
    cells = list(grid.flat)

    for cell, picture, caption in zip(cells, images, titles):
        cell.imshow(picture, cmap='gray')
        cell.set_title(caption)

    # Drop the grid cells that have no image assigned to them.
    for unused in cells[count:]:
        figure.delaxes(unused)

    plt.tight_layout()


def show_image(image: np.ndarray, title: str | None = None, dpi: int = 100) -> None:
    """Display one image in a figure sized from the image's pixel dimensions.

    Args:
        image: Image array; either 2-D (grayscale) or 3-D (with a channel axis).
        title: Optional figure title; nothing is shown when it is None or empty.
        dpi: Resolution used both to create the figure and to convert the
            pixel dimensions into the figure size in inches.
    """
    # Only the first two axes are needed. Unpacking three values here would
    # raise ValueError for 2-D grayscale arrays, which the cmap selection
    # below is explicitly meant to support.
    height, width = image.shape[:2]

    figsize = (width / dpi, height / dpi)

    plt.figure(figsize=figsize, dpi=dpi)
    plt.imshow(image, cmap='gray' if image.ndim == 2 else None)
    plt.title(title if title else '')

    plt.tight_layout()

# Datasets
https://www.kaggle.com/datasets/utkarshsaxenadn/car-vs-bike-classification-dataset

https://www.kaggle.com/datasets/kipshidze/apple-vs-orange-binary-classification

In [None]:
def extract_features(hog: cv.HOGDescriptor, root: Path) -> pd.DataFrame:
    """Build a table of HOG feature vectors for an image dataset on disk.

    Every immediate sub-directory of ``root`` is treated as one class. Class
    directories are visited in sorted name order and numbered 0, 1, 2, ...,
    so the labels are contiguous and reproducible regardless of the order in
    which the filesystem lists the entries.

    Args:
        hog: Descriptor used to compute the feature vector of each image.
        root: Dataset directory containing one sub-directory per class.

    Returns:
        DataFrame with one row per readable image and the columns
        ``features`` (flat feature array), ``label`` (class index) and
        ``path`` (``Path`` of the image file).
    """
    # Non-directory entries are filtered out *before* numbering so that stray
    # files in the dataset root do not consume a label index.
    class_dirs = sorted(entry for entry in root.iterdir() if entry.is_dir())

    data = []
    for label, folder in enumerate(class_dirs):
        for img_path in sorted(folder.iterdir()):
            # IMREAD_GRAYSCALE is the imread flag for single-channel loading;
            # COLOR_BGR2GRAY is a cvtColor conversion code and means something
            # else when passed as an imread flag.
            img = cv.imread(str(img_path), cv.IMREAD_GRAYSCALE)
            if img is None:
                # imread signals an unreadable / non-image file with None.
                continue
            img = cv.resize(img, (128, 128))
            # Flatten so that the per-image vectors stack into a 2-D matrix.
            features = np.ravel(hog.compute(img))
            data.append({'features': features, 'label': label, 'path': img_path})
    # Explicit columns keep the schema stable even when no image was found.
    return pd.DataFrame(data, columns=['features', 'label', 'path'])

In [None]:
# HOG descriptor geometry (all sizes in pixels) and number of orientation bins.
hog_settings = {
    '_winSize': (64, 64),
    '_blockSize': (32, 32),
    '_blockStride': (16, 16),
    '_cellSize': (16, 16),
    '_nbins': 9,
}
hog = cv.HOGDescriptor(**hog_settings)

# Compute the feature table for every class folder found under ./dataset1.
df = extract_features(hog, Path('./dataset1'))

In [None]:
# Hold out 30% of the rows for testing, stratified on the label column and
# with a fixed seed so the split is repeatable.
df_train, df_test = train_test_split(
    df,
    test_size=0.3,
    stratify=df['label'],
    random_state=42,
)

# Stack the per-image feature arrays into one array per partition and pull
# out the matching label vectors.
X_train, y_train = np.stack(df_train['features'].to_numpy()), df_train['label'].to_numpy()
X_test, y_test = np.stack(df_test['features'].to_numpy()), df_test['label'].to_numpy()

# Training: RBF-kernel SVM.
# (Alternative left by the author: SVC(kernel='linear').)
clf = SVC(C=1, gamma='scale', kernel='rbf')
clf.fit(X_train, y_train)

# Evaluation: per-class report on the held-out partition.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

In [None]:
# Attach the predictions to a copy of the test dataframe
# (assign returns a new frame, leaving the original split untouched).
df_test = df_test.assign(pred=y_pred)

# Split the test rows into hits and misses.
is_hit = df_test['label'] == df_test['pred']
df_correct = df_test[is_hit]
df_wrong = df_test[~is_hit]

# Display names indexed by numeric label.
# NOTE(review): assumes label 0 is the motorbike class and label 1 the car
# class -- confirm against the folder order used when extracting features.
labels_names = ['Moto', 'Carro']


def show_examples(df_subset: pd.DataFrame, title: str, max_images: int = 5) -> None:
    """Plot a random sample of images with their true and predicted class.

    Args:
        df_subset: Rows to sample from; must provide the ``path``, ``label``
            and ``pred`` columns.
        title: Title placed above the whole figure.
        max_images: Upper bound on the number of images drawn; also the
            number of subplot columns in the figure.
    """
    chosen = df_subset.sample(min(max_images, len(df_subset)))

    plt.figure(figsize=(15, 3))
    for i, (_, row) in enumerate(chosen.iterrows()):
        plt.subplot(1, max_images, i + 1)
        # str() keeps this working on OpenCV builds whose imread does not
        # accept path-like objects.
        img = cv.imread(str(row['path']))
        if img is not None:
            # OpenCV loads BGR; matplotlib expects RGB.
            plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))
        # When the file could not be read (imread returned None) the panel is
        # left empty but still labelled, instead of crashing in cvtColor.
        plt.title(f'Real: {labels_names[row["label"]]}\nPred: {labels_names[row["pred"]]}')
        plt.axis('off')
    plt.suptitle(title)
    plt.show()


# One figure for the correctly classified samples, then one for the mistakes.
for subset, caption in (
    (df_correct, 'Exemplos classificados corretamente'),
    (df_wrong, 'Exemplos classificados incorretamente'),
):
    show_examples(subset, caption)

In [None]:
# from sklearn.model_selection import GridSearchCV

# param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 0.01, 0.001], 'kernel': ['rbf']}

# grid = GridSearchCV(SVC(), param_grid, cv=5)
# grid.fit(X_train, y_train)
# print('Melhor SVM:', grid.best_params_)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy score of the predictions on the held-out test set.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(
    f'Acurácia no conjunto de teste: {accuracy:.2f}'
)

In [None]:
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix of the test predictions, drawn as an annotated heatmap.
cm = confusion_matrix(y_test, y_pred)

tick_labels = ['Moto', 'Carro']
heatmap_options = {
    'annot': True,  # write the count inside each cell
    'fmt': 'd',  # counts are integers
    'cmap': 'Blues',
    'xticklabels': tick_labels,
    'yticklabels': tick_labels,
}
sns.heatmap(cm, **heatmap_options)

# Columns are labelled as predictions and rows as the true classes.
plt.xlabel('Predito')
plt.ylabel('Real')
plt.title('Matriz de Confusão')
plt.show()