# Classificação dos personagens dos Simpsons

Alunos: Alexandre Borges Baccarini Júnior - 2515520
        Leonardo
        Mateus

## Objetivo

Desenvolver um sistema inteligente para classificação de personagens da série Os Simpsons utilizando os classificadores k-NN e SVM.


### Baixar as bibliotecas

In [None]:
#RODAR UMA VEZ PARA BAIXAR AS BIBLIOTECAS

%pip install numpy pandas Pillow scikit-learn keras

### Extração de Características

In [4]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from keras.applications.vgg16 import VGG16, preprocess_input

def load_img(img_path, target_size):
    img = Image.open(img_path)
    img = img.convert('RGB')
    img = img.resize(target_size)
    return img

def extract_features(directory):
    model = VGG16(weights='imagenet', include_top=False)
    features = []
    labels = []
    for filename in os.listdir(directory):
        if filename.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp')):
            img_path = os.path.join(directory, filename)
            label = directory.split(os.sep)[-1]
            try:
                image = load_img(img_path, target_size=(224, 224))
                image = np.array(image)
                image = np.expand_dims(image, axis=0)
                image = preprocess_input(image)
                feature = model.predict(image).flatten()
                features.append(feature)
                labels.append(label)
            except Exception as e:
                print(f"Error processing image: {img_path}")
                print(f"Exception: {e}")
    return features, labels

# TREINO
bart_dir = "./imgs/Train/bart"
homer_dir = "./imgs/Train/homer"
lisa_dir = "./imgs/Train/lisa"
maggie_dir = "./imgs/Train/maggie"
marge_dir = "./imgs/Train/marge"

bart_features, bart_labels = extract_features(bart_dir)
homer_features, homer_labels = extract_features(homer_dir)
lisa_features, lisa_labels = extract_features(lisa_dir)
maggie_features, maggie_labels = extract_features(maggie_dir)
marge_features, marge_labels = extract_features(marge_dir)

all_features = bart_features + homer_features
all_labels = bart_labels + homer_labels

data = {'class': all_labels}
for i in range(len(all_features[0])):
    data[f'feature_{i+1}'] = [feat[i] for feat in all_features]

df = pd.DataFrame(data)

csv_train_path = "./data/Train/feats.csv"
df.to_csv(csv_train_path, index=False)

# TESTE
bart_dir = "./imgs/Valid/bart"
homer_dir = "./imgs/Valid/homer"
lisa_dir = "./imgs/Valid/lisa"
maggie_dir = "./imgs/Valid/maggie"
marge_dir = "./imgs/Valid/marge"

bart_features, bart_labels = extract_features(bart_dir)
homer_features, homer_labels = extract_features(homer_dir)
lisa_features, lisa_labels = extract_features(lisa_dir)
maggie_features, maggie_labels = extract_features(maggie_dir)
marge_features, marge_labels = extract_features(marge_dir)

all_features = bart_features + homer_features
all_labels = bart_labels + homer_labels

data = {'class': all_labels}
for i in range(len(all_features[0])):
    data[f'feature_{i+1}'] = [feat[i] for feat in all_features]

df = pd.DataFrame(data)

csv_test_path = "./data/Valid/feats.csv"
df.to_csv(csv_test_path, index=False)


  image = np.array(image)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 540ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 309ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 322ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 303ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 291ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 330ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 279ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 279ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 297ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 295ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

### Treinamento e Avaliação dos Modelos
#### k-NN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import GridSearchCV
import pandas as pd

# Carrega os dados
csv_train_path = "./data/Train/feats.csv"
df_train = pd.read_csv(csv_train_path)

# Separa os dados de treino e teste
X_train = df_train.drop('class', axis=1)
y_train = df_train['class']

# Codifica as classes para números
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)

# Normaliza os dados
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Grid de parâmetros
valores_grid = [
    {'n_neighbors': [1, 3, 5, 7, 9, 11, 15, 20], 'metric': ['euclidean']},
    {'n_neighbors': [1, 3, 5, 7, 9, 11, 15, 20], 'metric': ['manhattan']},
    {'n_neighbors': [1, 3, 5, 7, 9, 11, 15, 20], 'metric': ['minkowski'], 'p': [1,2,3,4,5]}
]

# Criando o modelo
modelo = KNeighborsClassifier()

# Criando os grids
gridKNN = GridSearchCV(estimator=modelo, param_grid=valores_grid, cv=5, verbose=1, n_jobs=-1)

# Treinando os grids com dados codificados
gridKNN.fit(X_train_scaled, y_train_encoded)

# Imprimindo os melhores parâmetros
print(f"Melhor acurácia: {gridKNN.best_score_}")
print(f"Melhor K: {gridKNN.best_estimator_.n_neighbors}")
print(f"Melhor distância: {gridKNN.best_estimator_.metric}")
print(f"Melhor p: {getattr(gridKNN.best_estimator_, 'p', 'Não aplicável')}")

Fitting 5 folds for each of 56 candidates, totalling 280 fits
Melhor acurácia: 0.797883597883598
Melhor K: 15
Melhor distância: minkowski
Melhor p: 4


#testeando o modelo

#### VSM

In [None]:
#VSM - code

### Comparação dos Modelos

In [None]:
#comparação -code

### Análise de Erros

In [None]:
#Análise de Erros - code