# Laboratorio 1 de Aprendizaje Automático: SVM
Nombre: David Valero Croma
Profesora: Violeta Chang Camacho
Ayudante: 

## Preliminares
Aquí se realizan los imports, se cargan los archivos de las imágenes y se preprocesan los datos (vectores, vectores-pca, vectores-umap).

In [1]:
#Imports
import numpy as np
import pandas as pd
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
import umap
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
# Locate the dataset files and load the class mapping

# Every dataset path is built relative to the dataset root folder
ROOT_DIR = "QuickDraw-10"
IMAGES_DIR, TRAIN_PATH, TEST_PATH, MAPPING_PATH = (
    os.path.join(ROOT_DIR, nombre)
    for nombre in ("images", "train_40.txt", "test.txt", "mapping.txt")
)

# The mapping file is tab-separated with no header row: class name, then label
mapping_df = pd.read_csv(
    MAPPING_PATH,
    sep="\t",
    header=None,
    names=["class", "label"],
)

# Lookup tables in both directions (label -> class name, class name -> label)
label2name = {
    label: name
    for name, label in zip(mapping_df["class"], mapping_df["label"])
}
name2label = {
    name: label
    for name, label in zip(mapping_df["class"], mapping_df["label"])
}

# Función para cargar imágenes
def load_dataset(txt_file):
    """
    Load the grayscale images and labels listed in an index file.

    Every non-blank line of the index file must contain a path (relative to
    ROOT_DIR) and an integer label separated by a single tab character.

    Parameters:
    - txt_file: path of the index file to read

    Returns:
    - X: array with one flattened image per row
    - y: array with the integer label of each loaded image

    Blank lines are ignored. Images that OpenCV cannot read are skipped and
    reported once at the end, so X and y always stay aligned.
    """
    X, y = [], []
    unreadable = []
    with open(txt_file, 'r') as f:
        for line in tqdm(f, desc=f"Leyendo {txt_file}"):
            line = line.strip()
            if not line:
                # A blank line (e.g. a trailing newline) cannot be unpacked
                # into (path, label) below, so skip it instead of crashing.
                continue
            rel_path, label = line.split('\t')
            full_path = os.path.join(ROOT_DIR, rel_path)
            # cv2.imread signals a missing/corrupt file by returning None.
            img = cv2.imread(full_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                unreadable.append(full_path)
                continue
            X.append(img.flatten())  # 2-D image -> 1-D vector (256x256 -> 65536)
            y.append(int(label))
    if unreadable:
        # Make dropped samples visible: a silently smaller dataset would
        # otherwise skew every reported metric without any hint.
        print(f"Aviso: {len(unreadable)} imagen(es) de {txt_file} no se pudieron leer y se omitieron")
    return np.array(X), np.array(y)

# Load both splits: each call returns (flattened images, integer labels)
X_train, y_train = load_dataset(TRAIN_PATH)
X_test, y_test = load_dataset(TEST_PATH)

# Sanity check: rows = images that were loaded, columns = pixels per image
print(f"Train shape: {X_train.shape}, Test shape: {X_test.shape}")


Leyendo QuickDraw-10\train_40.txt: 3930it [00:23, 166.21it/s]
Leyendo QuickDraw-10\test.txt: 1166it [00:07, 163.13it/s]

Train shape: (3930, 65536), Test shape: (1166, 65536)





In [3]:
def aplicar_pca(X_train, X_test, n_componentes=256):
    """
    Reduce both splits with a PCA fitted on the training split only.

    Parameters:
    - X_train: training features (used to fit the projection)
    - X_test: test features (only transformed, never used for fitting)
    - n_componentes: number of principal components to keep

    Returns:
    - projected training features
    - projected test features
    - the fitted PCA object
    """
    reductor = PCA(n_components=n_componentes, random_state=42)
    # Fit on train and project it in one step; test reuses the same projection.
    train_proyectado = reductor.fit_transform(X_train)
    return train_proyectado, reductor.transform(X_test), reductor


def aplicar_umap(X_train, X_test, n_componentes=256, random_state=42):
    """
    Reduce both splits with a UMAP embedding fitted on the training split only.

    Parameters:
    - X_train: training features (used to fit the embedding)
    - X_test: test features (only transformed, never used for fitting)
    - n_componentes: dimensionality of the embedding
    - random_state: seed for UMAP's stochastic optimisation; pass None to get
      the library's default unseeded behaviour

    Returns:
    - embedded training features
    - embedded test features
    - the fitted UMAP object
    """
    # UMAP is stochastic. Seeding it makes the embedding (and every accuracy
    # computed from it) repeatable across runs, matching the fixed seed used
    # by the PCA and SVC steps in this notebook.
    # NOTE(review): per the UMAP docs a fixed seed may limit parallelism - confirm
    # the runtime cost is acceptable.
    umap_model = umap.UMAP(n_components=n_componentes, random_state=random_state)
    X_train_umap = umap_model.fit_transform(X_train)
    X_test_umap = umap_model.transform(X_test)
    return X_train_umap, X_test_umap, umap_model


In [4]:
def entrenar_svm(X_train, y_train, kernel='rbf', C=1.0, gamma='scale'):
    """
    Build and fit a support vector classifier.

    Parameters:
    - X_train: training features
    - y_train: training labels
    - kernel: kernel type (e.g. 'rbf', 'linear', 'poly', 'sigmoid')
    - C: penalty (regularisation) parameter
    - gamma: kernel coefficient (only used by 'rbf', 'poly', 'sigmoid')

    Returns:
    - the fitted SVC model
    """
    hiperparametros = {"kernel": kernel, "C": C, "gamma": gamma}
    # fit() returns the estimator itself, so it can be returned directly.
    return SVC(random_state=42, **hiperparametros).fit(X_train, y_train)

In [5]:
def evaluar_modelo(modelo, X_test, y_test, label2name):
    """
    Evaluate a fitted model and return its performance metrics.

    Parameters:
    - modelo: already-fitted classifier exposing predict()
    - X_test: test features
    - y_test: test labels
    - label2name: dictionary mapping label id -> class name

    Returns:
    - accuracy_total: overall accuracy on the test set
    - accuracy_por_clase: dictionary class name -> per-class accuracy, i.e. the
      fraction of the samples of that class that were classified correctly
    - y_pred: predicted labels
    """
    y_pred = modelo.predict(X_test)
    accuracy_total = accuracy_score(y_test, y_pred)

    reporte = classification_report(y_test, y_pred, output_dict=True, zero_division=0)
    # Per-class accuracy is the share of a class's true samples predicted as
    # that class, which is what the report calls 'recall'. 'precision' would
    # instead measure how many predictions *of* the class were right.
    # Only the numeric keys are class labels; the remaining keys ('accuracy',
    # 'macro avg', 'weighted avg') are aggregates and are filtered out.
    accuracy_por_clase = {
        label2name[int(k)]: v['recall']
        for k, v in reporte.items()
        if k.isdigit()
    }

    return accuracy_total, accuracy_por_clase, y_pred

In [6]:
def graficar_accuracy_por_clase(accuracy_dict, titulo):
    """
    Show a bar chart with one bar per class.

    Parameters:
    - accuracy_dict: dictionary class name -> accuracy value
    - titulo: chart title
    """
    nombres = list(accuracy_dict)
    alturas = [accuracy_dict[nombre] for nombre in nombres]

    plt.figure(figsize=(10, 5))
    plt.bar(nombres, alturas)
    plt.title(titulo)
    plt.ylabel("Accuracy")
    # Fixed 0-1 range so charts from different experiments are comparable.
    plt.ylim(0, 1)
    # Tilt the class names so they do not overlap.
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

In [None]:
# Baseline: train an RBF-kernel SVM directly on the flattened original images
modelo_rbf = entrenar_svm(X_train, y_train, kernel='rbf')

# Evaluate on the test split: overall accuracy, per-class scores and predictions
acc_total, acc_clase, pred = evaluar_modelo(modelo_rbf, X_test, y_test, label2name)

# Report the overall score and plot the per-class breakdown
print(f"Accuracy Total (RBF): {acc_total:.4f}")
graficar_accuracy_por_clase(acc_clase, "Accuracy por clase - Kernel RBF")

In [None]:
# === PCA ===
# Project both splits onto 256 principal components fitted on the training split
X_train_pca, X_test_pca, modelo_pca = aplicar_pca(X_train, X_test, n_componentes=256)

# RBF-kernel SVM on the PCA features (predictions are discarded)
modelo_pca_rbf = entrenar_svm(X_train_pca, y_train, kernel='rbf')
acc_total_pca_rbf, acc_clase_pca_rbf, _ = evaluar_modelo(modelo_pca_rbf, X_test_pca, y_test, label2name)

# Linear-kernel SVM on the same PCA features
modelo_pca_linear = entrenar_svm(X_train_pca, y_train, kernel='linear')
acc_total_pca_linear, acc_clase_pca_linear, _ = evaluar_modelo(modelo_pca_linear, X_test_pca, y_test, label2name)


In [None]:
# === UMAP ===
# Embed both splits into 256 dimensions with a UMAP model fitted on the training split
X_train_umap, X_test_umap, modelo_umap = aplicar_umap(X_train, X_test, n_componentes=256)

# RBF-kernel SVM on the UMAP features (predictions are discarded)
modelo_umap_rbf = entrenar_svm(X_train_umap, y_train, kernel='rbf')
acc_total_umap_rbf, acc_clase_umap_rbf, _ = evaluar_modelo(modelo_umap_rbf, X_test_umap, y_test, label2name)

# Linear-kernel SVM on the same UMAP features
modelo_umap_linear = entrenar_svm(X_train_umap, y_train, kernel='linear')
acc_total_umap_linear, acc_clase_umap_linear, _ = evaluar_modelo(modelo_umap_linear, X_test_umap, y_test, label2name)


In [None]:
# Overall accuracy of every feature representation / kernel combination
resultados_totales = {
    "Original - RBF": acc_total,
    "PCA - RBF": acc_total_pca_rbf,
    "PCA - Linear": acc_total_pca_linear,
    "UMAP - RBF": acc_total_umap_rbf,
    "UMAP - Linear": acc_total_umap_linear
}

# Summary chart: one bar per experiment, in the order declared above
etiquetas_resumen = list(resultados_totales)
valores_resumen = [resultados_totales[etiqueta] for etiqueta in etiquetas_resumen]

plt.figure(figsize=(8, 5))
plt.bar(etiquetas_resumen, valores_resumen)
plt.title("Comparación de accuracy total por técnica y kernel")
plt.ylabel("Accuracy Total")
plt.ylim(0, 1)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
