In [9]:
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image
import os
import cv2
import json
import glob
from tqdm.notebook import tqdm
import torchvision.datasets as datasets
from datasets import load_dataset


In [10]:
# Load the image-folder dataset with Hugging Face `datasets`.
ds = load_dataset("C:/Thesis/Dataset4classes", num_proc=3)
cwd = os.getcwd()

# NOTE: the second component is already an absolute Windows path, so on
# Windows os.path.join discards `cwd` and ROOT_DIR is simply that path.
ROOT_DIR = os.path.join(cwd, "C:/Thesis/Dataset4classes/train")

# Maps each training image path to its class name (the parent folder name).
labels = {}

# Sorted listings make the file order reproducible across runs and machines;
# os.listdir returns entries in arbitrary order.
for folder in sorted(os.listdir(ROOT_DIR)):
    folder_path = os.path.join(ROOT_DIR, folder)
    if not os.path.isdir(folder_path):
        # A stray file next to the class folders would make the inner
        # os.listdir raise NotADirectoryError.
        continue
    for file in sorted(os.listdir(folder_path)):
        if file.endswith((".jpg", ".png")):
            full_name = os.path.join(ROOT_DIR, folder, file)
            labels[full_name] = folder

# Snapshot the paths as a list instead of keeping a live dict view, so `files`
# has a fixed length and order regardless of what happens to `labels` later.
files = list(labels)

Resolving data files:   0%|          | 0/59926 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/1174 [00:00<?, ?it/s]

In [11]:
# NOTE: despite the variable name, the checkpoint requested here is
# "dinov2_vitl14" (the shape check in the next cell prints 1024-dim embeddings).
# The name is kept because the other cells reference it.
dinov2_vits14 = torch.hub.load("facebookresearch/dinov2", "dinov2_vitl14")
device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
dinov2_vits14.to(device)
# The model is only used for feature extraction, so put it in inference mode
# explicitly instead of relying on whatever mode the hub loader leaves it in.
dinov2_vits14.eval()

# Preprocessing applied to every image before it is fed to the model.
transform_image = T.Compose([
    T.Resize(244),                # Scale the shorter side to 244 px (NOTE(review): 256 is the usual companion of a 224 crop - confirm 244 is intended)
    T.CenterCrop(224),            # Crop the central 224x224 region
    T.ToTensor(),                 # Convert the PIL image to a tensor
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize with the ImageNet mean and std
])

Using cache found in C:\Users\abadd/.cache\torch\hub\facebookresearch_dinov2_main


In [12]:
# Smoke test: push one random 224x224 RGB-shaped tensor through the model to
# confirm the embedding dimensionality.
dummy_input = torch.rand(1, 3, 224, 224).to(device)

# No gradients are needed for a shape check; disabling autograd avoids building
# and holding a computation graph for the forward pass.
with torch.no_grad():
    embeddings = dinov2_vits14(dummy_input)

# Print the embedding shape.
print(embeddings.shape)

torch.Size([1, 1024])


In [13]:
def load_image(img: str) -> torch.Tensor:
    """
    Load an image and return a tensor that can be used as an input to DINOv2.

    Args:
        img: Path of the image file to open.

    Returns:
        The output of ``transform_image`` for the image with a leading batch
        dimension of size 1 added.
    """
    # The context manager closes the underlying file handle once the pixels
    # have been read; convert("RGB") returns an independent in-memory copy.
    with Image.open(img) as pil_img:
        # Force exactly three channels *before* the transform runs: the
        # 3-value mean/std in Normalize cannot be applied to grayscale,
        # palette or RGBA images, so slicing channels afterwards is too late.
        rgb_img = pil_img.convert("RGB")

    transformed_img = transform_image(rgb_img).unsqueeze(0)

    return transformed_img

def compute_embeddings(files: list, output_path: str = "all_embeddings.json") -> dict:
    """
    Compute a DINOv2 embedding for every image in ``files``.

    Args:
        files: Iterable of image paths; each path is passed to ``load_image``.
        output_path: JSON file the embeddings are written to. Defaults to
            "all_embeddings.json" (the original behaviour). Pass a different
            path per dataset split to avoid overwriting an earlier run, or
            ``None`` to skip writing altogether.

    Returns:
        Dict mapping each path to its embedding stored as a nested list with
        one row (``[[v0, v1, ...]]``).
    """
    all_embeddings = {}

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for file in tqdm(files):
            embeddings = dinov2_vits14(load_image(file).to(device))

            # Keep the one-row nested-list layout so the result is JSON
            # serialisable and callers can still squeeze axis 1.
            all_embeddings[file] = embeddings[0].cpu().numpy().reshape(1, -1).tolist()

    if output_path is not None:
        with open(output_path, "w") as f:
            json.dump(all_embeddings, f)

    return all_embeddings

In [14]:
# Embed every training image. As a side effect compute_embeddings also writes
# the result to all_embeddings.json. This rebinds `embeddings`, which the
# shape-check cell above used for the dummy output.
embeddings = compute_embeddings(files)

  0%|          | 0/38591 [00:00<?, ?it/s]

In [15]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from random import shuffle

# Fixed seed so the shuffled order (and therefore the CV folds built from it)
# is the same on every run.
SHUFFLE_SEED = 42

# Pair each embedding with its label by looking both up under the same path,
# instead of assuming two separate iterations yield the same order.
# NOTE: `labels` must still be the *training* mapping here; running this cell
# after the validation cell has rebound `labels` raises KeyError.
combined = [(embeddings[path], labels[path]) for path in embeddings]

# Shuffle features and labels together (in place).
np.random.default_rng(SHUFFLE_SEED).shuffle(combined)

# Split back into parallel sequences.
embedding_list, y = zip(*combined)
# Each stored embedding is a one-row nested list; drop that size-1 axis so the
# result is a 2-D (n_samples, n_features) array.
embedding_list = np.array(embedding_list).squeeze(1)

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report

# Inputs for this cell are the notebook globals prepared by the shuffling cell:
#   embedding_list - 2-D numpy array of image embeddings
#   y              - matching sequence of class-name labels

# Base estimator whose hyper-parameters are searched.
svm = SVC()

# Hyper-parameter search space (a single candidate at the moment).
param_grid = dict(
    C=[10],            # regularisation strength
    gamma=[0.001],     # RBF kernel coefficient
    kernel=['rbf'],    # RBF kernel only
)

# Cross-validated search: 5 folds, scored by accuracy, using every CPU core.
grid_search = GridSearchCV(
    svm,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=2,
)

# Run the search.
grid_search.fit(embedding_list, y)

# Report the winning configuration and its mean cross-validated score.
print("Mejores hiperparámetros:", grid_search.best_params_)
print("Mejor puntuación:", grid_search.best_score_)

# Optional check on the training data itself (these are the samples the model
# was fitted on, so this is not an estimate of generalisation).
y_pred = grid_search.best_estimator_.predict(embedding_list)
print(classification_report(y, y_pred))

Fitting 5 folds for each of 1 candidates, totalling 5 fits


In [13]:
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# NOTE: this cell rebinds ROOT_DIR and `labels`, replacing the training values.
ROOT_DIR = os.path.join(cwd, "C:/Thesis/Dataset4classes/valid")


# Maps each validation image path to its class name (the parent folder name).
labels = {}

# Sorted listings give a reproducible file order.
for folder in sorted(os.listdir(ROOT_DIR)):
    folder_path = os.path.join(ROOT_DIR, folder)
    if not os.path.isdir(folder_path):
        # A stray file next to the class folders would make the inner
        # os.listdir raise NotADirectoryError.
        continue
    for file in sorted(os.listdir(folder_path)):
        if file.endswith((".jpg", ".png")):
            full_name = os.path.join(ROOT_DIR, folder, file)
            labels[full_name] = folder

testfiles = list(labels)
# NOTE: compute_embeddings also writes its result to all_embeddings.json, so
# this call replaces the file written for the training embeddings.
testembeddings = compute_embeddings(testfiles)

# Look the labels up by the embedding keys so features and labels stay aligned.
y_val = [labels[file] for file in testembeddings]
# Each stored embedding is a one-row nested list; squeeze that axis instead of
# reshaping to a hard-coded width. The previous hard-coded 384 does not match
# the 1024-dim embeddings this model produces.
val_features = np.array(list(testembeddings.values())).squeeze(1)
y_val_pred = grid_search.best_estimator_.predict(val_features)





In [None]:
# Validation metrics for the SVM predictions: overall accuracy first, then the
# per-class report.
val_accuracy = accuracy_score(y_val, y_val_pred)
print("Accuracy:", val_accuracy)
val_report = classification_report(y_val, y_val_pred)
print("Classification Report:\n", val_report)

In [16]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import numpy as np

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# Use one explicit class ordering for both the matrix and the tick labels, so
# they cannot disagree in length or order. The sorted set of observed class
# names (true or predicted) is used for both.
labels_names = sorted(set(map(str, y_val)) | set(map(str, y_val_pred)))

# Compute the confusion matrix with that ordering.
cm = confusion_matrix(y_val, y_val_pred, labels=labels_names)

# Display the confusion matrix.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=labels_names)
disp.plot(cmap=plt.cm.Blues)

plt.title("Matriz de Confusión")
plt.show()

In [None]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

# Turn the string class names into integer ids; the fitted encoder is kept so
# the same mapping can be reused later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)


# Hyper-parameters for the XGBoost classifier, gathered in one place.
xgb_params = {
    "n_estimators": 495,
    "max_depth": 9,
    "learning_rate": 0.07513291174284646,
    "subsample": 0.8103606987549683,
    "colsample_bytree": 0.7897168158886071,
    "gamma": 0.2932845334464851,
    "reg_alpha": 8.968869453424377,
    "reg_lambda": 6.468487200733069,
    "min_child_weight": 8,
}
model = XGBClassifier(**xgb_params)

# Train on the shuffled training embeddings and their encoded labels.
model.fit(embedding_list, y_encoded)

In [None]:
# Encode the validation labels with the mapping learned on the training
# labels. Refitting the encoder here could assign different integer ids when
# the validation split does not contain exactly the same classes.
y_val_encoded = label_encoder.transform(y_val)
# Each stored embedding is a one-row nested list; squeeze that axis to get a
# 2-D feature matrix.
y_pred = model.predict(np.array(list(testembeddings.values())).squeeze(1))

print("Accuracy:", accuracy_score(y_val_encoded, y_pred))
print("Classification Report:\n", classification_report(y_val_encoded, y_pred))

In [20]:
# Search space for the XGBoost grid search.
# NOTE: this rebinds `param_grid`, replacing the SVM grid defined earlier.
param_grid = dict(
    n_estimators=[300],            # number of boosted trees
    max_depth=[7, 11],             # maximum tree depth
    learning_rate=[0.15],          # learning rate
    subsample=[0.9],               # fraction of samples used to train each tree
    colsample_bytree=[0.7, 0.9],   # fraction of features used to train each tree
    gamma=[0.4],                   # minimum loss reduction required to make a split
    reg_alpha=[0.5, 1.0],          # L1 regularisation term on weights
    reg_lambda=[1.0],              # L2 regularisation term on weights
    min_child_weight=[1],          # minimum weight needed to create a new tree node
)


In [None]:
# GPU-backed XGBoost estimator used as the base model for the search.
# NOTE(review): `tree_method='gpu_hist'` and `predictor='gpu_predictor'` are
# reported as deprecated from XGBoost 2.0 in favour of `device="cuda"` with
# `tree_method="hist"` - confirm against the installed version.
xgb_gpu_estimator = XGBClassifier(tree_method='gpu_hist', predictor='gpu_predictor')

# 4-fold cross-validated grid search scored by accuracy.
# NOTE(review): n_jobs=-1 runs the fits in parallel worker processes, which
# presumably all share the same GPU - confirm this does not exhaust its memory.
grid_search_xgb = GridSearchCV(
    xgb_gpu_estimator,
    param_grid,
    scoring='accuracy',
    cv=4,
    n_jobs=-1,
    verbose=2,
)

# Run the hyper-parameter search.
grid_search_xgb.fit(embedding_list, y_encoded)


In [None]:
# Report the best configuration found by the XGBoost search and its mean
# cross-validated score.
best_xgb_params = grid_search_xgb.best_params_
print("Mejores hiperparámetros:", best_xgb_params)
best_xgb_score = grid_search_xgb.best_score_
print("Mejor puntuación:", best_xgb_score)


In [None]:
# Validation feature matrix: each stored embedding is a one-row nested list,
# so squeeze that axis to get a 2-D array.
val_matrix_xgb = np.array(list(testembeddings.values())).squeeze(1)

# Predict with the best estimator found by the XGBoost grid search.
y_val_pred_xgb = grid_search_xgb.best_estimator_.predict(val_matrix_xgb)

# Compare against the encoded validation labels from the earlier XGBoost cell.
val_accuracy_xgb = accuracy_score(y_val_encoded, y_val_pred_xgb)
print("Accuracy en datos de validación:", val_accuracy_xgb)
val_report_xgb = classification_report(y_val_encoded, y_val_pred_xgb)
print("Classification Report en datos de validación:\n", val_report_xgb)
