In [1]:
import thop
import torch
import torch.nn.functional as F
import torchmetrics #conda install -c conda-forge torchmetrics
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
import os
import cv2
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from tqdm import tqdm
import time
import json
import torchviz
import graphviz
from torchsummary import summary

In [8]:
# Dataset selector: 'data1' (webcam_data), 'data2' (ASL alphabet) or 'data3' (unified_data).
DATA_TYPE = 'data1'

# Sub-directories, relative to ../data, that hold the class folders of each dataset.
_DATASET_SUBDIRS = {
    'data1': ('webcam_data', 'unified_data'),
    'data2': ('ASL', 'asl_alphabet_train'),
    'data3': ('unified_data', 'unified_data'),
}

# Fail early on an unknown selector instead of leaving DATA_PATH undefined.
if DATA_TYPE not in _DATASET_SUBDIRS:
    raise ValueError(f"Data {DATA_TYPE} not found.")

DATA_PATH = os.path.join('..', 'data', *_DATASET_SUBDIRS[DATA_TYPE])

In [9]:
# Display the dataset directory that was selected from DATA_TYPE.
DATA_PATH

'..\\data\\webcam_data\\unified_data'

In [12]:
# Build a DataFrame describing every readable image of the dataset (path, label, size).
# The driver code at the end of this cell saves it to a per-dataset CSV (e.g. asl_dataset_info.csv).

def create_dataframe(data_path):
    """
    Build a DataFrame with the path, label and size of every readable image.

    The dataset root is expected to contain one sub-directory per class, each
    named with its integer class index (``0``, ``1``, ...). Directories whose
    name is not an integer (for example ``.ipynb_checkpoints``) are skipped
    with a warning instead of aborting the scan. Images are visited in sorted
    order so the resulting row order does not depend on the file system.

    A summary (image count, class distribution, image sizes) is printed.

    Args:
        data_path (str | Path): Directory containing the numbered class folders.

    Returns:
        pd.DataFrame: One row per readable image with columns
            ['Filepaths', 'Labels', 'Label_idx', 'Image_size'], where
            'Labels' is the folder name, 'Label_idx' its integer value and
            'Image_size' a (width, height) tuple. The frame is empty (but has
            these columns) when no readable image was found.

    Raises:
        ValueError: If ``data_path`` does not exist.
    """
    data_path = Path(data_path)

    if not data_path.exists():
        raise ValueError(f"El directorio {data_path} no existe")

    columns = ['Filepaths', 'Labels', 'Label_idx', 'Image_size']
    valid_extensions = {'.jpg', '.jpeg', '.png'}

    # Keep only directories whose name parses as an integer class index.
    class_folders = []
    for entry in data_path.iterdir():
        if not entry.is_dir():
            continue
        try:
            class_folders.append((int(entry.name), entry))
        except ValueError:
            print(f"Advertencia: se omite la carpeta no numérica {entry}")
    class_folders.sort(key=lambda item: item[0])

    records = []
    print("Creando DataFrame...")
    for label_idx, folder in tqdm(class_folders, desc="Procesando carpetas"):
        # Sorted so that row order (and any seeded split built on it) is reproducible.
        images = sorted(f for f in folder.iterdir()
                        if f.suffix.lower() in valid_extensions)

        for img_path in images:
            # Decode each image once: this both validates it and yields its size.
            try:
                img = cv2.imread(str(img_path))
                if img is None:
                    print(f"Advertencia: No se pudo leer {img_path}")
                    continue

                height, width = img.shape[:2]
                records.append((str(img_path), folder.name, label_idx, (width, height)))

            except Exception as e:
                print(f"Error procesando {img_path}: {str(e)}")

    df = pd.DataFrame(records, columns=columns)

    print("\nResumen del Dataset:")
    print(f"Total de imágenes: {len(df)}")

    # The statistics below are undefined for an empty frame (mode() has no rows).
    if df.empty:
        print(f"Advertencia: no se encontraron imágenes válidas en {data_path}")
        return df

    # Order the per-class counts numerically; the labels are strings, so a
    # plain sort_index() would list them as 0, 1, 10, 11, ...
    class_counts = df['Labels'].value_counts()
    class_counts = class_counts.loc[sorted(class_counts.index, key=int)]

    print(f"Número de clases: {len(class_counts)}")
    print("\nDistribución de clases:")
    print(class_counts)

    # Class balance check.
    print(f"\nMínimo de muestras por clase: {class_counts.min()}")
    print(f"Máximo de muestras por clase: {class_counts.max()}")

    # Image size statistics (width, height).
    sizes = pd.DataFrame(df['Image_size'].tolist(), columns=['width', 'height'])
    print("\nTamaños de imagen:")
    print(f"Mínimo: {sizes.min().values}")
    print(f"Máximo: {sizes.max().values}")
    print(f"Moda: {sizes.mode().iloc[0].values}")

    return df

# CSV file used for each dataset key; keys not listed here are not saved.
_CSV_NAME_BY_DATA_TYPE = {
    'data2': 'asl_dataset_info.csv',
    'data3': 'unified_data_dataset_info.csv',
    'data1': 'unified_webcam_dataset_info.csv',
}

try:
    # Scan the class folders found under DATA_PATH.
    df = create_dataframe(DATA_PATH)

    # Persist the image paths and labels so later cells can reload them.
    csv_name = _CSV_NAME_BY_DATA_TYPE.get(DATA_TYPE)
    if csv_name is not None:
        df.to_csv(csv_name, index=False)

    print("\nPrimeras filas del DataFrame:")
    print(df.head())

except Exception as err:
    # Any failure is reported and the cell finishes without raising.
    print(f"Error: {str(err)}")

Creando DataFrame...


Procesando carpetas: 100%|██████████| 29/29 [00:25<00:00,  1.14it/s]


Resumen del Dataset:
Total de imágenes: 11600
Número de clases: 29

Distribución de clases:
Labels
0     400
1     400
10    400
11    400
12    400
13    400
14    400
15    400
16    400
17    400
18    400
19    400
2     400
20    400
21    400
22    400
23    400
24    400
25    400
26    400
27    400
28    400
3     400
4     400
5     400
6     400
7     400
8     400
9     400
Name: count, dtype: int64

Mínimo de muestras por clase: 400
Máximo de muestras por clase: 400

Tamaños de imagen:
Mínimo: [640 480]
Máximo: [640 480]
Moda: [640 480]

Primeras filas del DataFrame:
                                      Filepaths Labels  Label_idx  Image_size
0  ..\data\webcam_data\unified_data\0\0 (2).jpg      0          0  (640, 480)
1  ..\data\webcam_data\unified_data\0\0 (3).jpg      0          0  (640, 480)
2  ..\data\webcam_data\unified_data\0\0 (4).jpg      0          0  (640, 480)
3      ..\data\webcam_data\unified_data\0\0.jpg      0          0  (640, 480)
4  ..\data\webcam_data




In [13]:
# Index CSV associated with each dataset key.
_INDEX_CSV_BY_DATA_TYPE = {
    'data1': 'unified_webcam_dataset_info.csv',
    'data2': 'asl_dataset_info.csv',
    'data3': 'unified_data_dataset_info.csv',
}

if DATA_TYPE not in _INDEX_CSV_BY_DATA_TYPE:
    raise ValueError(f"Data {DATA_TYPE} not found.")

# Reload the image index from disk.
df = pd.read_csv(_INDEX_CSV_BY_DATA_TYPE[DATA_TYPE])

print(df.head())

                                      Filepaths  Labels  Label_idx  Image_size
0  ..\data\webcam_data\unified_data\0\0 (2).jpg       0          0  (640, 480)
1  ..\data\webcam_data\unified_data\0\0 (3).jpg       0          0  (640, 480)
2  ..\data\webcam_data\unified_data\0\0 (4).jpg       0          0  (640, 480)
3      ..\data\webcam_data\unified_data\0\0.jpg       0          0  (640, 480)
4  ..\data\webcam_data\unified_data\0\1 (2).jpg       0          0  (640, 480)


In [14]:
# Configure the device for training
def setup_device():
    """Select the compute device, preferring CUDA when it is available.

    On the GPU path, cuDNN benchmark mode is enabled and the name and total
    memory of GPU 0 are printed. On the CPU path a notice is printed.

    Returns:
        torch.device: ``cuda`` when a GPU is available, otherwise ``cpu``.
    """
    if not torch.cuda.is_available():
        cpu_device = torch.device('cpu')
        print("GPU not available, using CPU")
        return cpu_device

    gpu_device = torch.device('cuda')
    # Let cuDNN benchmark and pick convolution algorithms.
    torch.backends.cudnn.benchmark = True
    gpu_name = torch.cuda.get_device_name(0)
    print(f"Using GPU: {gpu_name}")
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU memory available: {total_bytes / 1e9:.2f} GB")
    return gpu_device


DEVICE = setup_device()

Using GPU: NVIDIA GeForce RTX 3070 Ti Laptop GPU
GPU memory available: 8.59 GB


In [15]:
class ASLDataset(Dataset):
    """
    Dataset that loads ASL (American Sign Language) images listed in a DataFrame.

    Each item is read from disk with OpenCV, converted from BGR to RGB and
    passed through the optional ``transform``. If loading or transforming
    fails, the error is printed and an all-zero placeholder image is returned
    together with the row's real label, so iteration can continue.

    Attributes:
        dataframe (pd.DataFrame): Table with at least the columns
            'Filepaths', 'Labels' and 'Label_idx'.
        transform (callable | None): Applied to the RGB image array.
        labels (np.ndarray): Categorical codes of the 'Labels' column. Items
            returned by ``__getitem__`` use 'Label_idx', not these codes.
        fallback_size (tuple[int, int]): (height, width) of the placeholder
            image returned when an item cannot be loaded.
    """

    def __init__(self, dataframe, transform=None, fallback_size=(384, 384)):
        """
        Initialize the dataset.

        Args:
            dataframe (pd.DataFrame): DataFrame with columns
                ['Filepaths', 'Labels', 'Label_idx'].
            transform (callable, optional): Transform applied to each RGB image.
            fallback_size (tuple[int, int]): (height, width) of the placeholder
                image used when loading fails. Should match the output size of
                ``transform`` so placeholders can be batched with real images.
        """
        self.dataframe = dataframe
        self.transform = transform
        self.labels = pd.Categorical(dataframe['Labels']).codes
        self.fallback_size = tuple(fallback_size)

    def __len__(self):
        """Return the number of rows (images) in the dataset."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """
        Load one image and its label.

        Args:
            idx (int): Positional index of the row to fetch.

        Returns:
            tuple: ``(image, label)``. ``image`` is the transformed image (or
                the RGB array when no transform is set); ``label`` is always a
                0-dim ``torch.long`` tensor holding 'Label_idx'. When the image
                cannot be loaded, ``image`` is an all-zero placeholder.
        """
        row = self.dataframe.iloc[idx]
        img_path = row['Filepaths']
        # Build the label tensor up front so the normal and the fallback path
        # return the same type; mixed types cannot be collated into a batch.
        label = torch.tensor(int(row['Label_idx']), dtype=torch.long)

        try:
            image = cv2.imread(img_path)
            if image is None:
                raise ValueError(f"Image not found: {img_path}")
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            if self.transform:
                image = self.transform(image)

            return image, label

        except Exception as e:
            print(f"Error loading imagen {img_path}: {str(e)}")
            height, width = self.fallback_size
            if self.transform:
                # Channel-first float tensor, like the output of the transform.
                dummy_image = torch.zeros((3, height, width))
            else:
                # Channel-last uint8 array, like the arrays OpenCV returns.
                dummy_image = np.zeros((height, width, 3), dtype=np.uint8)
            return dummy_image, label

In [58]:
def create_test_loaders(df, transforms=None, batch_size=32, train_split=0.8, val_split=0.1,
                        num_workers=0, seed=42):
    """
    Create train, validation and test data loaders from one DataFrame.

    The rows are wrapped in an ``ASLDataset`` and split randomly, but
    reproducibly for a given ``seed``, into three subsets. The test subset
    receives every row not assigned to training or validation.

    Args:
        df (pd.DataFrame): DataFrame containing image paths and labels.
        transforms (callable, optional): Preprocessing applied to every image.
            Note that inside this function the name shadows the
            ``torchvision.transforms`` module.
        batch_size (int): Batch size shared by the three loaders.
        train_split (float): Fraction of rows used for training (default: 0.8).
        val_split (float): Fraction of rows used for validation (default: 0.1).
        num_workers (int): Worker processes per loader (default: 0, i.e. load
            in the main process).
        seed (int): Seed of the generator that drives the random split.

    Returns:
        tuple: (train_loader, val_loader, test_loader)

    Raises:
        ValueError: If a split fraction is outside [0, 1] or the two fractions
            add up to more than 1.
    """
    # Validate the fractions first: otherwise a bad value only surfaces later
    # as a size-mismatch error raised by random_split.
    if not (0 <= train_split <= 1 and 0 <= val_split <= 1):
        raise ValueError(
            f"train_split and val_split must be within [0, 1], "
            f"got train_split={train_split}, val_split={val_split}"
        )
    if train_split + val_split > 1 + 1e-9:
        raise ValueError(
            f"train_split + val_split must not exceed 1, "
            f"got train_split={train_split}, val_split={val_split}"
        )

    dataset = ASLDataset(df, transform=transforms)

    # The test split takes the remainder, so the three sizes always add up.
    total_size = len(dataset)
    train_size = int(train_split * total_size)
    val_size = int(val_split * total_size)
    test_size = total_size - train_size - val_size

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        [train_size, val_size, test_size],
        generator=torch.Generator().manual_seed(seed)
    )

    # Settings shared by the three loaders.
    dataloader_kwargs = {
        'batch_size': batch_size,
        'num_workers': num_workers,
        'pin_memory': torch.cuda.is_available(),
        # persistent_workers is only valid when worker processes exist.
        'persistent_workers': num_workers > 0
    }

    train_loader = DataLoader(train_dataset, shuffle=True, **dataloader_kwargs)
    val_loader = DataLoader(val_dataset, shuffle=False, **dataloader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **dataloader_kwargs)

    return train_loader, val_loader, test_loader

In [75]:
def generate_evaluation_metrics(model, test_loader, evaluation_path):
    """
    Evaluate a trained classifier on a test set and save the results.

    The model is run in evaluation mode over ``test_loader`` on the device its
    parameters live on. The function computes the cross-entropy loss averaged
    over samples, the accuracy in percent, a confusion matrix and a per-class
    classification report for the 29 classes A-Z, DEL, NOTHING and SPACE
    (class indices 0-28).

    Saved outputs (inside ``evaluation_path``, created if missing):
        - confusion_matrix_extra.png: written by ``_save_confusion_matrix``
        - test_results_extra.json: the dictionary returned by this function

    Args:
        model (nn.Module): Trained model to evaluate.
        test_loader (DataLoader): Yields ``(inputs, labels)`` batches.
        evaluation_path (str): Directory where the results are saved.

    Returns:
        dict: ``{'final_metrics': {'test_loss': float,
                                   'test_accuracy': float,
                                   'classification_report': dict}}``

    Raises:
        ValueError: If ``test_loader`` has no batches or yields no samples.
    """
    if len(test_loader) == 0:
        raise ValueError("test_loader is empty; there is nothing to evaluate.")

    # Create the output directory before the (slow) evaluation so that a bad
    # path fails immediately rather than after the whole pass.
    os.makedirs(evaluation_path, exist_ok=True)

    # Move batches to wherever the model is, instead of relying on a global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Evaluando'):
            inputs = inputs.to(device)
            labels = labels.to(device, dtype=torch.long)

            outputs = model(inputs).float()

            # Sum the per-sample losses so the final mean is exact even when
            # the last batch is smaller than the others.
            loss_sum += F.cross_entropy(outputs, labels, reduction='sum').item()

            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

    if total == 0:
        raise ValueError("test_loader yielded no samples; there is nothing to evaluate.")

    test_loss = loss_sum / total
    test_accuracy = 100 * correct / total

    # Class index -> display name.
    label_mapping = {
        0: "A", 1: "B", 2: "C", 3: "D", 4: "E",
        5: "F", 6: "G", 7: "H", 8: "I", 9: "J",
        10: "K", 11: "L", 12: "M", 13: "N", 14: "O",
        15: "P", 16: "Q", 17: "R", 18: "S", 19: "T",
        20: "U", 21: "V", 22: "W", 23: "X", 24: "Y",
        25: "Z", 26: "DEL", 27: "NOTHING", 28: "SPACE"}
    class_ids = list(label_mapping.keys())
    class_names = list(label_mapping.values())

    # Passing labels= pins rows and columns to all 29 classes, so the matrix
    # and the report stay aligned with class_names even when a class never
    # appears among the labels or the predictions.
    cm = confusion_matrix(all_labels, all_preds, labels=class_ids)
    classification_rep = classification_report(all_labels, all_preds,
                                               labels=class_ids,
                                               target_names=class_names,
                                               output_dict=True,
                                               zero_division=0)

    # Save the confusion-matrix figure.
    _save_confusion_matrix(cm, label_mapping, evaluation_path)

    history_data = {
        'final_metrics': {
            'test_loss': float(test_loss),
            'test_accuracy': float(test_accuracy),
            'classification_report': classification_rep
        }
    }

    # Save the metrics as JSON.
    with open(os.path.join(evaluation_path, 'test_results_extra.json'), 'w') as f:
        json.dump(history_data, f, indent=4)

    # Print a short summary.
    print("\nResumen Final del Entrenamiento:")
    print(f"Precisión en test: {test_accuracy:.2f}%")
    print(f"Pérdida en test: {test_loss:.4f}")

    return history_data

In [81]:
def get_model_config(option):
    """Return the configuration of one of the numbered trained runs.

    Args:
        option (int): Run number, 1 to 4.

    Returns:
        dict: ``{'model_type', 'evaluation_path', 'model_path'}`` where
            ``model_path`` is the fine-tuned checkpoint stored inside the
            run's evaluation directory.

    Raises:
        KeyError: If ``option`` is not a known run number.
    """
    # Run number -> (architecture, evaluation directory).
    runs = {
        1: ('efficientnet', '../results/efficientnet_v2_s_data2/evaluation_20241206_174129'),
        2: ('efficientnet', '../results/efficientnet_v2_s_data3/evaluation_20241206_152358'),
        3: ('mobilenet', '../results/mobilenet_v2_data2/evaluation_20241207_144721'),
        4: ('mobilenet', '../results/mobilenet_v2_data3/evaluation_20241207_112444'),
    }
    architecture, run_dir = runs[option]
    return {
        'model_type': architecture,
        'evaluation_path': run_dir,
        'model_path': f'{run_dir}/checkpoints/Fine_Tuning_best_model.pth',
    }

In [73]:
def _save_confusion_matrix(cm, label_mapping, evaluation_path):
    """Render a confusion matrix as an annotated heatmap and save it as a PNG.

    The figure is written to ``confusion_matrix_extra.png`` inside
    ``evaluation_path``; the directory is created if it does not exist.

    Args:
        cm: Square matrix of integer counts (rows are true classes, columns
            are predicted classes).
        label_mapping (dict): Class index -> display name; the values, in
            insertion order, label both axes.
        evaluation_path (str): Directory where the image is saved.
    """
    class_names = list(label_mapping.values())
    os.makedirs(evaluation_path, exist_ok=True)

    fig, ax = plt.subplots(figsize=(15, 15))
    try:
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names,
                    yticklabels=class_names,
                    ax=ax)
        ax.set_title('Matriz de Confusión')
        ax.set_xlabel('Predicción')
        ax.set_ylabel('Valor Real')
        plt.setp(ax.get_xticklabels(), rotation=45)
        plt.setp(ax.get_yticklabels(), rotation=45)
        fig.tight_layout()
        fig.savefig(os.path.join(evaluation_path, 'confusion_matrix_extra.png'))
    finally:
        # Always release the figure, even if rendering or saving failed.
        plt.close(fig)

In [54]:
def load_model(model_type,model_path):
    """Build an ASLModel of the requested architecture and load its weights.

    Args:
        model_type (str): 'efficientnet' or 'mobilenet'.
        model_path (str): Checkpoint file. It may be a bare state dict or a
            dict holding the weights under 'model_state_dict'.

    Returns:
        The model in eval mode, moved to CUDA when available, else CPU.

    Raises:
        ValueError: If ``model_type`` is not supported.
        Exception: Any error raised while importing, building or loading the
            model is printed and re-raised unchanged.
    """
    target = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    try:
        if model_type not in ('efficientnet', 'mobilenet'):
            raise ValueError(f"Unsupported model type: {model_type}")

        # Both packages expose a class named ASLModel, so import the right
        # one only once the architecture is known.
        if model_type == 'efficientnet':
            from efficientNet.model_class import ASLModel
        else:
            from mobileNet.model_class import ASLModel
        network = ASLModel(num_classes=29)

        state = torch.load(model_path, map_location=target, weights_only=True)
        if 'model_state_dict' in state:
            state = state['model_state_dict']
        network.load_state_dict(state)

        network.eval().to(target)
        return network

    except Exception as exc:
        print(f"Error loading model: {exc}")
        raise

In [94]:
# Preprocessing pipeline for the RGB arrays produced by the dataset:
# array -> PIL image -> 384x384 resize -> tensor -> per-channel normalization.
_INPUT_SIZE = (384, 384)
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

_preprocessing_steps = [
    transforms.ToPILImage(),
    transforms.Resize(_INPUT_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
]
transform = transforms.Compose(_preprocessing_steps)

In [95]:
print("Creating  data loaders...")

# 10% of the rows go to training and 10% to validation; create_test_loaders
# assigns every remaining row (about 80%) to the test split.
_loader_options = {
    'df': df,
    'transforms': transform,
    'batch_size': 32,
    'train_split': 0.1,
    'val_split': 0.1,
}
train_loader, val_loader, test_loader = create_test_loaders(**_loader_options)

Creating  data loaders...


In [105]:
# Select which trained run to evaluate; get_model_config lists the numbered options.
model_config = get_model_config(1)

In [106]:
# Build the selected architecture and load the checkpoint weights of that run.
model = load_model(model_config['model_type'],model_config['model_path'])

In [107]:
# Evaluate on the test split; the metrics and confusion matrix are saved in the run's evaluation directory.
generate_evaluation_metrics(model, test_loader, model_config['evaluation_path'])

Evaluando: 100%|██████████| 290/290 [01:24<00:00,  3.43it/s]



Resumen Final del Entrenamiento:
Precisión en test: 10.94%
Pérdida en test: 5.9131


{'final_metrics': {'test_loss': 5.913103194894462,
  'test_accuracy': 10.9375,
  'classification_report': {'A': {'precision': 0.0711864406779661,
    'recall': 0.13548387096774195,
    'f1-score': 0.09333333333333334,
    'support': 310.0},
   'B': {'precision': 0.09131205673758866,
    'recall': 0.6338461538461538,
    'f1-score': 0.15962805114296785,
    'support': 325.0},
   'C': {'precision': 0.22448979591836735,
    'recall': 0.07028753993610223,
    'f1-score': 0.1070559610705596,
    'support': 313.0},
   'D': {'precision': 0.09803921568627451,
    'recall': 0.015197568389057751,
    'f1-score': 0.02631578947368421,
    'support': 329.0},
   'E': {'precision': 0.08295625942684766,
    'recall': 0.16467065868263472,
    'f1-score': 0.11033099297893681,
    'support': 334.0},
   'F': {'precision': 0.0684931506849315,
    'recall': 0.01557632398753894,
    'f1-score': 0.025380710659898477,
    'support': 321.0},
   'G': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 