# Entrenamiento de ResNet18 para clasificar carriles bici

## **1. Configuración inicial**

### Instalar librerías necesarias

In [None]:
!conda install -c conda-forge opencv


In [None]:
!conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia

In [None]:
!pip install torch torchvision pandas scikit-learn matplotlib

## **2. Preprocesamiento de datos**

### Cargar la base de datos

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Cargar la base de datos
df = pd.read_csv('D:\Results\ciclo_png_directory.csv')

# Verificar datos
print(df.head())

   number_of_bikelane              condition_name  \
0                 104  ci_o_cr_0_tipci_CA_op_ci_0   
1                 104  ci_o_cr_0_tipci_CA_op_ci_0   
2                 104  ci_o_cr_0_tipci_CA_op_ci_0   
3                 104  ci_o_cr_0_tipci_CA_op_ci_0   
4                 104  ci_o_cr_0_tipci_CA_op_ci_0   

                                individual_file_path  
0  D:\Results\104\ci_o_cr_0_tipci_CA_op_ci_0\date...  
1  D:\Results\104\ci_o_cr_0_tipci_CA_op_ci_0\date...  
2  D:\Results\104\ci_o_cr_0_tipci_CA_op_ci_0\date...  
3  D:\Results\104\ci_o_cr_0_tipci_CA_op_ci_0\date...  
4  D:\Results\104\ci_o_cr_0_tipci_CA_op_ci_0\date...  


### Dividir en entrenamiento y validación

In [2]:
# Dividir los datos
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['condition_name'], random_state=42)

print(f"Entrenamiento: {len(train_df)}, Validación: {len(val_df)}")

Entrenamiento: 48001, Validación: 12001


### Aumentar clases minoritarias y subsamplear clases mayoritarias

In [3]:
# Aumentar clases 0 y 4
augmented_0 = train_df[train_df['condition_name'] == 'ci_o_cr_0_tipci_BAND_op_ci_0'].sample(n=1000, replace=True, random_state=42)
augmented_4 = train_df[train_df['condition_name'] == 'ci_o_cr_1_tipci_BAND_op_ci_0'].sample(n=1000, replace=True, random_state=42)

# Subsamplear la clase 11
subsampled_11 = train_df[train_df['condition_name'] == 'ci_o_cr_1_tipci_VD_op_ci_1'].sample(n=5000, random_state=42)

# Crear un nuevo conjunto de entrenamiento equilibrado
train_df = pd.concat([
    train_df[~train_df['condition_name'].isin(['ci_o_cr_0_tipci_BAND_op_ci_0', 'ci_o_cr_1_tipci_BAND_op_ci_0', 'ci_o_cr_1_tipci_VD_op_ci_1'])],
    augmented_0, augmented_4, subsampled_11
])

print(f"Nuevo tamaño de entrenamiento: {len(train_df)}")

Nuevo tamaño de entrenamiento: 43555


## **3. Preparar datos para PyTorch**

### Transformaciones y DataLoader



**Resize((224, int(224 * (16 / 9))))**

Ajusta el alto a 224 píxeles y calcula el ancho proporcional manteniendo la relación de aspecto 16:9. El nuevo ancho será aproximadamente 398 píxeles.

**Pad((0, 0, (224 - width) // 2, 0))**

Calcula el padding necesario en los lados izquierdo y derecho para llevar el ancho resultante (398) a 224 píxeles. Solo rellena en los lados horizontales.

**Resultado:**

Este enfoque se asegura de que:
- No haya distorsión en la imagen
- Se mantenga el área importante central de la imagen
- Las dimensiones sean las requeridas por el modelo (224x224)

In [8]:
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import Compose, Normalize, ToTensor, ColorJitter, RandomHorizontalFlip
import cv2
import numpy as np

# Transformaciones
def resize_and_pad(image, target_height=224, target_width=224):
    """Resize the image to maintain aspect ratio, then pad to target dimensions."""
    original_height, original_width, _ = image.shape
    aspect_ratio = original_width / original_height
    
    # Calculate new dimensions
    if aspect_ratio > 1:  # Wider than tall
        new_width = target_width
        new_height = int(target_width / aspect_ratio)
    else:  # Taller than wide
        new_height = target_height
        new_width = int(target_height * aspect_ratio)
    
    # Resize image
    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
    
    # Calculate padding
    pad_top = (target_height - new_height) // 2
    pad_bottom = target_height - new_height - pad_top
    pad_left = (target_width - new_width) // 2
    pad_right = target_width - new_width - pad_left
    
    # Pad image
    padded_image = cv2.copyMakeBorder(
        resized_image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=[0, 0, 0]
    )
    return padded_image

# Define transformations
class Transformations:
    def __init__(self, train=True):
        self.train = train
        self.color_jitter = ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)
        self.normalize = Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        self.to_tensor = ToTensor()
        self.horizontal_flip = RandomHorizontalFlip()
    
    def __call__(self, image):
        # Resize and pad
        image = resize_and_pad(image, target_height=224, target_width=224)
        # Convert to float32 and normalize to [0, 1]
        image = image.astype(np.float32) / 255.0
        # Apply color jitter and flip for training
        if self.train:
            image = self.color_jitter(torch.from_numpy(image.transpose(2, 0, 1)))  # Apply jitter
            if torch.rand(1).item() < 0.5:  # Random horizontal flip
                image = torch.flip(image, dims=[2])  # Flip along the width dimension
        else:
            image = torch.from_numpy(image.transpose(2, 0, 1))  # No augmentation for validation
        
        # Normalize
        image = self.normalize(image)
        return image

train_transforms = Transformations(train=True)
val_transforms = Transformations(train=False)

# Dataset personalizado
class BikeLaneDataset(Dataset):
    def __init__(self, dataframe, transforms):
        self.dataframe = dataframe
        self.transforms = transforms

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        # Load image with OpenCV
        image = cv2.imread(row['individual_file_path'])
        if image is None:
            raise ValueError(f"Image not found at {row['individual_file_path']}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
        label = row['condition_name']
        label_idx = condition_to_idx[label]  # Convertir etiqueta a índice
        if self.transforms:
            image = self.transforms(image)
        return image, label_idx

# Crear DataLoader
condition_to_idx = {cond: idx for idx, cond in enumerate(train_df['condition_name'].unique())}

train_dataset = BikeLaneDataset(train_df, train_transforms)
val_dataset = BikeLaneDataset(val_df, val_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True, num_workers=6)
# val_loader = DataLoader(val_dataset, batch_size=256, shuffle=False, num_workers=6)


# **4. Entrenar ResNet18**

### Definir el modelo

In [9]:
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18

# Cargar modelo preentrenado
model = resnet18(pretrained=True)

# Reemplazar la última capa
num_classes = len(condition_to_idx)
model.fc = nn.Linear(model.fc.in_features, num_classes)

# Usar GPU si está disponible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
print(f"Using device: {device}")

Using device: cuda


In [10]:
#Verifica que se este usando el CUDA
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"¿GPU disponible?: {torch.cuda.is_available()}")
print(f"Nombre de la GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'No hay GPU disponible'}")

PyTorch version: 2.5.1
¿GPU disponible?: True
Nombre de la GPU: NVIDIA GeForce RTX 4060


### Definir loss y optimizador

In [11]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

### Ciclo de entrenamiento

In [12]:
def train(model, criterion, optimizer, train_loader, val_loader, num_epochs=10):
    for epoch in range(num_epochs):
        # Entrenamiento
        model.train()
        train_loss, train_correct = 0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_correct += (outputs.argmax(1) == labels).sum().item()

        # Validación
        model.eval()
        val_loss, val_correct = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                val_correct += (outputs.argmax(1) == labels).sum().item()

        # Métricas
        train_accuracy = train_correct / len(train_loader.dataset)
        val_accuracy = val_correct / len(val_loader.dataset)

        print(f"Epoch {epoch+1}/{num_epochs}")
        print(f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}")

train(model, criterion, optimizer, train_loader, val_loader, num_epochs=10)

Epoch 1/10
Train Loss: 1900.2883, Train Accuracy: 0.4879
Val Loss: 457.2839, Val Accuracy: 0.5486
Epoch 2/10
Train Loss: 1356.7168, Train Accuracy: 0.6264
Val Loss: 359.0026, Val Accuracy: 0.6307
Epoch 3/10
Train Loss: 1128.6612, Train Accuracy: 0.6844
Val Loss: 313.8178, Val Accuracy: 0.6804
Epoch 4/10
Train Loss: 962.0775, Train Accuracy: 0.7209
Val Loss: 297.3737, Val Accuracy: 0.7013
Epoch 5/10
Train Loss: 851.9297, Train Accuracy: 0.7488
Val Loss: 279.7789, Val Accuracy: 0.7165
Epoch 6/10
Train Loss: 778.2708, Train Accuracy: 0.7654
Val Loss: 267.2904, Val Accuracy: 0.7236
Epoch 7/10
Train Loss: 705.3919, Train Accuracy: 0.7823
Val Loss: 257.6091, Val Accuracy: 0.7284
Epoch 8/10
Train Loss: 659.4561, Train Accuracy: 0.7904
Val Loss: 262.8404, Val Accuracy: 0.7151
Epoch 9/10
Train Loss: 615.3516, Train Accuracy: 0.8042
Val Loss: 253.8431, Val Accuracy: 0.7419
Epoch 10/10
Train Loss: 582.6085, Train Accuracy: 0.8068
Val Loss: 265.2738, Val Accuracy: 0.7095


# **5. Evaluar métricas**

### Predicciones y cálculo de métricas

In [13]:
from sklearn.metrics import classification_report

# Obtener predicciones
def evaluate(model, loader):
    model.eval()
    all_preds, all_labels = [], []
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            preds = outputs.argmax(1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    return all_preds, all_labels

# Predicciones
val_preds, val_labels = evaluate(model, val_loader)

# Reporte de clasificación
print(classification_report(val_labels, val_preds, target_names=condition_to_idx.keys()))


                              precision    recall  f1-score   support

  ci_o_cr_1_tipci_VD_op_ci_0       0.65      0.86      0.74      1359
 ci_o_cr_1_tipci_PAR_op_ci_1       0.78      0.92      0.84       843
  ci_o_cr_1_tipci_CA_op_ci_0       0.79      0.86      0.82      1875
  ci_o_cr_1_tipci_CA_op_ci_1       0.85      0.77      0.81      1762
  ci_o_cr_0_tipci_VD_op_ci_0       0.36      0.48      0.42       994
ci_o_cr_1_tipci_BAND_op_ci_1       0.81      0.79      0.80       422
  ci_o_cr_0_tipci_CA_op_ci_0       0.40      0.43      0.42       849
 ci_o_cr_1_tipci_PAR_op_ci_0       0.86      0.91      0.88       791
 ci_o_cr_0_tipci_PAR_op_ci_0       0.31      0.09      0.14       244
ci_o_cr_0_tipci_BAND_op_ci_0       0.41      0.52      0.46       108
ci_o_cr_1_tipci_BAND_op_ci_0       0.67      0.87      0.76        38
  ci_o_cr_1_tipci_VD_op_ci_1       0.86      0.59      0.70      2716

                    accuracy                           0.71     12001
                  

### Évaluación de una métrica

In [35]:
import torch
from torchvision import transforms
from PIL import Image
import numpy as np
import torch.nn.functional as F

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the transformation pipeline (same as used during training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Function to load and preprocess the image
def load_image(image_path):
    image = Image.open(image_path).convert('RGB')
    image = transform(image)
    image = image.unsqueeze(0)  # Add batch dimension
    return image

# Function to evaluate a single image
def evaluate_single_image(model, image_path):
    model.eval()
    image = load_image(image_path).to(device)
    with torch.no_grad():
        output = model(image)
        probabilities = F.softmax(output, dim=1)
        top_probs, top_labels = torch.topk(probabilities, 3)
        top_probs = top_probs.cpu().numpy().flatten()
        top_labels = top_labels.cpu().numpy().flatten()
    return top_labels, top_probs

# Example usage
image_path = r"D:\Results\97\ci_o_cr_1_tipci_CA_op_ci_0\date_0_OE_1frame_328.png"  # Use raw string to avoid escape characters
# Alternatively, you can use forward slashes
# image_path = 'C:/Users/User/Downloads/imagen_prueba.png'

top_labels, top_probs = evaluate_single_image(model, image_path)

# Map the predicted labels to the corresponding class names
idx_to_condition = {v: k for k, v in condition_to_idx.items()}
top_classes = [idx_to_condition[label] for label in top_labels]

print("Top 3 predictions:")
for i in range(3):
    print(f"Rank {i+1}:")
    print(f"Class: {top_classes[i]}")
    print(f"Label: {top_labels[i]}")
    print(f"Confidence: {top_probs[i]:.4f}")

Top 3 predictions:
Rank 1:
Class: ci_o_cr_1_tipci_VD_op_ci_0
Label: 0
Confidence: 0.8199
Rank 2:
Class: ci_o_cr_0_tipci_VD_op_ci_0
Label: 4
Confidence: 0.0653
Rank 3:
Class: ci_o_cr_1_tipci_VD_op_ci_1
Label: 11
Confidence: 0.0549


### Reporte con imagenes

In [40]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix
from fpdf import FPDF
from PIL import Image
import os

In [41]:
def evaluate_model(model, val_loader, device):
    model.eval()
    all_preds, all_labels, all_probs = [], [], []
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            probs = torch.nn.functional.softmax(outputs, dim=1)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_probs.extend(probs.cpu().numpy())
    return all_preds, all_labels, all_probs

In [42]:
def plot_confusion_matrix(all_labels, all_preds, class_names, title):
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=class_names, yticklabels=class_names)
    plt.title(title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig("confusion_matrix.png")
    plt.close()

In [43]:
class PDF(FPDF):
    def header(self):
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, "Model Evaluation Report", 0, 1, "C")

    def footer(self):
        self.set_y(-15)
        self.set_font("Arial", "I", 8)
        self.cell(0, 10, f"Page {self.page_no()}", 0, 0, "C")

def create_pdf_report(class_names, true_positives, false_positives, metrics, overall_metrics):
    pdf = PDF()
    pdf.add_page()

    # Add overall metrics
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Overall Metrics", 0, 1)
    pdf.set_font("Arial", "", 10)
    for metric, value in overall_metrics.items():
        pdf.cell(0, 10, f"{metric}: {value:.4f}", 0, 1)

    # Add confusion matrix
    pdf.add_page()
    pdf.set_font("Arial", "B", 12)
    pdf.cell(0, 10, "Confusion Matrix", 0, 1)
    pdf.image("confusion_matrix.png", x=10, y=20, w=180)

    # Add class-specific metrics and examples
    for class_name in class_names:
        pdf.add_page()
        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, f"Class: {class_name}", 0, 1)

        # Add metrics
        pdf.set_font("Arial", "", 10)
        for metric, value in metrics[class_name].items():
            pdf.cell(0, 10, f"{metric}: {value:.4f}", 0, 1)

        # Add confidence distribution plot
        pdf.image(f"confidence_distribution_{class_name}.png", x=10, y=50, w=180)

        # Add true positives
        pdf.add_page()
        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, "True Positives", 0, 1)
        for example in true_positives[class_name]:
            pdf.set_font("Arial", "", 10)
            pdf.cell(0, 10, f"Image: {example['image_path']}", 0, 1)
            pdf.cell(0, 10, f"Ground Truth: {example['true_label']}", 0, 1)
            pdf.cell(0, 10, f"Predicted: {example['predicted_label']}", 0, 1)
            pdf.cell(0, 10, f"Confidence: {example['confidence']:.4f}", 0, 1)
            pdf.image(example['image_path'], x=10, y=pdf.get_y(), w=50)
            pdf.ln(60)

        # Add false positives
        pdf.add_page()
        pdf.set_font("Arial", "B", 12)
        pdf.cell(0, 10, "False Positives", 0, 1)
        for example in false_positives[class_name]:
            pdf.set_font("Arial", "", 10)
            pdf.cell(0, 10, f"Image: {example['image_path']}", 0, 1)
            pdf.cell(0, 10, f"Ground Truth: {example['true_label']}", 0, 1)
            pdf.cell(0, 10, f"Predicted: {example['predicted_label']}", 0, 1)
            pdf.cell(0, 10, f"Confidence: {example['confidence']:.4f}", 0, 1)
            pdf.image(example['image_path'], x=10, y=pdf.get_y(), w=50)
            pdf.ln(60)

    # Save the PDF
    pdf.output("D:\Results\model_evaluation_report.pdf")

In [44]:
# Evaluate the model
all_preds, all_labels, all_probs = evaluate_model(model, val_loader, device)

# Get class names
class_names = list(condition_to_idx.keys())

# Calculate overall metrics
overall_metrics = classification_report(all_labels, all_preds, target_names=class_names, output_dict=True)

# Generate confusion matrix
plot_confusion_matrix(all_labels, all_preds, class_names, "Confusion Matrix")

# Initialize dictionaries to store true positives and false positives
true_positives = {class_name: [] for class_name in class_names}
false_positives = {class_name: [] for class_name in class_names}

# Populate true positives and false positives
for idx, (true_label, pred_label, probs) in enumerate(zip(all_labels, all_preds, all_probs)):
    true_class = class_names[true_label]
    pred_class = class_names[pred_label]
    confidence = probs[pred_label]

    if true_label == pred_label:
        if len(true_positives[true_class]) < 10:
            true_positives[true_class].append({
                "image_path": val_df.iloc[idx]['individual_file_path'],
                "true_label": true_class,
                "predicted_label": pred_class,
                "confidence": confidence
            })
    else:
        if len(false_positives[pred_class]) < 10:
            false_positives[pred_class].append({
                "image_path": val_df.iloc[idx]['individual_file_path'],
                "true_label": true_class,
                "predicted_label": pred_class,
                "confidence": confidence
            })

# Generate confidence distribution plots
for class_name in class_names:
    tp_confidences = [example['confidence'] for example in true_positives[class_name]]
    fp_confidences = [example['confidence'] for example in false_positives[class_name]]
    plot_confidence_distribution(tp_confidences, fp_confidences, class_name)

# Create the PDF report
create_pdf_report(class_names, true_positives, false_positives, overall_metrics, overall_metrics['macro avg'])

NameError: name 'plot_confidence_distribution' is not defined

# **6. guardar el modelo**

### Predicciones y cálculo de métricas

In [37]:
import torch

# Assuming 'model' is your trained model
model_path = 'D:\Results\model_weights.pth'
torch.save(model.state_dict(), model_path)
print(f"Model weights saved to {model_path}")

Model weights saved to D:\Results\model_weights.pth


In [None]:
import torch

# Assuming 'MyModel' is your model class
# model = MyModel()
model.load_state_dict(torch.load(model_path))
model.eval()  # Set the model to evaluation mode
print(f"Model weights loaded from {model_path}")