# Importacion de librerias

In [1]:
import os
import torch
import torch.nn as nn
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import pandas as pd

# Definimos las constantes

In [None]:
NUM_CLASSES = 2
CLIP_LENGTH = 32
IMAGE_SIZE = 224
BATCH_SIZE = 2
EPOCHS = 20
LEARNING_RATE = 0.0005
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Definimos the custom dataset

In [None]:
class VideoClipDataset(Dataset):
    def __init__(self, csv_path, transform=None):
        self.data = pd.read_csv(csv_path, sep=' ', header=None)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        clip_id, label, clip_path = self.data.iloc[idx]
        label = int(label)

        frames = sorted(os.listdir(clip_path))[:CLIP_LENGTH]
        clip = []

        for frame_file in frames:
            img_path = os.path.join(clip_path, frame_file)
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            clip.append(image)

        clip = torch.stack(clip, dim=0)  # (T, C, H, W)
        return clip, label

# Define the custom model

In [None]:
class CNN_LSTM(nn.Module):
    def __init__(self, hidden_dim=512, num_layers=1, num_classes=2):
        super(CNN_LSTM, self).__init__()
        base_model = models.mobilenet_v2(pretrained=True)
        self.cnn = base_model.features
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.feature_dim = 1280

        self.lstm = nn.LSTM(input_size=self.feature_dim,
                            hidden_size=hidden_dim,
                            num_layers=num_layers,
                            batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        B, T, C, H, W = x.shape
        x = x.view(B * T, C, H, W)
        with torch.no_grad():
            features = self.cnn(x)
            features = self.pool(features).view(B, T, -1)  # (B, T, 1280)

        output, _ = self.lstm(features)  # (B, T, hidden)
        final_output = output[:, -1, :]  # último paso
        logits = self.fc(final_output)
        return logits

# Cargamos el dataset

In [10]:
# === TRANSFORMS ===
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor()
])

# === CARGA DE DATOS ===
train_dataset = VideoClipDataset('../train.csv', transform)
val_dataset = VideoClipDataset('../val.csv', transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# === ENTRENAMIENTO ===
model = CNN_LSTM(num_classes=NUM_CLASSES).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)



# Entrenamiento

In [11]:
for epoch in range(EPOCHS):
    model.train()
    total_loss, correct, total = 0, 0, 0
    for clips, labels in train_loader:
        clips, labels = clips.to(DEVICE), labels.to(DEVICE)
        outputs = model(clips)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    acc = correct / total
    print(f"[Epoch {epoch+1}] Loss: {total_loss:.4f}, Accuracy: {acc:.2%}")

# === GUARDAR MODELO ===
torch.save(model.state_dict(), 'cnn_lstm_supermercado.pth')
print("✅ Modelo entrenado y guardado.")


[Epoch 1] Loss: 29.4742, Accuracy: 58.75%
[Epoch 2] Loss: 27.7487, Accuracy: 53.75%
[Epoch 3] Loss: 27.2989, Accuracy: 58.75%
[Epoch 4] Loss: 25.3065, Accuracy: 67.50%
[Epoch 5] Loss: 23.8218, Accuracy: 70.00%
[Epoch 6] Loss: 22.0121, Accuracy: 68.75%
[Epoch 7] Loss: 19.0983, Accuracy: 73.75%
[Epoch 8] Loss: 18.1838, Accuracy: 77.50%
[Epoch 9] Loss: 11.6780, Accuracy: 90.00%
[Epoch 10] Loss: 7.8002, Accuracy: 97.50%
[Epoch 11] Loss: 4.6765, Accuracy: 97.50%
[Epoch 12] Loss: 1.7991, Accuracy: 100.00%
[Epoch 13] Loss: 0.7615, Accuracy: 100.00%
[Epoch 14] Loss: 0.4439, Accuracy: 100.00%
[Epoch 15] Loss: 0.2929, Accuracy: 100.00%
[Epoch 16] Loss: 0.1856, Accuracy: 100.00%
[Epoch 17] Loss: 0.1259, Accuracy: 100.00%
[Epoch 18] Loss: 0.0730, Accuracy: 100.00%
[Epoch 19] Loss: 0.0424, Accuracy: 100.00%
[Epoch 20] Loss: 0.0370, Accuracy: 100.00%
✅ Modelo entrenado y guardado.
