In [1]:
!pip install lmdb

import argparse
import os
import numpy as np
import math
import itertools
import signal
import pandas as pd
import random
from sklearn.metrics import mean_absolute_error, r2_score
import lmdb
import pickle
from io import BytesIO
from sklearn.preprocessing import StandardScaler
import torchvision.transforms as transforms
from torchvision.utils import save_image
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torch.autograd import Variable
from torchvision import models

import gc

import zipfile
from io import BytesIO

import torch.nn as nn
import torch.nn.functional as F
import torch

import matplotlib.pyplot as plt
from PIL import Image

from torch.amp import GradScaler, autocast
from torch.utils.data import random_split

import glob

Collecting lmdb
  Downloading lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Downloading lmdb-1.6.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (297 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.8/297.8 kB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lmdb
Successfully installed lmdb-1.6.2


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [4]:
import zipfile
import os

ruta_zip = "/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/imagenes_comprimidas.zip"
carpeta_destino = "dataset_extraido"

os.makedirs(carpeta_destino, exist_ok=True)

with zipfile.ZipFile(ruta_zip, 'r') as zip_ref:
    zip_ref.extractall(carpeta_destino)

print("Descompresión completada.")

Descompresión completada.


In [5]:
# Ruta a imágenes y CSV
root_dir = "dataset_extraido/imagenes_comprimidas"
csv_path = "/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/datos_coches_procesados.csv"
lmdb_path = "dataset.lmdb"

# Leer CSV
df = pd.read_csv(csv_path)


# Crear base de datos LMDB
env = lmdb.open(lmdb_path, map_async=True, map_size=20*1024**3, meminit=False, writemap=True, lock=False)

with env.begin(write=True) as txn:
    for idx, row in df.iterrows():
        url = row['url']
        precio = row['precio']

        carpeta = os.path.join(root_dir, url)
        if not os.path.isdir(carpeta):
            continue
        imagenes = sorted(os.listdir(carpeta))
        if len(imagenes) == 0:
            continue

        imagen_path = os.path.join(carpeta, imagenes[0])
        try:
            with Image.open(imagen_path) as img:
                img = img.convert("RGB")
                buffer = BytesIO()
                img.save(buffer, format='JPEG')
                imagen_bytes = buffer.getvalue()

                key = url.encode('utf-8')
                data = {
                  "image": imagen_bytes,
                  "precio": precio
                }
                txn.put(key, pickle.dumps(data))
        except Exception as e:
            print(f"Error al procesar {imagen_path}: {e}")


In [6]:

random.seed(42)
def split_keys(path, train, dev):
  env = lmdb.open(path, readonly=True, lock=False)
  keys = []

  with env.begin() as txn:
      cursor = txn.cursor()
      for key, _ in cursor:
          keys.append(key)

  random.shuffle(keys)

  train_split = train
  dev_split = dev

  n = len(keys)
  train_keys = keys[:int(train_split * n)]
  dev_keys = keys[int(train_split * n):int((train_split + dev_split) * n)]
  test_keys = keys[int((train_split + dev_split) * n):]
  return train_keys, dev_keys, test_keys

In [7]:
def set_scaler(y_train):
  y_train_int = []
  for y in y_train:
    y = y.split(",")[0]
    y = y.replace('.', '')
    y = int(y)
    y_train_int.append(y)
  targets = np.array(y_train_int).reshape(-1, 1)
  scaler = StandardScaler()
  scaler.fit(targets)
  return scaler

In [8]:
def get_labels_train(df, train_keys):
  keys_str = [key.decode('utf-8') for key in train_keys]
  df = df.set_index('url')
  filtered_df = df.loc[df.index.intersection(keys_str)]
  y_train = filtered_df['precio'].tolist()
  return y_train

In [9]:
class LMDBDataset(Dataset):
    def __init__(self, lmdb_path ,keys, scaler, transform=None):
        self.env = lmdb.open(lmdb_path, readonly=True, lock=False)
        self.keys = keys
        self.transform = transform
        self.normalizar = False
        self.scaler = scaler
    def __len__(self):
        return len(self.keys)

    def __getitem__(self, idx):
        key = self.keys[idx]
        with self.env.begin() as txn:
            data = pickle.loads(txn.get(key))

        images = data["image"]
        precio = data["precio"]

        try:
          precio = precio.split(",")[0]
          precio = precio.replace('.', '')
          precio = int(precio)
        except:
          precio = -1

        precio = self.normalizar_precio(precio)

        img = Image.open(BytesIO(images)).convert("RGB")
        img = self.transform(img)

        return img, precio
    def normalizar_precio(self, precio):
        y = self.scaler.transform([[precio]])[0][0]
        return y

In [10]:
train_keys, dev_keys, test_keys = split_keys(lmdb_path, 0.85, 0.05)
train_precios = get_labels_train(df, train_keys)
scaler = set_scaler(train_precios)

In [11]:
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.RandomAffine(5, translate=(0.02, 0.02)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


In [12]:
dev_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])


In [13]:
train_dataset = LMDBDataset(lmdb_path, train_keys, scaler, transform=train_transform)
dev_dataset = LMDBDataset(lmdb_path, dev_keys, scaler, transform=dev_transform)
test_dataset = LMDBDataset(lmdb_path, test_keys, scaler, transform=dev_transform)

In [14]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [15]:
def print_model_parameters(model):
    total = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f"\nTotal: {total:,} parámetros entrenables")

In [25]:
import timm
import torch.nn as nn
model = timm.create_model('vit_base_patch16_224', pretrained=True)

# Reemplazar la cabeza (head) para regresión (salida de tamaño 1)
model.head = nn.Linear(model.head.in_features, 1)

In [26]:
for param in model.parameters():
    param.requires_grad = False  # congela todo excepto la cabeza

# Si quieres descongelar solo las últimas capas del encoder (por ejemplo, las 2 últimas)
for name, param in model.named_parameters():
    #print(name)
    if 'head' in name:
        param.requires_grad = True
    elif 'norm.weight' in name or 'norm.bias' in name:
        param.requires_grad = True


In [27]:
for name, param in model.named_parameters():
    estado = "entrenable" if param.requires_grad else "congelada"
    print(f"{name:50} → {estado}")

cls_token                                          → congelada
pos_embed                                          → congelada
patch_embed.proj.weight                            → congelada
patch_embed.proj.bias                              → congelada
blocks.0.norm1.weight                              → congelada
blocks.0.norm1.bias                                → congelada
blocks.0.attn.qkv.weight                           → congelada
blocks.0.attn.qkv.bias                             → congelada
blocks.0.attn.proj.weight                          → congelada
blocks.0.attn.proj.bias                            → congelada
blocks.0.norm2.weight                              → congelada
blocks.0.norm2.bias                                → congelada
blocks.0.mlp.fc1.weight                            → congelada
blocks.0.mlp.fc1.bias                              → congelada
blocks.0.mlp.fc2.weight                            → congelada
blocks.0.mlp.fc2.bias                              → co

In [19]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = float('inf')
        self.counter = 0
        self.should_stop = False

    def __call__(self, val_loss):
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True


In [20]:
class Trainer():
  def __init__(self, model, dataset, dataloader, devloader, testloader, num_epochs, learning_rate, device, save, earlystopping):
    self.model = model
    self.dataset = dataset
    self.dataloader = dataloader
    self.num_epochs = num_epochs
    self.learning_rate = learning_rate
    self.device = device
    self.model = self.model.to(self.device)
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
    self.criterion = nn.MSELoss()
    self.save = save
    self.earlystopping = earlystopping
    self.devloader = devloader
    self.testloader = testloader
    self.save_path ='/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/vision_model-unfreeze_total.pth'

  def train(self):
    for epoch in range(self.num_epochs):
      self.model.train()
      for image, y in self.dataloader:

        image = image.clone().detach().to(self.device)
        y = y.clone().detach().to(torch.float32).to(self.device)

        self.optimizer.zero_grad()
        try:
          output = self.model(image)
        except:
          print(image.shape)
          print(image)
          print(y)
          print(output)
          print(output.shape)
          continue
        loss = self.criterion(output.flatten(), y.flatten())
        loss.backward()
        self.optimizer.step()
        print(f"Epoch {epoch+1}/{self.num_epochs}, Loss: {loss.item()}")

      eval_loss = self.eval()

      if self.save and self.earlystopping.best_loss > eval_loss:
        torch.save(self.model.state_dict(), self.save_path)

      print(f"Epoch {epoch+1}/{self.num_epochs}, Eval Loss: {eval_loss}")
      if epoch == 8:
        self.descongelar_capa()

      self.earlystopping(eval_loss)
      if self.earlystopping.should_stop:
        print("Early stopping triggered")
        break

  def eval(self):
    self.model.eval()
    losses = []
    with torch.no_grad():
      for image, y in self.devloader:
        image = image.clone().detach().to(self.device)
        y = y.clone().detach().to(torch.float32).to(self.device)
        output = self.model(image)
        loss = self.criterion(output.flatten(), y.flatten())
        losses.append(loss.item())
    return sum(losses)/len(losses)

  def test(self):
    self.model.eval()
    targets = []
    preds = []
    with torch.no_grad():
      for image, y in self.testloader:
        image = image.clone().detach().to(self.device)
        y = y.clone().detach().to(torch.float32).to(self.device)
        output = self.model(image)
        preds.append(output.flatten())
        targets.append(y.flatten())

    y_pred = torch.cat([torch.tensor(p) for p in preds]).to('cpu').numpy().flatten()
    y_true = torch.cat([torch.tensor(t) for t in targets]).to('cpu').numpy().flatten()

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print(f"MAE: {mae:.4f}")
    print(f"R²:  {r2:.4f}")

    return mae, r2

  def save(self, path=None):
    if path is None:
      torch.save(self.model.state_dict(), self.save_path)
    else:
      torch.save(self.model.state_dict(), path)
  def descongelar_capa(self):
    print("Descongelando capas...")
    for name, param in self.model.named_parameters():
      if '11' in name:
        param.requires_grad = True
      elif '10' in name:
        param.requires_grad = True

In [21]:
earlystopping = EarlyStopping(patience=7, min_delta=0.0)

In [None]:
trainer = Trainer(model, train_dataset, train_loader, dev_loader, test_loader, 50, 0.001, device, True, earlystopping)

In [None]:
trainer.train()

Epoch 1/15, Loss: 0.3896639347076416
Epoch 1/15, Loss: 17.105104446411133
Epoch 1/15, Loss: 1.122908592224121
Epoch 1/15, Loss: 0.4316956400871277
Epoch 1/15, Loss: 0.6277965307235718
Epoch 1/15, Loss: 0.40810680389404297
Epoch 1/15, Loss: 0.7153657674789429
Epoch 1/15, Loss: 1.474835753440857
Epoch 1/15, Loss: 0.754314661026001
Epoch 1/15, Loss: 0.8148517608642578
Epoch 1/15, Loss: 0.5693520307540894
Epoch 1/15, Loss: 0.8003987073898315
Epoch 1/15, Loss: 0.5666482448577881
Epoch 1/15, Loss: 0.5637763142585754
Epoch 1/15, Loss: 0.4435006082057953
Epoch 1/15, Loss: 0.30641767382621765
Epoch 1/15, Loss: 0.6635559797286987
Epoch 1/15, Loss: 0.2927430272102356
Epoch 1/15, Loss: 1.1189502477645874
Epoch 1/15, Loss: 0.5298223495483398
Epoch 1/15, Loss: 0.41045868396759033
Epoch 1/15, Loss: 0.4256512522697449
Epoch 1/15, Loss: 0.7681136131286621
Epoch 1/15, Loss: 3.224663019180298
Epoch 1/15, Loss: 14.38764476776123
Epoch 1/15, Loss: 0.6525657773017883
Epoch 1/15, Loss: 0.36691582202911377
Ep

In [28]:
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/vision_model.pth'))

<All keys matched successfully>

In [29]:
trainer = Trainer(model, train_dataset, train_loader, dev_loader, test_loader, 50, 0.001, device, True, earlystopping)

In [30]:
trainer.test()

MAE: 0.5330
R²:  0.0696


  y_pred = torch.cat([torch.tensor(p) for p in preds]).to('cpu').numpy().flatten()
  y_true = torch.cat([torch.tensor(t) for t in targets]).to('cpu').numpy().flatten()


(0.5330195426940918, 0.06957763433456421)