In [3]:
!pip install lmdb

import argparse
import os
import numpy as np
import math
import itertools
import signal
import pandas as pd
import random
from sklearn.metrics import mean_absolute_error, r2_score
import lmdb
import pickle
from io import BytesIO
from sklearn.preprocessing import StandardScaler
import torchvision.transforms as transforms
from torchvision.utils import save_image
from transformers import AutoTokenizer, AutoModel
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
from torch.autograd import Variable
from torchvision import models

import gc

import zipfile
from io import BytesIO

import torch.nn as nn
import torch.nn.functional as F
import torch

import matplotlib.pyplot as plt
from PIL import Image

from torch.amp import GradScaler, autocast
from torch.utils.data import random_split

import glob



In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Using {} device'.format(device))

Using cuda device


In [6]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/datos_coches_procesados.csv')

In [7]:
import zipfile
import os

ruta_zip = "/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/imagenes_comprimidas.zip"
carpeta_destino = "dataset_extraido"

os.makedirs(carpeta_destino, exist_ok=True)

with zipfile.ZipFile(ruta_zip, 'r') as zip_ref:
    zip_ref.extractall(carpeta_destino)

print("Descompresión completada.")

Descompresión completada.


In [8]:
# Ruta a imágenes y CSV
root_dir = "dataset_extraido/imagenes_comprimidas"
csv_path = "/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/datos_coches_procesados.csv"
lmdb_path = "dataset.lmdb"

# Leer CSV
df = pd.read_csv(csv_path)
df = df.set_index('url')


# Crear base de datos LMDB
env = lmdb.open(lmdb_path, map_async=True, map_size=20*1024**3, meminit=False, writemap=True, lock=False)

with env.begin(write=True) as txn:
    for url in os.listdir(root_dir):
        folder_path = os.path.join(root_dir, url)
        if not os.path.isdir(folder_path) or url not in df.index:
            continue

        images = []

         # Cargar todas las imágenes de la subcarpeta
        for img_name in os.listdir(folder_path):
            img_path = os.path.join(folder_path, img_name)
            with open(img_path, 'rb') as f:
                img_bytes = f.read()
                images.append((img_name, img_bytes))
            break  # Guardamos el nombre y el contenido

        if not images:
            continue

        key = url.encode('utf-8')  # Solo el nombre de la carpeta como clave
        metadata = df.loc[url].to_dict()
        value = pickle.dumps({
            "image": images,    # Lista de (nombre, bytes)
            "metadata": metadata,
        })

        txn.put(key, value)

In [9]:
random.seed(42)
def split_keys(path, train, dev):
  env = lmdb.open(path, readonly=True, lock=False)
  keys = []

  with env.begin() as txn:
      cursor = txn.cursor()
      for key, _ in cursor:
          keys.append(key)

  random.shuffle(keys)

  train_split = train
  dev_split = dev

  n = len(keys)
  train_keys = keys[:int(train_split * n)]
  dev_keys = keys[int(train_split * n):int((train_split + dev_split) * n)]
  test_keys = keys[int((train_split + dev_split) * n):]
  return train_keys, dev_keys, test_keys

In [10]:
def set_scaler(y_train):
  y_train_int = []
  for y in y_train:
    y = y.split(",")[0]
    y = y.replace('.', '')
    y = int(y)
    y_train_int.append(y)
  targets = np.array(y_train_int).reshape(-1, 1)
  scaler = StandardScaler()
  scaler.fit(targets)
  return scaler

In [11]:
def get_labels_train(df, train_keys):
  keys_str = [key.decode('utf-8') for key in train_keys]
  filtered_df = df.loc[df.index.intersection(keys_str)]
  y_train = filtered_df['precio'].tolist()
  return y_train

In [12]:
class LMDBDataset(Dataset):
    def __init__(self, lmdb_path ,keys, scaler, transform=None):
        self.env = lmdb.open(lmdb_path, readonly=True, lock=False)
        self.keys = keys
        self.transform = transform or transforms.ToTensor()
        self.normalizar = False
        self.scaler = scaler
    def __len__(self):
        return len(self.keys)

    def __getitem__(self, idx):
        key = self.keys[idx]
        with self.env.begin() as txn:
            data = pickle.loads(txn.get(key))

        metadata = data["metadata"]
        try:
          año = int(float(metadata['año']))
        except:
          año = None
        try:
          km = int(float(metadata['km']))
        except:
          km = None
        try:
          precio = metadata['precio'].split(",")[0]
          precio = precio.replace('.', '')
          precio = int(precio)
        except:
          precio = -1

        text = "Modelo: " + str(metadata['marca']) +" "+str(metadata['modelo']) +" " + "año: " + str(año)+ " "+"Kilómetros: " + str(km) + " " + str(metadata['desc'])


        precio = self.normalizar_precio(precio)

        name, img_bytes = data["image"][0]
        img = Image.open(BytesIO(img_bytes)).convert("RGB")
        img = self.transform(img)

        return img, text, precio
    def normalizar_precio(self, precio):
        y = self.scaler.transform([[precio]])[0][0]
        return y

In [13]:
train_keys, dev_keys, test_keys = split_keys(lmdb_path, 0.85, 0.05)
print(len(train_keys))

6907


In [14]:
train_precios = get_labels_train(df, train_keys)
scaler = set_scaler(train_precios)

In [15]:
train_dataset = LMDBDataset(lmdb_path, train_keys, scaler)
dev_dataset = LMDBDataset(lmdb_path, dev_keys, scaler)
test_dataset = LMDBDataset(lmdb_path, test_keys, scaler)

In [16]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
dev_loader = DataLoader(dev_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [17]:
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/4.19k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/605M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/605M [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/592 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/862k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.22M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

In [18]:
for name, param in model.named_parameters():
    if 'projection' in name:  # suponiendo que el último bloque tiene '16' en el nombre
        param.requires_grad = True
    else:
        param.requires_grad = False

In [19]:
for name, param in model.named_parameters():
    estado = "entrenable" if param.requires_grad else "congelada"
    print(f"{name:50} → {estado}")

logit_scale                                        → congelada
text_model.embeddings.token_embedding.weight       → congelada
text_model.embeddings.position_embedding.weight    → congelada
text_model.encoder.layers.0.self_attn.k_proj.weight → congelada
text_model.encoder.layers.0.self_attn.k_proj.bias  → congelada
text_model.encoder.layers.0.self_attn.v_proj.weight → congelada
text_model.encoder.layers.0.self_attn.v_proj.bias  → congelada
text_model.encoder.layers.0.self_attn.q_proj.weight → congelada
text_model.encoder.layers.0.self_attn.q_proj.bias  → congelada
text_model.encoder.layers.0.self_attn.out_proj.weight → congelada
text_model.encoder.layers.0.self_attn.out_proj.bias → congelada
text_model.encoder.layers.0.layer_norm1.weight     → congelada
text_model.encoder.layers.0.layer_norm1.bias       → congelada
text_model.encoder.layers.0.mlp.fc1.weight         → congelada
text_model.encoder.layers.0.mlp.fc1.bias           → congelada
text_model.encoder.layers.0.mlp.fc2.weight      

In [29]:
class ClipModel(nn.Module):
  def __init__(self, model, processor, device, final_layer=1024):
    super(ClipModel, self).__init__()
    self.model = model
    self.processor = processor
    self.device = device
    self.model = self.model.to(self.device)
    self.fc = nn.Linear(final_layer, 1).to(self.device)
  def forward(self, image, text):
    inputs = self.processor(text=text, images=image, return_tensors="pt", padding=True, truncation=True).to(self.device)
    image_embeds = self.model.get_image_features(pixel_values=inputs["pixel_values"])
    text_embeds = self.model.get_text_features(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
    out = torch.cat((image_embeds, text_embeds), dim=1)
    out = self.fc(out)

    return out

In [21]:
class EarlyStopping:
    def __init__(self, patience=5, min_delta=0.0):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = float('inf')
        self.counter = 0
        self.should_stop = False

    def __call__(self, val_loss):
        if val_loss < self.best_loss - self.min_delta:
            self.best_loss = val_loss
            self.counter = 0
        else:
            self.counter += 1
            if self.counter >= self.patience:
                self.should_stop = True


In [68]:
class Trainer():
  def __init__(self, model, dataset, dataloader, devloader, testloader, num_epochs, learning_rate, device, save, earlystopping):
    self.model = model
    self.dataset = dataset
    self.dataloader = dataloader
    self.num_epochs = num_epochs
    self.learning_rate = learning_rate
    self.device = device
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate)
    self.criterion = nn.MSELoss()
    self.save = save
    self.earlystopping = earlystopping
    self.devloader = devloader
    self.testloader = testloader
    self.save_path ='/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/clip_model-2.pth'
    self.upload_path = '/content/drive/MyDrive/Colab Notebooks/MDT/ProyectoIndividual/clip_model.pth'

  def train(self):
    for epoch in range(self.num_epochs):
      self.model.train()
      for images, text, y in self.dataloader:

        y = y.clone().detach().to(torch.float32).to(self.device)

        self.optimizer.zero_grad()

        output = self.model(images, text)
        loss = self.criterion(output.flatten(), y.flatten())
        loss.backward()
        self.optimizer.step()
        print(f"Epoch {epoch+1}/{self.num_epochs}, Loss: {loss.item()}")

      eval_loss = self.eval()

      if self.save and self.earlystopping.best_loss > eval_loss:
        self.save_model()

      print(f"Epoch {epoch+1}/{self.num_epochs}, Eval Loss: {eval_loss}")
      if epoch == 8:
        self.descongelar_capas()
      self.earlystopping(eval_loss)
      if self.earlystopping.should_stop:
        print("Early stopping triggered")
        break

  def eval(self):
    self.model.eval()
    losses = []
    with torch.no_grad():
      for images, text, y in self.devloader:
        y = y.clone().detach().to(torch.float32).to(self.device)
        output = self.model(images, text)
        loss = self.criterion(output.flatten(), y.flatten())
        losses.append(loss.item())
    return sum(losses)/len(losses)

  def test(self):
    self.model.eval()
    targets = []
    preds = []
    with torch.no_grad():
      for images, text, y in self.testloader:
        y = y.clone().detach().to(torch.float32).to(self.device)
        output = self.model(images, text)
        preds.append(output)
        targets.append(y)

    y_pred = torch.cat([torch.tensor(p) for p in preds]).to('cpu').numpy().flatten()
    y_true = torch.cat([torch.tensor(t) for t in targets]).to('cpu').numpy().flatten()

    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    print(f"MAE: {mae:.4f}")
    print(f"R²:  {r2:.4f}")

    return mae, r2

  def save_model(self, path=None):
    if path is None:
      torch.save(self.model.state_dict(), self.save_path)
    else:
      torch.save(self.model.state_dict(), path)
  def descongelar_capas(self):
    print("Descongelando capas...")
    for name, param in self.model.named_parameters():
      if 'post_layernorm' in name or 'final_layer_norm' in name:
        param.requires_grad = True
    for name, param in self.model.named_parameters():
      if '11.' in name:
        param.requires_grad = True
  def get_model(self, path = None):
    self.model.load_state_dict(torch.load(self.upload_path))

In [59]:
clipmodel = ClipModel(model, processor, device, final_layer=1024)

In [63]:
earlystopping = EarlyStopping(patience=7, min_delta=0.01)

In [69]:
trainer = Trainer(clipmodel, train_dataset, train_loader, dev_loader, test_loader, 30, 0.001, device, True, earlystopping)

In [55]:
trainer.train()

Epoch 1/30, Loss: 0.6772846579551697
Epoch 1/30, Loss: 58.96745300292969
Epoch 1/30, Loss: 3.8759677410125732
Epoch 1/30, Loss: 9.11526107788086
Epoch 1/30, Loss: 20.306079864501953
Epoch 1/30, Loss: 9.905231475830078
Epoch 1/30, Loss: 2.0624542236328125
Epoch 1/30, Loss: 4.2506208419799805
Epoch 1/30, Loss: 5.998174667358398
Epoch 1/30, Loss: 9.110426902770996
Epoch 1/30, Loss: 6.3601226806640625
Epoch 1/30, Loss: 1.5313165187835693
Epoch 1/30, Loss: 0.16361770033836365
Epoch 1/30, Loss: 2.021665573120117
Epoch 1/30, Loss: 3.2881805896759033
Epoch 1/30, Loss: 3.213326930999756
Epoch 1/30, Loss: 2.977738380432129
Epoch 1/30, Loss: 1.3677732944488525
Epoch 1/30, Loss: 4.911464214324951
Epoch 1/30, Loss: 0.854882538318634
Epoch 1/30, Loss: 1.8574965000152588
Epoch 1/30, Loss: 2.366572856903076
Epoch 1/30, Loss: 1.7631211280822754
Epoch 1/30, Loss: 1.0522825717926025
Epoch 1/30, Loss: 0.36010870337486267
Epoch 1/30, Loss: 14.9149751663208
Epoch 1/30, Loss: 0.35373392701148987
Epoch 1/30, 

In [70]:
trainer.get_model()
trainer.test()

MAE: 0.3461
R²:  0.6626


  y_pred = torch.cat([torch.tensor(p) for p in preds]).to('cpu').numpy().flatten()
  y_true = torch.cat([torch.tensor(t) for t in targets]).to('cpu').numpy().flatten()


(0.34607234597206116, 0.662649393081665)