In [1]:
import json
import os

import torch
from PIL import Image
from transformers import AutoProcessor
from transformers import BitsAndBytesConfig
from transformers import Blip2ForConditionalGeneration


# Run on the GPU when torch reports CUDA as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


def load_model(model_name="Salesforce/blip2-opt-2.7b"):
    """Load the BLIP-2 processor and its 8-bit quantized captioning model.

    Args:
        model_name: Hugging Face checkpoint identifier passed to both
            ``from_pretrained`` calls. Defaults to the BLIP-2 OPT-2.7B
            checkpoint this notebook was written for.

    Returns:
        A ``(processor, model)`` tuple.
    """
    processor = AutoProcessor.from_pretrained(model_name)

    # Passing `load_in_8bit=True` directly to from_pretrained is deprecated
    # (the library warns about it); the supported way to request 8-bit
    # loading is a BitsAndBytesConfig handed over as `quantization_config`.
    quantization_config = BitsAndBytesConfig(load_in_8bit=True)
    model = Blip2ForConditionalGeneration.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=quantization_config,
    )

    return processor, model


def getcaption(file, processor, model, max_new_tokens=20):
    """Generate a text caption for a single image.

    Args:
        file: Path (or file object) accepted by ``PIL.Image.open``.
        processor: Processor used to turn the image into model inputs and to
            decode the generated token ids back into text.
        model: Model whose ``generate`` method produces the caption token ids.
        max_new_tokens: Upper bound on the number of generated tokens
            (default 20, the value previously hard-coded here).

    Returns:
        The first decoded sequence, with special tokens removed and
        surrounding whitespace stripped.
    """
    # Open through a context manager so the underlying file handle is always
    # released. `convert` returns a new, fully loaded image, so it stays
    # usable after the source image has been closed.
    with Image.open(file) as source_image:
        image = source_image.convert('RGB')

    # `device` is the module-level target device chosen at notebook start-up;
    # the inputs are moved there and cast to float16.
    inputs = processor(image, return_tensors="pt").to(device, torch.float16)
    generated_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    return generated_text


# Build the processor/model pair once so the following cells can reuse it.
processor, model = load_model()

# Folder that holds one sub-directory per user.
users_dir = "Dataset"

  from .autonotebook import tqdm as notebook_tqdm
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [01:18<00:00, 39.01s/it]


In [None]:
# Root directory that holds one sub-directory per user.
users_dir = "Dataset"

# Visit every user sub-directory under users_dir (sorted so runs are reproducible).
for username in sorted(os.listdir(users_dir)):
    user_path = os.path.join(users_dir, username)
    user_json_path = os.path.join(user_path, 'user.json')

    # Only directories that actually contain a user.json are processed.
    if not (os.path.isdir(user_path) and os.path.exists(user_json_path)):
        continue

    # Load the user's metadata. An unreadable or malformed file is reported and
    # skipped so that one bad user does not abort the whole run.
    try:
        with open(user_json_path, 'r', encoding='utf-8') as f:
            user_data = json.load(f)
    except (OSError, ValueError) as e:  # ValueError covers JSON and decoding errors
        print(f"Error al leer {user_json_path}: {e}")
        continue
    print(f"Username {user_data.get('username', username)}")

    # Tracks whether any post was modified, so untouched files are not rewritten.
    updated = False

    # Process each of the user's posts.
    for post in user_data.get("posts", []):
        image_rel = post.get("image_path")
        if not image_rel:
            continue
        image_path = os.path.join(user_path, image_rel)

        # Caption the image only when the file exists and the description is
        # still the "None" placeholder string (i.e. not described yet).
        if os.path.isfile(image_path) and post.get("image_description") == "None":
            try:
                post["image_description"] = getcaption(image_path, processor, model)
            except Exception as e:
                print(f"Error al procesar la imagen {image_path}: {e}")
                # NOTE: this marker replaces the "None" placeholder, so the
                # image will not be retried on a later run.
                post["image_description"] = "Error en la generación de descripción"
            updated = True

    # Persist the updated descriptions. Write to a temporary file first and
    # swap it in with os.replace so an interruption mid-write cannot leave a
    # truncated user.json behind.
    if updated:
        tmp_path = user_json_path + ".tmp"
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(user_data, f, ensure_ascii=False, indent=4)
        os.replace(tmp_path, user_json_path)

print("Descripciones de imágenes generadas y guardadas exitosamente en los archivos JSON.")