In [1]:
!pip install torch torchvision transformers diffusers pillow



In [3]:
import os
import torch
from diffusers import StableDiffusionPipeline
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

# Hugging Face model identifiers: one for captioning, one for image generation.
caption_model = "Salesforce/blip-image-captioning-base"
image_gen_model = "stabilityai/stable-diffusion-2"

# Load the captioning network together with its matching preprocessor.
model = BlipForConditionalGeneration.from_pretrained(caption_model)
processor = BlipProcessor.from_pretrained(caption_model)

# Generate a text caption for a single image.
def generate_caption(image_path):
    """Return a caption for the image stored at ``image_path``.

    The image is converted to RGB, preprocessed with the module-level
    ``processor`` and captioned with the module-level ``model``.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The first decoded caption, with special tokens removed.
    """
    # Open through a context manager so the underlying file is closed
    # deterministically; convert() yields an independent in-memory image
    # that stays usable after the source file is closed.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    inputs = processor(image, return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        output_ids = model.generate(**inputs)
    return processor.batch_decode(output_ids, skip_special_tokens=True)[0]

# Directory holding the input images and the file the captions are written to.
image_dir = "./imagenes"
output_file = "descripciones.txt"

# Caption every image in the directory, keyed by file name.
# The extension check is case-insensitive so files such as "FOTO.JPG"
# are not silently skipped.
descriptions = {}
for filename in os.listdir(image_dir):
    if not filename.lower().endswith((".jpg", ".png", ".jpeg")):
        continue
    image_path = os.path.join(image_dir, filename)
    caption = generate_caption(image_path)
    descriptions[filename] = caption
    print(f"Imagen: {filename} -> Descripción: {caption}")

# Persist the captions, one "<file name>: <caption>" line per image.
# An explicit encoding keeps the file independent of the platform default.
with open(output_file, "w", encoding="utf-8") as f:
    f.writelines(f"{img}: {desc}\n" for img, desc in descriptions.items())

# Pick the first captioned image and regenerate it from its caption.
# Fail with a clear message instead of an IndexError when nothing was found.
if not descriptions:
    raise FileNotFoundError(f"No se encontraron imágenes en '{image_dir}'")
selected_image = next(iter(descriptions))
prompt = descriptions[selected_image]

print(f"\nGenerando nueva imagen con prompt: '{prompt}'")

# Load the image generation pipeline on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is requested only on the GPU; on the CPU fallback the
# pipeline is loaded in float32, since float16 inference is not reliably
# supported there.
dtype = torch.float16 if device == "cuda" else torch.float32
pipe = StableDiffusionPipeline.from_pretrained(image_gen_model, torch_dtype=dtype)
pipe.to(device)

# Generate the image from the caption.
generated_image = pipe(prompt).images[0]

# Save the generated image next to the notebook, prefixed with "generada_".
generated_image_path = f"generada_{selected_image}"
generated_image.save(generated_image_path)

print(f"Imagen generada guardada en: {generated_image_path}")

Imagen: knots.jpg -> Descripción: a bunch of colorful ropes on a white surface
Imagen: dry_blue_paint.jpg -> Descripción: a blue and brown wall with peeling paint
Imagen: salt_worker.jpg -> Descripción: a man working in the terraces of the terraces
Imagen: car_tunnel.jpg -> Descripción: a long tunnel with a car driving through it
Imagen: two_motorbikers.jpg -> Descripción: a man riding on the back of a motorcycle
Imagen: kid_in_tree.jpg -> Descripción: a trail through the forest
Imagen: green_landscape.jpg -> Descripción: a beautiful view of the valley and mountains in the yorkshire countryside

Generando nueva imagen con prompt: 'a bunch of colorful ropes on a white surface'


Fetching 13 files: 100%|██████████| 13/13 [01:51<00:00,  8.58s/it]
Loading pipeline components...: 100%|██████████| 6/6 [00:00<00:00,  6.25it/s]
100%|██████████| 50/50 [00:07<00:00,  6.35it/s]


Imagen generada guardada en: generada_knots.jpg
