In [1]:
import os
import json
import torch
import gc
import logging
import time
from PIL import Image
from torchvision.transforms import ToPILImage
from transformers import pipeline
from diffusers import DiffusionPipeline
from IPython.display import display

import bittensor as bt
from bitmind.constants import (
    PROMPT_GENERATOR_NAMES,
    PROMPT_GENERATOR_ARGS,
    DIFFUSER_NAMES,
    DIFFUSER_ARGS
)

2024-07-03 21:46:45.186576: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-03 21:46:45.217092: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  deprecate("Transformer2DModelOutput", "1.0.0", deprecation_message)


2024-07-03 21:46:47.799 |       INFO       |  - Loading image generation model (stabilityai/stable-diffusion-xl-base-1.0)... - 


In [2]:
# Route INFO-level (and more severe) records through the root logger
logging.basicConfig(level=logging.INFO)
# Decide where the models will run. The notebook loads weights in half
# precision, so execution stops here when no CUDA device is present.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
else:
    raise RuntimeError("This script requires a GPU because it uses torch.float16.")

In [3]:
def list_datasets(base_dir):
    """Return the names of the immediate subdirectories of ``base_dir``.

    Args:
        base_dir: Path of the directory to scan.

    Returns:
        list[str]: Subdirectory names (not full paths) in alphabetical order.
        Regular files are ignored.

    Raises:
        OSError: If ``base_dir`` does not exist or cannot be read.
    """
    # os.listdir gives no ordering guarantee; sorting makes every run visit
    # the datasets in the same order.
    return sorted(
        entry
        for entry in os.listdir(base_dir)
        if os.path.isdir(os.path.join(base_dir, entry))
    )

def load_annotations(base_dir, dataset):
    """Load every ``*.json`` annotation file found directly inside a dataset folder.

    Args:
        base_dir: Directory that contains the dataset folders.
        dataset: Name of the dataset folder inside ``base_dir``.

    Returns:
        list: One parsed JSON value per ``.json`` file, ordered by file name.
        Files with any other extension are ignored.

    Raises:
        OSError: If the dataset folder or one of its files cannot be read.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    dataset_dir = os.path.join(base_dir, dataset)
    annotations = []
    # Sorted so that "the first N annotations" means the same files on every
    # machine; os.listdir alone returns entries in arbitrary order.
    for filename in sorted(os.listdir(dataset_dir)):
        if not filename.endswith(".json"):
            continue
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(os.path.join(dataset_dir, filename), 'r', encoding='utf-8') as file:
            annotations.append(json.load(file))
    return annotations

def load_diffuser(model_name):
    """Instantiate the diffusion pipeline registered under ``model_name``.

    The pipeline is created with ``DiffusionPipeline.from_pretrained`` using the
    extra keyword arguments stored in ``DIFFUSER_ARGS[model_name]`` and is then
    moved to the notebook-level ``device``.

    Args:
        model_name: Identifier handed to ``from_pretrained``; it is also the
            key used to look up the model's entry in ``DIFFUSER_ARGS``.

    Returns:
        The loaded pipeline, already placed on ``device``.
    """
    bt.logging.info(f"Loading image generation model ({model_name})...")

    # Full precision when running on CPU, half precision on any other device.
    if device == "cpu":
        dtype = torch.float32
    else:
        dtype = torch.float16

    extra_kwargs = DIFFUSER_ARGS[model_name]
    pipe = DiffusionPipeline.from_pretrained(
        model_name,
        torch_dtype=dtype,
        **extra_kwargs,
    )
    pipe.to(device)
    return pipe

# Spaces, path separators, characters that common file systems reject and
# line breaks/tabs are all mapped to "_" when a prompt becomes a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in ' /\\:*?"<>|\n\r\t'})


def _prompt_to_stem(prompt, max_chars=50):
    """Turn the first ``max_chars`` characters of a prompt into a safe file-name stem."""
    return prompt[:max_chars].translate(_UNSAFE_FILENAME_CHARS)


def generate_images(annotations, diffuser, save_dir, num_images=1):
    """Generate one image per annotation and write each as a PNG into ``save_dir``.

    Args:
        annotations: List of dicts; each must have a ``'description'`` entry,
            which is used as the text prompt.
        diffuser: Callable invoked as ``diffuser(prompt=...)``; the first element
            of the result's ``images`` attribute is taken as the generated image.
        save_dir: Output directory; created if it does not exist.
        num_images: Upper bound on how many annotations (taken from the start
            of the list) are processed.

    Returns:
        list[str]: Paths of the files written, in generation order. Each file
        is named ``<first 50 prompt characters, sanitized>-<index>.png``.

    Raises:
        KeyError: If an annotation has no ``'description'`` entry.
    """
    os.makedirs(save_dir, exist_ok=True)
    generated_images = []
    start_time = time.time()

    # max(..., 0) keeps a negative num_images meaning "no images" rather than
    # turning into a from-the-end slice.
    for i, annotation in enumerate(annotations[:max(num_images, 0)]):
        start_loop = time.time()
        prompt = annotation['description']
        logging.info(f"Annotation {i}: {json.dumps(annotation, indent=2)}")

        # Run the model on the prompt and keep the first returned image
        generated_image = diffuser(prompt=prompt).images[0]
        logging.info(f"Type of generated image: {type(generated_image)}")

        # Tensors are converted to PIL images; anything else is saved as-is
        if isinstance(generated_image, torch.Tensor):
            img = ToPILImage()(generated_image)
        else:
            img = generated_image

        # Prompts are free text: without sanitizing, a "/" in the first 50
        # characters would point the path at a subdirectory that does not exist.
        img_filename = os.path.join(save_dir, f"{_prompt_to_stem(prompt)}-{i}.png")
        img.save(img_filename)
        generated_images.append(img_filename)
        loop_time = time.time() - start_loop
        logging.info(f"Image saved to {img_filename} ({loop_time:.2f} seconds)")

    total_time = time.time() - start_time
    logging.info(f"Total processing time: {total_time:.2f} seconds")
    return generated_images

In [4]:
# Main testing loop
def test_diffusers_on_datasets(annotations_dir, output_dir):
    """Generate sample images with every configured diffuser for every annotation dataset.

    Each model named in ``DIFFUSER_NAMES`` is loaded once, used to generate up to
    5 images for every dataset folder under ``annotations_dir``, and released
    before the next model is loaded. A model that fails to load, or a
    dataset/model pair that fails during generation, is logged and skipped so
    the remaining combinations still run.

    Args:
        annotations_dir: Directory whose subdirectories are the datasets; each
            holds the ``.json`` annotation files.
        output_dir: Directory under which one folder per dataset is written.

    Returns:
        None. Results are the image files on disk and the log output.
    """
    # Annotations do not depend on the model, so read them once up front.
    annotations_by_dataset = {
        dataset: load_annotations(annotations_dir, dataset)
        for dataset in list_datasets(annotations_dir)
    }
    if not annotations_by_dataset:
        # Nothing to generate; avoid loading any model for no reason.
        logging.warning(f"No annotation datasets found in {annotations_dir}")
        return

    # Models form the outer loop so that each set of weights is loaded once,
    # not once per dataset.
    for diffuser_name in DIFFUSER_NAMES:
        diffuser = None
        try:
            diffuser = load_diffuser(diffuser_name)
            for dataset, annotations in annotations_by_dataset.items():
                logging.info(f"Testing {diffuser_name} on annotation dataset {dataset}...")
                try:
                    # NOTE(review): every model writes into the same
                    # output_dir/<dataset> folder and the file names carry no
                    # model name, so a later model's images replace an earlier
                    # model's. Left unchanged to keep the existing output layout.
                    save_dir = os.path.join(output_dir, dataset)
                    generate_images(annotations, diffuser, save_dir, num_images=5)
                    logging.info("Images generated and saved successfully.\n")
                except Exception as e:
                    logging.error(f"Failed to generate image with {diffuser_name}: {str(e)}\n")
        except Exception as e:
            # Inner failures are handled above, so reaching this point means the
            # model itself could not be loaded.
            logging.error(f"Failed to load {diffuser_name}: {str(e)}\n")
        finally:
            # Release this model (also after a failure, and after the last
            # model) so the next load starts with as much free memory as possible.
            if diffuser is not None:
                logging.info("Deleting previous diffuser, freeing memory")
                diffuser.to('cpu')
            diffuser = None
            gc.collect()
            torch.cuda.empty_cache()

In [5]:
# Root folder scanned for datasets: each immediate subdirectory is treated as one
# dataset whose *.json files are loaded as annotations. Relative to the working directory.
ANNOTATIONS_DIR = "annotations/"
# Root folder for generated images; each dataset gets a subfolder of the same name.
OUTPUT_DIR = "synthetics_from_annotations/"

In [6]:
# Run the full sweep: every dataset folder under ANNOTATIONS_DIR against every model
# in DIFFUSER_NAMES. Up to 5 images are generated per dataset/model pair
# (num_images is fixed inside the function).
test_diffusers_on_datasets(ANNOTATIONS_DIR, OUTPUT_DIR)

INFO:root:Testing stabilityai/stable-diffusion-xl-base-1.0 on annotation dataset dalle-mini_open-images...
INFO:bittensor: - Loading image generation model (stabilityai/stable-diffusion-xl-base-1.0)... - 


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

INFO:root:Annotation 0: {
  "description": "A picture of a group of people playing in a yard.The setting is a grassy area with a red ball and a few people.The background is a green lawn.The people are wearing blue shirts and black pants.."
}


  0%|          | 0/50 [00:00<?, ?it/s]

INFO:root:Type of generated image: <class 'PIL.Image.Image'>
INFO:root:Image saved to synthetics_from_annotations/dalle-mini_open-images/A_picture_of_a_group_of_people_playing_in_a_yard.T-0.png
INFO:root:Annotation 1: {
  "description": "A picture of a red chinese flag flying in the wind.The setting is a cloudy sky..The background is a white sky.."
}


  0%|          | 0/50 [00:00<?, ?it/s]

INFO:root:Type of generated image: <class 'PIL.Image.Image'>
INFO:root:Image saved to synthetics_from_annotations/dalle-mini_open-images/A_picture_of_a_red_chinese_flag_flying_in_the_wind-1.png
INFO:root:Annotation 2: {
  "description": "A picture of three men sitting on a couch in a room.The setting is a room with a bed and a couch.The background is a wall.The men are wearing white robes and have beards.."
}


  0%|          | 0/50 [00:00<?, ?it/s]

INFO:root:Type of generated image: <class 'PIL.Image.Image'>
INFO:root:Image saved to synthetics_from_annotations/dalle-mini_open-images/A_picture_of_three_men_sitting_on_a_couch_in_a_roo-2.png
INFO:root:Annotation 3: {
  "description": "A picture of three chickens walking on a grassy hillside.The setting is a forest.The background is a green field.The chickens are brown and black.The chickens are walking on the grass.The."
}


  0%|          | 0/50 [00:00<?, ?it/s]

INFO:root:Type of generated image: <class 'PIL.Image.Image'>
INFO:root:Image saved to synthetics_from_annotations/dalle-mini_open-images/A_picture_of_three_chickens_walking_on_a_grassy_hi-3.png
INFO:root:Annotation 4: {
  "description": "A picture of a pile of junk outside of a building.The setting is a dilapidated building.The background is a green plant.."
}


  0%|          | 0/50 [00:00<?, ?it/s]

In [None]:
#### To-do

- Improve image-generation latency, e.g. by implementing multiprocessing or otherwise ensuring efficient GPU usage.

- Set up an evaluation that compares each real image with the synthetic image generated from that real image's annotation.