In [8]:
import os
import random
import shutil

def select_and_rename_image_files(path, num_images, label):
    """Copy a random subset of the images in ``path`` into ``path/selected_images``.

    Each copy is named ``<index>_<label><ext>``, where ``index`` starts at 1 and
    ``ext`` is the source file's own extension in lower case, so a PNG is never
    mislabelled as a JPEG.

    Args:
        path: Directory that holds the source images.
        num_images: Number of images to pick. When the directory holds fewer
            images than this, every available image is used.
        label: Text embedded in each new file name.

    Note:
        An existing ``selected_images`` directory under ``path`` is deleted and
        recreated on every call.
    """
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    # Match extensions case-insensitively and skip sub-directories. Sorting makes
    # the sample reproducible under a fixed random seed, because os.listdir
    # returns entries in arbitrary order.
    image_files = sorted(
        f for f in os.listdir(path)
        if f.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(path, f))
    )

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request at what is actually available.
    sample_size = min(num_images, len(image_files))
    if sample_size < num_images:
        print(f"Only {len(image_files)} images found in {path}; requested {num_images}")
    selected_images = random.sample(image_files, sample_size)

    # Define the path to the new directory
    selected_path = os.path.join(path, "selected_images")

    # Start from an empty output directory so stale copies never linger
    if os.path.exists(selected_path):
        shutil.rmtree(selected_path)
    os.makedirs(selected_path, exist_ok=True)

    # Copy and rename the selected images to the new directory
    for i, image in enumerate(selected_images, start=1):
        extension = os.path.splitext(image)[1].lower()
        new_filename = f"{i}_{label}{extension}"
        shutil.copy(os.path.join(path, image), os.path.join(selected_path, new_filename))

    print(f"Selected {len(selected_images)} images, renamed them, and copied them to {selected_path}")

# Build a 40-image, label-renamed subset for each (folder, label) pair
datasets = [
    ("dataset/cat", "cat"),
    ("dataset/dog", "dog"),
    ("dataset/swan", "swan"),
]
for dataset_path, label in datasets:
    select_and_rename_image_files(path=dataset_path, num_images=40, label=label)


Selected 40 images, renamed them, and copied them to dataset/cat/selected_images
Selected 40 images, renamed them, and copied them to dataset/dog/selected_images
Selected 40 images, renamed them, and copied them to dataset/swan/selected_images


In [9]:
import os
import random
import shutil
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import pandas as pd

# Processor/model pairs already loaded in this kernel, keyed by checkpoint name.
_BLIP_CACHE = {}


def _load_blip(model_name):
    """Return the (processor, model) pair for ``model_name``, loading it at most once."""
    if model_name not in _BLIP_CACHE:
        _BLIP_CACHE[model_name] = (
            BlipProcessor.from_pretrained(model_name),
            BlipForConditionalGeneration.from_pretrained(model_name),
        )
    return _BLIP_CACHE[model_name]


def generate_captions_for_dataset(dataset_name, base_path="dataset", num_images=40,
                                  model_name="Salesforce/blip-image-captioning-large"):
    """Caption a random sample of images and write the captions to a CSV file.

    Images are read from ``base_path/dataset_name``. The result is written to
    ``base_path/<dataset_name>_captions/captions.csv`` with the columns
    ``image`` (source file name) and ``caption`` (generated text).

    Args:
        dataset_name: Image folder, relative to ``base_path``.
        base_path: Root directory that contains the dataset folders.
        num_images: Number of images to caption. When the folder holds fewer
            images than this, every available image is captioned.
        model_name: BLIP checkpoint passed to ``from_pretrained``.

    Note:
        The processor and model are kept in a module-level cache so repeated
        calls do not reload them. If the folder contains no images, the model
        is not loaded and a header-only CSV is written.
    """
    dataset_path = os.path.join(base_path, dataset_name)
    captions_path = os.path.join(base_path, f"{dataset_name}_captions")
    os.makedirs(captions_path, exist_ok=True)

    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.gif')

    # Match extensions case-insensitively and skip sub-directories. Sorting makes
    # the sample reproducible under a fixed random seed, because os.listdir
    # returns entries in arbitrary order.
    image_files = sorted(
        f for f in os.listdir(dataset_path)
        if f.lower().endswith(image_extensions)
        and os.path.isfile(os.path.join(dataset_path, f))
    )

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request at what is actually available.
    sample_size = min(num_images, len(image_files))
    if sample_size < num_images:
        print(f"Only {len(image_files)} images found in {dataset_path}; requested {num_images}")
    selected_images = random.sample(image_files, sample_size)

    captions = []

    # Only pay for loading the model when there is something to caption
    if selected_images:
        processor, model = _load_blip(model_name)

    for image_file in selected_images:
        image_path = os.path.join(dataset_path, image_file)

        # convert() returns a fully loaded copy, so the file handle can be
        # released as soon as the with-block ends.
        with Image.open(image_path) as opened_image:
            raw_image = opened_image.convert("RGB")  # Ensure the image is in RGB mode

        inputs = processor(raw_image, return_tensors="pt")

        generated_ids = model.generate(
            **inputs,
            min_length=25,  # Set a minimum length to encourage longer captions
            max_length=100,  # Set a maximum length to avoid excessively long captions
            num_beams=5,  # Use beam search to improve the quality of the generated caption
            no_repeat_ngram_size=2,  # Avoid repeating n-grams of the specified size
            early_stopping=True  # Stop beam search once num_beams finished candidates exist
        )

        caption = processor.decode(generated_ids[0], skip_special_tokens=True)

        # Store the caption with its corresponding image file name
        captions.append({"image": image_file, "caption": caption})

    # Explicit columns keep the CSV header present even when nothing was captioned
    captions_df = pd.DataFrame(captions, columns=["image", "caption"])
    captions_df.to_csv(os.path.join(captions_path, "captions.csv"), index=False)

    print(f"Generated captions for {dataset_name} images and saved to CSV.")

In [10]:
# Caption the selected_images subset of each class folder, using the
# function's default base path and sample size.
datasets = [
    "dog/selected_images",
    "cat/selected_images",
    "swan/selected_images",
]
for dataset in datasets:
    generate_captions_for_dataset(dataset_name=dataset)

Generated captions for dog/selected_images images and saved to CSV.
Generated captions for cat/selected_images images and saved to CSV.
Generated captions for swan/selected_images images and saved to CSV.


In [None]:
import torch
from diffusers import (
    StableDiffusionXLPipeline, 
    KDPM2AncestralDiscreteScheduler,
    AutoencoderKL
)

# Standalone VAE checkpoint, loaded in half precision
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# SDXL pipeline built around that VAE, also in half precision
pipe = StableDiffusionXLPipeline.from_pretrained("Corcelio/mobius", vae=vae, torch_dtype=torch.float16)

# Replace the default scheduler with KDPM2 ancestral, reusing the existing scheduler's config
pipe.scheduler = KDPM2AncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Move the whole pipeline to the GPU; this cell requires a CUDA device
pipe.to('cuda')

# Text conditioning for the generation
prompt = "mystery"
negative_prompt = ""

# Run one 1024x1024 generation and keep the first returned image
image = pipe(
    prompt,
    negative_prompt=negative_prompt,
    width=1024,
    height=1024,
    guidance_scale=7,
    num_inference_steps=50,
    clip_skip=3,
).images[0]

# Write the result to the working directory
image.save("generated_image.png")


config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/671 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

text_encoder_2/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/588 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.78G [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/856 [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/10.3G [00:00<?, ?B/s]

In [None]:
import torch
from diffusers import (
    StableDiffusionXLPipeline, 
    KDPM2AncestralDiscreteScheduler,
    AutoencoderKL
)

# NOTE(review): this cell repeats the SDXL generation cell directly above it,
# line for line; consider keeping only one of the two.

# Standalone VAE checkpoint, loaded in half precision
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)

# SDXL pipeline built around that VAE, also in half precision
pipe = StableDiffusionXLPipeline.from_pretrained("Corcelio/mobius", vae=vae, torch_dtype=torch.float16)

# Replace the default scheduler with KDPM2 ancestral, reusing the existing scheduler's config
pipe.scheduler = KDPM2AncestralDiscreteScheduler.from_config(pipe.scheduler.config)

# Move the whole pipeline to the GPU; this cell requires a CUDA device
pipe.to('cuda')

# Text conditioning for the generation
prompt = "mystery"
negative_prompt = ""

# Run one 1024x1024 generation and keep the first returned image
image = pipe(
    prompt,
    negative_prompt=negative_prompt,
    width=1024,
    height=1024,
    guidance_scale=7,
    num_inference_steps=50,
    clip_skip=3,
).images[0]

# Write the result to the working directory (overwrites any existing file of that name)
image.save("generated_image.png")


config.json:   0%|          | 0.00/631 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/671 [00:00<?, ?B/s]

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

text_encoder_2/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/558 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/705 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/560 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/588 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/462 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.78G [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/856 [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.77k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/10.3G [00:00<?, ?B/s]

In [None]:
from diffusers import DiffusionPipeline

# Load the Stable Diffusion v1.5 checkpoint through the generic DiffusionPipeline
# loader, with default settings (no dtype or device is specified here).
# NOTE(review): `pipeline` is not referenced by any later cell visible in this
# notebook; the following cell builds its own `pipe` instead.
pipeline = DiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")


In [None]:
from diffusers import StableDiffusionPipeline
import torch

# Load Stable Diffusion v1.5 in half precision and place it on the GPU
model_id = "runwayml/stable-diffusion-v1-5"
pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

# Generate a single image for the prompt and keep the first result
prompt = "a cat with blue eyes laying on a sofa"
image = pipe(prompt).images[0]

# NOTE(review): the file name does not describe the prompt above; it looks like
# a leftover from an example snippet. Confirm before renaming.
image.save("astronaut_rides_horse.png")