In [1]:
# Import necessary libraries
from pathlib import Path  # Module for working with file paths
import tqdm  # Library for displaying progress bars
import torch  #girst PyTorch library for deep learning
import pandas as pd  # Pandas library for data manipulation and analysis
import numpy as np  # NumPy library for numerical operations
from diffusers import StableDiffusionPipeline  # Stable Diffusion model for image generation
from transformers import pipeline, set_seed  # Hugging Face Transformers library for natural language processing
import matplotlib.pyplot as plt  # Matplotlib library for data visualization
import cv2  # OpenCV library for computer vision

In [2]:
# Configuration class to store various settings
class CFG:
    # Device for computation (CUDA for GPU if available)
    device = "cuda"
    
    # Seed for reproducibility
    seed = 42
    
    # PyTorch random number generator initialization
    generator = torch.Generator(device).manual_seed(seed)
    
    # Parameters for image generation
    image_gen_steps = 35  # Number of steps for image generation
    image_gen_model_id = "stabilityai/stable-diffusion-2"  # Model ID for Stable Diffusion
    image_gen_size = (400, 400)  # Size of the generated image
    image_gen_guidance_scale = 9  # Guidance scale for the image generation
    
    # Model ID for prompt-based text generation
    prompt_gen_model_id = "gpt2"
    
    # Dataset size for prompt-based text generation
    prompt_dataset_size = 6
    
    # Maximum length of the generated prompt
    prompt_max_length = 12

In [3]:
# Load the Stable Diffusion model for image generation
image_gen_model = StableDiffusionPipeline.from_pretrained(
    CFG.image_gen_model_id,
    torch_dtype=torch.float16,
    revision="fp16",
    use_auth_token='Hugging_face_api_key',
    guidance_scale=CFG.image_gen_guidance_scale
)

# Move the model to the specified device (CPU or GPU)
image_gen_model = image_gen_model.to(CFG.device)

unet\diffusion_pytorch_model.safetensors not found
Keyword arguments {'guidance_scale': 9} are not expected by StableDiffusionPipeline and will be ignored.


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

In [4]:
# Function to generate an image given a prompt using the loaded model
def generate_image(prompt, model):
    # Generate an image based on the provided prompt
    image = model(
        prompt,
        num_inference_steps=CFG.image_gen_steps,
        generator=CFG.generator,
        guidance_scale=CFG.image_gen_guidance_scale
    ).images[0]
    
    # Resize the generated image to the specified size
    image = image.resize(CFG.image_gen_size)
    
    # Return the generated image
    return image

In [5]:
# Example usage: generate an image with the prompt "a boy sit on chair"
generate_image("a boy sit on chair ", image_gen_model)


  0%|          | 0/35 [00:00<?, ?it/s]