In [1]:
# Cell 1: Imports
import argparse
from PIL import Image
import cv2
import numpy as np
from diffusers import StableDiffusionPipeline
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Cell 2: Function definitions
def segment_image(image_path):
    """Cut an object out of a white (or near-white) background.

    The image is thresholded in HSV space to find "white" pixels, the
    resulting background mask is cleaned up with an erode/dilate pass, and
    everything that is *not* background is kept as the object.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.

    Returns:
        tuple: ``(result, object_mask)`` where ``result`` is the loaded image
        (still in OpenCV's BGR channel order) with background pixels zeroed,
        and ``object_mask`` is the single-channel mask selecting the object
        (the inverse of the white-background mask).

    Raises:
        FileNotFoundError: If ``cv2.imread`` could not load ``image_path``.
    """
    # Load the image
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque error in cvtColor below.
        raise FileNotFoundError(
            f"Could not read image '{image_path}' (missing file or unsupported format)"
        )

    # Convert to HSV color space
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Define range of white color in HSV: hue is left wide open, so only
    # low saturation (<= 55) and high value (>= 200) decide what is "white".
    lower_white = np.array([0, 0, 200])
    upper_white = np.array([255, 55, 255])

    # Threshold the HSV image to get only white colors (the background)
    background_mask = cv2.inRange(hsv, lower_white, upper_white)

    # Erode then dilate with the same kernel to drop small speckles
    # from the background mask.
    kernel = np.ones((5, 5), np.uint8)
    background_mask = cv2.erode(background_mask, kernel, iterations=1)
    background_mask = cv2.dilate(background_mask, kernel, iterations=1)

    # The object is everything that is not background; invert once and reuse.
    object_mask = cv2.bitwise_not(background_mask)

    # Create a new image with the segmented object
    result = cv2.bitwise_and(img, img, mask=object_mask)

    return result, object_mask


def generate_scene(text_prompt, model_id="CompVis/stable-diffusion-v1-4"):
    """Generate a scene image from a text prompt with Stable Diffusion.

    Args:
        text_prompt: Prompt passed to the pipeline.
        model_id: Model identifier handed to
            ``StableDiffusionPipeline.from_pretrained``. Defaults to the
            checkpoint this notebook has always used.

    Returns:
        The first entry of the pipeline output's ``images`` list.
    """
    # Move the model to GPU if available
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Half precision is only requested on GPU. On the CPU fallback the
    # weights are loaded as float32, because float16 inference on CPU is
    # generally unsupported or fails at run time.
    dtype = torch.float16 if use_cuda else torch.float32

    # Initialize Stable Diffusion pipeline
    pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)

    # Generate image
    image = pipe(text_prompt).images[0]

    return image

def compose_final_image(segmented_object, scene, mask):
    """Overlay the segmented object on top of a generated scene.

    The scene is first resized to the object's width and height, then the
    object is pasted at the top-left corner using ``mask`` to decide which
    pixels are taken from the object.

    Args:
        segmented_object: Image providing ``width``/``height`` and the
            pixels to paste.
        scene: Background image; it is resized and copied, not modified.
        mask: Mask forwarded to ``paste``.

    Returns:
        The composited image.
    """
    target_size = (segmented_object.width, segmented_object.height)

    # Work on a resized copy so the caller's scene is left untouched.
    canvas = scene.resize(target_size).copy()

    top_left = (0, 0)
    canvas.paste(segmented_object, top_left, mask=mask)
    return canvas

In [3]:
# Report the GPU environment. The per-device queries are only made when CUDA
# is actually available, so this cell also runs on CPU-only machines.
cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
print(f"Number of GPUs: {torch.cuda.device_count()}")
if cuda_available:
    print(f"Current GPU: {torch.cuda.current_device()}")
    print(f"GPU name: {torch.cuda.get_device_name(0)}")
else:
    print("No CUDA device detected; the pipeline will run on CPU.")

CUDA available: True
Number of GPUs: 1
Current GPU: 0
GPU name: NVIDIA GeForce RTX 3060 Laptop GPU


In [5]:
# Cell 3: Main logic
image_path = "./example1.jpg"
# Prompt text (a stray opening curly quote was removed from the literal).
text_prompt = "Product in a kitchen used in meal preparation"
output_path = "./generated.png"

# Segment the object from the input image
segmented_object, mask = segment_image(image_path)

# Generate the scene based on the text prompt
scene = generate_scene(text_prompt)

# segment_image returns the pixels in OpenCV's BGR order, while
# Image.fromarray interprets a 3-channel array as RGB. Convert first so the
# pasted object keeps its real colours instead of swapping red and blue.
segmented_object_rgb = cv2.cvtColor(segmented_object, cv2.COLOR_BGR2RGB)

# Compose the final image
final_image = compose_final_image(
    Image.fromarray(segmented_object_rgb),
    scene,
    Image.fromarray(mask),
)

# Save the output
final_image.save(output_path)

print(f"Generated image saved to {output_path}")

Cannot initialize model with low cpu memory usage because `accelerate` was not found in the environment. Defaulting to `low_cpu_mem_usage=False`. It is strongly recommended to install `accelerate` for faster and less memory-intense model loading. You can do so with: 
```
pip install accelerate
```
.
Loading pipeline components...: 100%|██████████| 7/7 [00:11<00:00,  1.61s/it]
100%|██████████| 50/50 [00:09<00:00,  5.16it/s]


Generated image saved to ./generated.png
