# Stable Diffusion Model

In [1]:
!pip install torch torchvision torchaudio
!pip install diffusers transformers

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [6]:
import torch
from PIL import Image
from diffusers import StableDiffusionImg2ImgPipeline
import cv2

# Build the img2img pipeline from the v1-4 checkpoint and place it on the GPU.
# `.to()` hands back the pipeline, so loading and the device move are chained.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4").to("cuda")

# Read the source picture with OpenCV. A failed read shows up as None rather
# than an exception, so it is checked explicitly.
img_path = '/content/image.jpg'
img = cv2.imread(img_path)
if img is None:
    raise ValueError(f"Failed to load image from {img_path}")

# Reorder the channels from BGR to RGB, then wrap the array as an RGB PIL image.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
init_image = Image.fromarray(img).convert("RGB")

# Sanity check: the pipeline input is expected to be a PIL.Image.Image.
print("Type of init_image:", type(init_image))

def denoise_image(pipe, init_image, prompt, strength=0.75, num_inference_steps=50, seed=0):
    """Run one img2img pass over ``init_image`` and return the first result.

    Args:
        pipe: Callable pipeline. It is invoked with the keyword arguments
            ``prompt``, ``image``, ``strength``, ``num_inference_steps`` and
            ``generator`` and must return an object exposing ``.images``.
        init_image: Starting image, forwarded to the pipeline as ``image``.
        prompt: Text prompt forwarded to the pipeline.
        strength: Forwarded unchanged to the pipeline (default 0.75).
        num_inference_steps: Forwarded unchanged to the pipeline (default 50).
        seed: Seed for the random generator handed to the pipeline. The
            default of 0 matches the seed that used to be hard-coded.

    Returns:
        The first element of the pipeline result's ``images``.
    """
    # A dedicated generator keeps the run reproducible without reseeding
    # torch's global RNG, which `torch.manual_seed` does as a side effect.
    generator = torch.Generator(device="cpu").manual_seed(seed)
    with torch.no_grad():
        result = pipe(
            prompt=prompt,
            image=init_image,
            strength=strength,
            num_inference_steps=num_inference_steps,
            generator=generator,
        )
    return result.images[0]

# Text guidance for the img2img pass (optional, but it steers the result).
prompt = "A highly detailed, sharp, and clear Nike product label with the number 056417070, professional studio lighting, no blur, ultra-high resolution, photorealistic, cinematic quality, and crisp typography."

# Run the denoising pass with the default strength and step count,
# then write the result to disk as a PNG.
denoised_image = denoise_image(pipe=pipe, init_image=init_image, prompt=prompt)
denoised_image.save("denoised_image.png")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Type of init_image: <class 'PIL.Image.Image'>


  0%|          | 0/37 [00:00<?, ?it/s]

# Image Enhancement with Advanced Stable Diffusion

In [10]:
!pip install diffusers transformers accelerate torch Pillow



In [20]:
from diffusers import StableDiffusionImg2ImgPipeline
import torch
from PIL import Image

# Load pre-trained model (SD 1.5 or 2.1)
model_id = "runwayml/stable-diffusion-v1-5"

# Choose device and precision together: half precision is only requested when
# a CUDA GPU is present; otherwise the pipeline runs on the CPU in float32.
device = "cuda" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if device == "cuda" else torch.float32

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    use_safetensors=True,
).to(device)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

In [21]:
# Open the source picture as RGB and scale it to 512x512 (SD's default resolution).
input_image = Image.open("/content/image.jpg").convert("RGB").resize((512, 512))

In [22]:
prompt = "high quality, sharp focus, 4k resolution, professional photography"
negative_prompt = "blurry, noisy, pixelated, low resolution"  # Optional

# Fixed seed so that re-running this cell reproduces the same image; a
# dedicated generator avoids reseeding torch's global RNG.
generator = torch.Generator(device="cpu").manual_seed(0)

enhanced_image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=input_image,
    strength=0.4,  # Controls how much to alter the input (0=no change, 1=full generation)
    guidance_scale=9.0,  # Higher = stricter prompt adherence (7-15 works well)
    # More steps = better quality (but slower). With strength=0.4 only
    # 0.4 * 50 = 20 denoising steps actually run (the progress bar shows 0/20).
    num_inference_steps=50,
    generator=generator,
).images[0]

enhanced_image.save("enhanced_image.jpg")

  0%|          | 0/20 [00:00<?, ?it/s]

# Image Enhancement with Controlled Edges

In [15]:
!pip install controlnet-aux

Collecting controlnet-aux
  Downloading controlnet_aux-0.0.9-py3-none-any.whl.metadata (6.5 kB)
Collecting timm<=0.6.7 (from controlnet-aux)
  Downloading timm-0.6.7-py3-none-any.whl.metadata (33 kB)
Downloading controlnet_aux-0.0.9-py3-none-any.whl (282 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 282.4/282.4 kB 19.7 MB/s eta 0:00:00
Downloading timm-0.6.7-py3-none-any.whl (509 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 510.0/510.0 kB 36.8 MB/s eta 0:00:00
Installing collected packages: timm, controlnet-aux
  Attempting uninstall: timm
    Found existing installation: timm 1.0.15
    Uninstalling timm-1.0.15:
      Successfully uninstalled timm-1.0.15
Successfully installed controlnet-aux-0.0.9 timm-0.6.7


In [23]:
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
from controlnet_aux import CannyDetector

from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline
from controlnet_aux import CannyDetector

# Canny ControlNet weights plus the SD 1.5 ControlNet img2img pipeline,
# both requested in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_canny",
    torch_dtype=torch.float16,
)
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)
# Move the assembled pipeline to the GPU as a separate step.
pipe = pipe.to("cuda")

# Source picture, forced to RGB and scaled to 512x512 (SD's default resolution).
input_image = Image.open("/content/image.jpg").convert("RGB").resize((512, 512))

# Positive prompt and an optional negative prompt.
prompt = "high quality, sharp focus, 4k resolution, professional photography"
negative_prompt = "blurry, noisy, pixelated, low resolution"

# Build the edge map of the input image with the Canny detector.
canny_detector = CannyDetector()
canny_image = canny_detector(
    input_image,
    low_threshold=100,
    high_threshold=200,
)

# Enhance with edge guidance.
# Fixed seed so that re-running this cell reproduces the same image; a
# dedicated generator avoids reseeding torch's global RNG.
generator = torch.Generator(device="cpu").manual_seed(0)

enhanced_image = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,  # defined with the prompt; must be passed to take effect
    image=input_image,
    control_image=canny_image,  # edge map that constrains the generated structure
    strength=0.5,
    guidance_scale=10,
    generator=generator,
).images[0]

# NOTE: this reuses the filename written by the plain img2img section of this
# notebook, so that earlier result is overwritten on disk.
enhanced_image.save("enhanced_image.jpg")

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]