## ControlNet
ControlNet was introduced by Lvmin Zhang and Maneesh Agrawala to add conditions to text-to-image diffusion models (https://arxiv.org/abs/2302.05543). There are seven models in the ControlNet pipeline; our focus of study is the Canny edge detection model and how accurate the model's image generation is, given the Canny image of a dataset and its prompt.

Pictorially, training a ControlNet looks like so:
<p align="center">
    <img src="https://github.com/lllyasviel/ControlNet/raw/main/github_page/sd.png" alt="controlnet-structure"><br>
    <em>The diagram is taken from <a href=https://github.com/lllyasviel/ControlNet/blob/main/github_page/sd.png>here</a>.</em>
</p>

In [None]:
!pip install -q diffusers==0.14.0 transformers xformers git+https://github.com/huggingface/accelerate.git
!pip install -q opencv-contrib-python
!pip install -q controlnet_aux

In [None]:
import cv2
import numpy as np
from PIL import Image
import os

In [None]:
# Mount Google Drive at /content/drive; the dataset and output folders used
# below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

# Preparing the dataset
- Convert the horse dataset to Canny edge images so they can be used as input features for the ControlNet model.
- The aim is to analyze the difference in image generation when the dataset is resized versus when it is left at its original size.

In [None]:
# Size passed to Image.resize for the "resized" variant of every image.
target_size = (500, 500)

input_dir = '/content/drive/MyDrive/input_horses'
output_dir = '/content/drive/MyDrive/resized_horses'

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

image_org = []      # images exactly as loaded from input_dir
image_resized = []  # the same images resized to target_size, in the same order

# Build both lists in a single pass over input_dir. Previously the originals
# were collected by listing output_dir, which is empty on a first run (so
# image_org stayed empty) and is not guaranteed to list files in the same
# order as input_dir. One loop keeps image_org[k] and image_resized[k]
# referring to the same source file.
for filename in os.listdir(input_dir):
    img_path = os.path.join(input_dir, filename)

    # Skip sub-directories and other non-file entries.
    if not os.path.isfile(img_path):
        continue

    # Load image
    img = Image.open(img_path)
    image_org.append(img)

    # Resize image
    resized_img = img.resize(target_size)
    image_resized.append(resized_img)

    # Save resized image to the output directory under the original file name
    output_path = os.path.join(output_dir, filename)
    resized_img.save(output_path)
    print(f"processing.....{filename}")

In [None]:
# Hysteresis thresholds passed to cv2.Canny.
low_threshold = 100
high_threshold = 200


def to_canny_image(image, low=low_threshold, high=high_threshold):
    """Return a 3-channel PIL image holding the Canny edge map of *image*.

    Args:
        image: a PIL image (any mode; it is converted to RGB first so that
            cv2.Canny always receives an 8-bit array).
        low: lower Canny threshold.
        high: upper Canny threshold.
    """
    # Convert PIL Image to NumPy array
    image_array = np.array(image.convert("RGB"))

    # Apply Canny edge detection (single-channel result)
    edges = cv2.Canny(image_array, low, high)

    # Replicate the edge map across three channels
    edges = np.stack([edges, edges, edges], axis=2)

    # Convert back to PIL Image
    return Image.fromarray(edges)


# Keep the image collections as plain Python lists. Wrapping the originals in
# np.array() breaks when the images differ in size, because they cannot be
# stacked into one regular array.
image_og = image_org
image_res = image_resized

img = [image_og, image_res]

canny_images_org = [to_canny_image(image) for image in image_og]
canny_images_res = [to_canny_image(image) for image in image_res]

In [None]:
# Preview the first Canny edge map computed from the original-size images.
canny_images_org[0]

In [None]:
# Preview the first Canny edge map computed from the resized images.
canny_images_res[0]

# Building ControlNet Model Pipeline for Analysis

1. **Load Pre-trained ControlNet Model:**
   - We load a pre-trained ControlNet model from the "lllyasviel/sd-controlnet-canny" repository using the `ControlNetModel` class.

2. **Create Stable Diffusion Pipeline:**
   - We create a pipeline that combines the Stable Diffusion model with the ControlNet. The pipeline is configured with the "runwayml/stable-diffusion-v1-5" model.

3. **Configure Scheduler:**
   - We configure the scheduler for the pipeline using a `UniPCMultistepScheduler` from the `diffusers` library.



In [None]:
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
import torch

# Identifiers of the two pre-trained checkpoints used in this notebook.
controlnet_repo = "lllyasviel/sd-controlnet-canny"
base_model_repo = "runwayml/stable-diffusion-v1-5"

# ControlNet weights for Canny-edge conditioning, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(controlnet_repo, torch_dtype=torch.float16)

# Stable Diffusion pipeline with the ControlNet attached, in the same dtype.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    base_model_repo,
    controlnet=controlnet,
    torch_dtype=torch.float16,
)

In [None]:
# Replace the pipeline's scheduler with UniPC, built from the existing
# scheduler's config.
from diffusers import UniPCMultistepScheduler
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Enable model CPU offload. This is a memory-saving measure rather than a
# speed-up: per the diffusers docs, sub-models are moved to the GPU only
# while they are needed.
pipe.enable_model_cpu_offload()

# Enable xFormers memory-efficient attention to reduce memory usage
pipe.enable_xformers_memory_efficient_attention()

# Generating images from the Canny edge images

In [None]:
output_folder_org = '/content/drive/MyDrive/generated_horses_org'
output_folder_res = '/content/drive/MyDrive/generated_horses_res'
os.makedirs(output_folder_org, exist_ok=True)
os.makedirs(output_folder_res, exist_ok=True)

# Seed used for every generation (see the per-call generator below).
seed = 3

# Index 0: edge maps of the original-size images; index 1: of the resized ones.
canny_images = [canny_images_org, canny_images_res]

output_images_org = []
output_images_res = []
# Same indexing as canny_images, so results land in the matching list.
output_lists = [output_images_org, output_images_res]

for i, canny in enumerate(canny_images):
    for j, image in enumerate(canny):
        print(f'Generating image {j} for {i}: image...')

        # Use a freshly seeded generator for each call. Sharing one generator
        # across calls makes every result depend on how many images were
        # generated before it, so a single image could not be reproduced and
        # the org/resized runs started from different generator states.
        generator = torch.Generator(device="cpu").manual_seed(seed)

        output = pipe(
            image=image,  # Canny edge map used as the ControlNet condition
            prompt="horse",
            negative_prompt="monochrome, lowres, bad anatomy, worst quality, low quality",
            generator=generator,
            num_inference_steps=20,
        )

        output_lists[i].append(output.images[0])


In [None]:
# Save the images generated from the original-size edge maps, resized to
# target_size; the resized copies are also collected in resized_input.
resized_input = []
for idx, generated in enumerate(output_images_org):
    scaled = generated.resize(target_size)
    resized_input.append(scaled)
    scaled.save(os.path.join(output_folder_org, f'image_{idx}.png'))

# Images generated from the resized edge maps are saved as they are.
for idx, generated in enumerate(output_images_res):
    generated.save(os.path.join(output_folder_res, f'image_{idx}.png'))

# Measuring the accuracy of the generated images using Learned Perceptual Image Patch Similarity (LPIPS)

In [None]:
# Defining the filepath
generated_horses_org_path = "/content/drive/MyDrive/generated_horses_org"
generated_horses_res_path = "/content/drive/MyDrive/generated_horses_res"
resized_horses_path = "/content/drive/MyDrive/resized_horses"

images = [generated_horses_org_path, generated_horses_res_path, resized_horses_path]


def _generated_sort_key(filename):
    """Sort key placing 'image_<n>.png' names in numeric order of n.

    Names without a numeric suffix sort after all numbered names, by name.
    """
    stem = os.path.splitext(filename)[0]
    suffix = stem.rsplit('_', 1)[-1]
    if suffix.isdigit():
        return (0, int(suffix), filename)
    return (1, 0, filename)


def _load_rgb_images(directory, filenames):
    """Open each named file in *directory* and return it as an RGB PIL image."""
    # convert("RGB") gives every image three channels, which the LPIPS
    # network requires, and forces the pixel data to be read immediately.
    return [
        Image.open(os.path.join(directory, name)).convert("RGB")
        for name in filenames
    ]


# Importing all the resized and generated values again to reduce the runtime of testing the model.
# os.listdir returns names in arbitrary order, so the generated files are
# sorted by their numeric index; otherwise e.g. image_10.png may come before
# image_2.png and the images get paired with the wrong reference.
generated_horses_org = _load_rgb_images(
    generated_horses_org_path,
    sorted(os.listdir(generated_horses_org_path), key=_generated_sort_key),
)
generated_horses_res = _load_rgb_images(
    generated_horses_res_path,
    sorted(os.listdir(generated_horses_res_path), key=_generated_sort_key),
)

# NOTE(review): the reference images keep the plain directory-listing order,
# as before. Pairing with image_<i>.png assumes this listing order matches the
# order in which the inputs were processed during generation - TODO confirm.
resized_horses = _load_rgb_images(resized_horses_path, os.listdir(resized_horses_path))


In [None]:
!pip install torchmetrics

In [None]:
import torch
_ = torch.manual_seed(123)  # fix the global torch seed
from torchvision import transforms
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity

# normalize=True makes the metric expect inputs in [0, 1], which is the range
# produced by transforms.ToTensor(); without it the metric expects [-1, 1].
# This also keeps this instance consistent with the one created right before
# the scores are computed.
lpips = LearnedPerceptualImagePatchSimilarity(net_type='squeeze', normalize=True)

In [None]:
# Preprocessing shared by all three image sets: resize to a common size and
# convert to a tensor.
target_size = (500, 500)
transform = transforms.Compose([
    transforms.Resize(target_size),
    transforms.ToTensor(),
])

# Apply the same preprocessing to every image of each set.
generated_horses_org_tensor = list(map(transform, generated_horses_org))
generated_horses_res_tensor = list(map(transform, generated_horses_res))
resized_horses_tensor = list(map(transform, resized_horses))

In [None]:
# Function to calculate LPIPS scores
def calculate_lpips_scores(images1, images2, metric=None):
    """Compute a pairwise score for images1[k] versus images2[k].

    Args:
        images1: sequence of image tensors; each gets a leading batch
            dimension via unsqueeze(0) before being scored.
        images2: sequence of image tensors paired with images1 by position.
        metric: callable taking two batched tensors and returning a score.
            Defaults to the module-level ``lpips`` metric, looked up when the
            function is called.

    Returns:
        A list with one score per pair. All available pairs are scored (the
        earlier fixed count of 50 raised IndexError on smaller sets and
        ignored extras). If the sequences differ in length, only the common
        prefix is scored and a warning is printed.
    """
    if metric is None:
        metric = lpips

    if len(images1) != len(images2):
        print(
            f"Warning: image lists differ in length "
            f"({len(images1)} vs {len(images2)}); scoring the common prefix only."
        )

    lpips_scores = []
    for i, (first, second) in enumerate(zip(images1, images2)):
        score = metric(first.unsqueeze(0), second.unsqueeze(0))
        lpips_scores.append(score)
        print(f"LPIPS Value of {i}: {score}")
    return lpips_scores

# Metric instance with normalize=True, used for both comparisons below.
lpips = LearnedPerceptualImagePatchSimilarity(net_type='squeeze', normalize=True)

# Both generated sets are scored against the same resized reference images.
reference_tensors = resized_horses_tensor
lpips_scores_org = calculate_lpips_scores(generated_horses_org_tensor, reference_tensors)
lpips_scores_res = calculate_lpips_scores(generated_horses_res_tensor, reference_tensors)

In [None]:
import matplotlib.pyplot as plt

# Create each figure explicitly and hand its axes to lpips.plot. A bare
# plt.figure(...) followed by lpips.plot(...) leaves an empty extra figure,
# because plot() opens its own figure when no axes are given, so the
# requested figsize was never applied to the actual plot.

# Plotting the LPIPS scores for org images
fig, ax = plt.subplots(figsize=(10, 5))
lpips.plot(lpips_scores_org, ax=ax)
plt.show()

# Plotting the LPIPS scores for resized images
fig, ax = plt.subplots(figsize=(10, 5))
lpips.plot(lpips_scores_res, ax=ax)
plt.show()

In [None]:
def _scores_to_numpy(scores):
    """Convert a list of score tensors into a NumPy array."""
    return np.array([score.detach().numpy() for score in scores])


lpips_scores_org_np = _scores_to_numpy(lpips_scores_org)
lpips_scores_res_np = _scores_to_numpy(lpips_scores_res)

# Average score of each generated set against the resized references.
mean_lpips_org = lpips_scores_org_np.mean()
mean_lpips_res = lpips_scores_res_np.mean()

print("Mean LPIPS for org images:", mean_lpips_org)
print("Mean LPIPS for resized images:", mean_lpips_res)