## Installing Dependencies

In [None]:
# Install transformers from the GitHub main branch, plus torch (unpinned).
%pip install -q git+https://github.com/huggingface/transformers.git
%pip install torch

# Fetch the DreamBooth example training script from the diffusers repository
# into the current working directory.
!wget -q https://github.com/huggingface/diffusers/raw/main/examples/dreambooth/train_dreambooth.py
# The "&> /dev/null" suffix on the next three installs is intended to silence
# their output, so any install failure here will not be visible.
%pip install -qq git+https://github.com/huggingface/diffusers.git &> /dev/null
%pip install -q -U --pre triton &> /dev/null
%pip install -q accelerate transformers ftfy xformers bitsandbytes==0.35.0 &> /dev/null

# NOTE(review): diffusers was installed from git above and is pinned to 0.14.0
# here; transformers, xformers and accelerate are also requested a second time
# (accelerate now from git). Presumably the last install wins - confirm which
# versions are actually intended and drop the duplicates.
%pip install -q diffusers==0.14.0 transformers xformers git+https://github.com/huggingface/accelerate.git
# OpenCV (contrib build) and the ControlNet auxiliary preprocessors (pinned).
%pip install -q opencv-contrib-python
%pip install -q controlnet_aux==0.0.1

## Import Packages

In [None]:
import cv2
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
import numpy as np
from PIL import Image
from torchvision import transforms
from google.colab.patches import cv2_imshow
from transformers import Swin2SRForImageSuperResolution
from transformers import Swin2SRImageProcessor 
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
from controlnet_aux import HEDdetector

## Dreambooth Image Generation

## ControlNet Finetuning

In [None]:
class CN_pipeline:
    """Stable Diffusion v1.5 + ControlNet (HED edge conditioning) helper.

    The constructor loads the ControlNet and Stable Diffusion weights in
    float16, swaps in the UniPC multistep scheduler and enables model CPU
    offload plus xformers memory-efficient attention. `process` turns an input
    image into an HED edge map and runs the pipeline on it.
    """

    # Settings used when the caller passes no config, or one that lacks a
    # required key. Never mutated; `process` works on a copy.
    DEFAULT_CONFIG = {"num_inference_steps": 20, "num_images_per_prompt": 4}

    def __init__(self):
        controlnet = ControlNetModel.from_pretrained(
            "fusing/stable-diffusion-v1-5-controlnet-hed", torch_dtype=torch.float16
        )
        pipe = StableDiffusionControlNetPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", controlnet=controlnet, torch_dtype=torch.float16
        )
        pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.enable_model_cpu_offload()
        pipe.enable_xformers_memory_efficient_attention()
        self.pipe = pipe
        # The HED detector is created lazily on the first `process` call and
        # then reused, instead of being reloaded for every image.
        self._hed = None

    def _get_hed(self):
        """Return the cached HED detector, loading it on first use."""
        # getattr keeps this working for instances that were built without
        # running __init__ (e.g. in tests).
        hed = getattr(self, "_hed", None)
        if hed is None:
            hed = HEDdetector.from_pretrained('lllyasviel/ControlNet')
            self._hed = hed
        return hed

    def process(self, image, prompt="", canny_config=None):
        """Generate images conditioned on the HED edge map of `image`.

        Args:
            image: Input image handed to the HED detector.
            prompt: Text prompt forwarded to the pipeline.
            canny_config: Mapping with the keys "num_inference_steps" and
                "num_images_per_prompt" (values are passed through `int`).
                `None` selects the defaults (20 steps, 4 images). If either
                key is missing, a warning is printed and the defaults replace
                the whole mapping.

        Returns:
            The first element of the pipeline's return value (for diffusers
            pipeline outputs this is presumably the list of generated images).
        """
        if canny_config is None:
            canny_config = dict(self.DEFAULT_CONFIG)
        elif not all(key in canny_config for key in self.DEFAULT_CONFIG):
            print("canny_config missing num_inference_steps or num_images_per_prompt")
            print("Using default canny_config")
            canny_config = dict(self.DEFAULT_CONFIG)

        hed_image = self._get_hed()(image)
        images = self.pipe(
            prompt,
            hed_image,
            num_inference_steps=int(canny_config["num_inference_steps"]),
            num_images_per_prompt=int(canny_config["num_images_per_prompt"]),
        )[0]
        return images

In [None]:

# Build the ControlNet pipeline; the pretrained weights are loaded inside
# CN_pipeline.__init__.
testp = CN_pipeline()

# NOTE(review): download_image is not defined or imported anywhere in the
# visible notebook, so this line presumably raises NameError on a fresh kernel
# unless the helper lives in a cell not shown here - confirm. The meaning of
# its second argument (False) cannot be determined from this file.
extern_img2 = download_image("https://www.bing.com/images/blob?bcid=qOKuNG1lPVkFfA", False).resize((512,512))

# Passing canny_config is optional: CN_pipeline.process has the same values
# built in as its default.
canny_config = {"num_inference_steps":20, "num_images_per_prompt":4}
prompt = "Car on cobble street with a sunny sky"
images = testp.process(extern_img2,prompt=prompt,canny_config=canny_config)

## Inpainting of Focus Object

## Silhouette

In [None]:
# Label index compared against the per-pixel class map in makeSegMask; only
# people are segmented, for the purpose of human silhouette extraction.
people_class = 15

# 3x3 smoothing kernel (weights sum to 1) shaped (1, 1, 3, 3) for F.conv2d.
blur = torch.FloatTensor([[[
    [1.0, 2.0, 1.0],
    [2.0, 4.0, 2.0],
    [1.0, 2.0, 1.0],
]]]) / 16.0

# Per-channel normalization applied to every frame before inference.
preprocess = transforms.Compose([
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Fetch the pretrained DeepLabV3 (ResNet-101 backbone) network via torch.hub.
model = torch.hub.load('pytorch/vision:v0.6.0', 'deeplabv3_resnet101', pretrained=True)

# Inference only: switch the network to evaluation mode.
model.eval()
print ("Model has been loaded.")

# Move both the network and the blur kernel to the GPU when one is available.
if torch.cuda.is_available():
    model.to('cuda')
    blur = blur.to('cuda')

# Function to create segmentation mask
def makeSegMask(img):
    """Build a soft person mask for a single image.

    Relies on the notebook globals `model`, `preprocess`, `blur` and
    `people_class`.

    Args:
        img: Image array indexed as (height, width, channel). Assumes three
            channels with values in 0-255, matching the /255 scaling and the
            3-channel normalization - TODO confirm against callers.

    Returns:
        A uint8 numpy array of shape (height, width, 3) with values in 0-255;
        the three channels are identical copies of the mask.
    """
    # Scale to [0, 1], move channels first, normalize and add a batch axis.
    scaled = torch.FloatTensor(img) / 255.0
    batch = preprocess(scaled.permute(2, 0, 1)).unsqueeze(0)

    # Use GPU if supported, for better performance.
    if torch.cuda.is_available():
        batch = batch.to('cuda')

    with torch.no_grad():
        logits = model(batch)['out'][0]

    # Per-pixel winning class index.
    labels = logits.argmax(0)

    # Soft weight derived from the channel-0 scores (called bgOut in the
    # earlier version of this function; presumably the background class).
    channel0 = logits[0:1]
    softness = (1.0 - F.relu(torch.tanh(channel0 * 0.30 - 1.0))).pow(0.5) * 2.0

    # Binary person map, shaped (1, 1, H, W) so it can be convolved.
    person = labels.eq(people_class).float()
    person = person.unsqueeze(0).unsqueeze(0)

    # Three passes of the 3x3 blur kernel soften the mask edges.
    for _ in range(3):
        person = F.conv2d(person, blur, stride=1, padding=1)

    # Combine both maps and clamp the product into [0, 1].
    gated = F.hardtanh(softness * person.squeeze().pow(1.5))
    mask = F.relu(gated).expand(1, 3, -1, -1)

    return (mask * 255.0).cpu().squeeze().byte().permute(1, 2, 0).numpy()

In [None]:
# NOTE: hard-coded Colab path; the file has to be uploaded there beforehand.
img = cv2.imread('/content/sddefault.png')
# cv2.imread reports failure by returning None instead of raising, so fail
# early with a clear message rather than deep inside makeSegMask.
if img is None:
    raise FileNotFoundError("Could not read image: '/content/sddefault.png'")

# OpenCV loads pixels in BGR order, while the mean/std used by `preprocess`
# are in the usual RGB order, so convert before running the segmentation.
mask = makeSegMask(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

# Apply thresholding to convert the soft mask to a binary map
ret,thresh = cv2.threshold(mask,127,255,cv2.THRESH_BINARY)

# Keep only the silhouette pixels of the original (BGR) frame
final = cv2.bitwise_and(thresh, img)

# Display the soft mask (not `final`) using the Colab cv2_imshow patch
cv2_imshow(mask)


## Upscaling

In [3]:
# Load the Swin2SR x2 classical super-resolution checkpoint.
# NOTE(review): this rebinds the global `model`, which makeSegMask in the
# Silhouette section also reads - re-running the silhouette cells after this
# one would presumably call the wrong network; confirm and consider renaming.
model = Swin2SRForImageSuperResolution.from_pretrained("caidas/swin2SR-classical-sr-x2-64")
# Force 3-channel RGB so RGBA, palette or grayscale PNGs do not reach the
# processor/model with an unexpected channel count (presumably 3 is expected).
image = Image.open('/content/sddefault.png').convert("RGB")
image

FileNotFoundError: [Errno 2] No such file or directory: '/content/sddefault.png'

In [None]:
# Image processor with its default settings; it returns PyTorch tensors here.
processor = Swin2SRImageProcessor()
pixel_values = processor(image, return_tensors="pt")["pixel_values"]

# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(pixel_values)

In [None]:
# Clamp the reconstruction to [0, 1], move the leading axis to the end,
# then rescale to 0-255 and convert float32 to uint8 for PIL.
output = (
    np.moveaxis(
        outputs.reconstruction.data.squeeze().float().cpu().clamp_(0, 1).numpy(),
        source=0,
        destination=-1,
    )
    * 255.0
).round().astype(np.uint8)
Image.fromarray(output)