# Setting up

In [None]:
!pip install diffusers transformers accelerate

In [None]:
# Run the Accelerate CLI's `config default` subcommand before any model is loaded.
# NOTE(review): presumably this writes a default Accelerate configuration file — confirm against the Accelerate CLI docs.
!accelerate config default

In [None]:
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel
from diffusers import UniPCMultistepScheduler
from diffusers import StableDiffusionControlNetPipeline
from diffusers.utils import load_image
import PIL
import torch
import io
from google.colab import files

import cv2
import numpy as np
from PIL import Image, ImageOps

import requests

# Loading Model

In [None]:
#@title **Hugging Face model**

model_repository = 'GreeneryScenery/SheepsControlV5' #@param {type:"string"}
stable_diffusion_repository = 'stabilityai/stable-diffusion-2-1-base' #@param {type:"string"}

# Load the ControlNet weights in float16.
controlnet = ControlNetModel.from_pretrained(
    model_repository,
    torch_dtype=torch.float16,
)

# Build the Stable Diffusion pipeline around that ControlNet, also in float16.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    stable_diffusion_repository,
    controlnet=controlnet,
    torch_dtype=torch.float16,
)

# Replace the pipeline's scheduler with UniPC, reusing the existing scheduler's config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Load the individual model components onto the GPU on demand.
pipe.enable_model_cpu_offload()

# Preprocess Image

In [None]:
from transformers import BlipProcessor, BlipForConditionalGeneration

# The BLIP processor and captioning model are both loaded from the same repository.
blip_repository = "Salesforce/blip-image-captioning-large"
processor = BlipProcessor.from_pretrained(blip_repository)
model = BlipForConditionalGeneration.from_pretrained(blip_repository)
# Captioning runs on the GPU.
model = model.to("cuda")

In [None]:
# Prompt template wrapped in <|prompter|> / <|assistant|> markers; `|INPUT|` marks where the keywords go.
# NOTE(review): `open_assistant_generator` builds its request from a template defined inside the
# function, so this module-level template is not read by it.
format_open_assistant = '<|prompter|>Please form a creative sentence describing an image from these words: [|INPUT|]. This sentence will be used for image generation in ControlNet and Stable Diffusion so be as descriptive and creative as possible, while not adding too much extra stuff.<|endoftext|><|assistant|>'

In [None]:
# Hugging Face Inference API endpoint for the OpenAssistant model; `query_oa` posts to it.
API_URL = "https://api-inference.huggingface.co/models/OpenAssistant/oasst-sft-1-pythia-12b"

# TODO: Replace with your own API key.
# NOTE(review): the Inference API presumably expects the value in the form "Bearer <token>" — confirm against the API docs.
# Do not commit a real token with the notebook.
headers = {"Authorization": ""}

def query_oa(payload, timeout=120):
    """POST `payload` as JSON to `API_URL` and return the decoded JSON response.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"inputs": "..."}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook indefinitely.

    Returns:
        The parsed JSON body of the response (returned as-is, including any
        error payload the server sends).

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            `timeout` seconds.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    return response.json()

In [None]:
def open_assistant_generator(string):
    """Ask the OpenAssistant endpoint to expand `string` into an image-generation prompt.

    Args:
        string: Concept or keywords to expand, e.g. ``'cat with flowers'``.

    Returns:
        The generated text as a ``str``, or ``None`` when the response carries
        no generated text (for example when the API answers with an error payload).
    """
    # Request template; '|INPUT|' is replaced by the caller's concept.
    prompt_template = "Please provide a detailed and creative prompt for generating an image based on the following concept: |INPUT|"
    text_open_assistant = prompt_template.replace('|INPUT|', string)
    output = query_oa({
        "inputs": text_open_assistant,
    })

    # NOTE(review): assumes a successful response looks like [{"generated_text": "..."}] — confirm against the API docs.
    generated_text = None
    if isinstance(output, list) and output and isinstance(output[0], dict):
        generated_text = output[0].get("generated_text")
    if not isinstance(generated_text, str):
        # Surface the payload so a missing API key or a loading model is visible to the user.
        print(f"OpenAssistant returned no generated text: {output!r}")
        return None

    # If the response echoes the request text as a prefix, keep only the completion.
    if generated_text.startswith(text_open_assistant):
        generated_text = generated_text[len(text_open_assistant):]
    return generated_text.strip()

In [None]:
 #open_assistant_generator('cat with flowers')

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# The MagicPrompt tokenizer and language model are both loaded from the same repository.
magic_prompt_repository = "Gustavosta/MagicPrompt-Stable-Diffusion"
tokenizer_magic_prompt = AutoTokenizer.from_pretrained(magic_prompt_repository)
model_magic_prompt = AutoModelForCausalLM.from_pretrained(magic_prompt_repository)

In [None]:
def magic_prompt_generator(string):
    """Extend `string` with the MagicPrompt language model and return the decoded text.

    Generation samples a single sequence (top-k 50, top-p 0.95, temperature 0.5)
    with ``max_length=100``.
    """
    sampling_options = dict(
        max_length=100,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.5,
        num_return_sequences=1,
    )
    prompt_ids = tokenizer_magic_prompt.encode(string, return_tensors='pt')
    generated_ids = model_magic_prompt.generate(prompt_ids, **sampling_options)
    # Only one sequence is requested, so decode the first (and only) result.
    return tokenizer_magic_prompt.decode(generated_ids[0], skip_special_tokens=True)

In [None]:
# print(magic_prompt_generator('cat with flowers').replace('\n', ''))

In [None]:
def canny(image):
  """Return the Canny edge map of `image` (thresholds 100 and 200) as a PIL image."""
  pixels = np.asarray(image)
  edges = cv2.Canny(pixels, 100, 200)
  return Image.fromarray(edges)

In [None]:
 #from PIL import Image
 #import cv2
 #import numpy as np
 # canny(Image.open('10277.png'))

In [None]:
!wget https://raw.github.com/s9xie/hed/master/examples/hed/deploy.prototxt
!wget https://vcl.ucsd.edu/hed/hed_pretrained_bsds.caffemodel

In [None]:
# Load the HED network from the Caffe definition and weights files in the working directory.
net = cv2.dnn.readNetFromCaffe("deploy.prototxt", "hed_pretrained_bsds.caffemodel")
# Offset in pixels from the bottom/right edge that `caffe_hed` uses when cropping the network output.
pad = 14

In [None]:
def caffe_hed(image):
  """Run the HED network on `image` and return the resulting edge map as a PIL image.

  The image is surrounded by a 50-pixel white border before it is fed to the
  module-level `net`; the network output is scaled to 8-bit and cropped back to
  the size of the input image. The crop window ends `pad` pixels in from the
  bottom/right edge of the bordered output (`pad` is the module-level constant).

  Args:
    image: Three-channel image (a PIL image or anything `np.array` accepts).
      NOTE(review): presumably RGB, as produced by the notebook's loader — confirm.

  Returns:
    A single-channel uint8 PIL image with the same width and height as `image`.
  """
  img = np.array(image)
  # Reverse the channel axis (RGB -> BGR for an RGB input).
  img = img[:, :, ::-1]
  # Remember the input size so the crop below matches it instead of assuming 256x256.
  (h, w) = img.shape[:2]
  img = cv2.copyMakeBorder(img, 50, 50, 50, 50, cv2.BORDER_CONSTANT, value = (255, 255, 255))
  (H, W) = img.shape[:2]
  blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(W, H),
      swapRB=False, crop=False)
  net.setInput(blob)
  hed = net.forward()
  hed = cv2.resize(hed[0, 0], (W, H))
  hed = (255 * hed).astype("uint8")
  # Same window as the original code for a 256x256 input, but sized to the actual
  # input so larger images (the notebook feeds 512x512) are not truncated.
  cropped_img = hed[H - h - pad:H - pad, W - w - pad:W - pad]
  return Image.fromarray(cropped_img)

# Generate image

In [None]:
#@title **Calling model**

#@markdown ## Fine adjustments
seed = 339 #@param {type:"slider", min:0, max:10000, step:1}
steps = 129 #@param {type:"slider", min:0, max:500, step:1}
guidance_scale = 3 #@param {type:"slider"}

#@markdown ## Prompts
caption = True #@param {type:"boolean"}
open_assistant = True #@param {type:"boolean"}
magic_prompt = True #@param {type:"boolean"}
#@markdown  <summary>Prompt is ignored if caption is true.</summary>
prompt = "" #@param {type:"string"}
negative_prompt = "" #@param {type:"string"}
# An empty negative prompt is passed to the pipeline as None rather than as "".
if negative_prompt == "":
  negative_prompt = None
# , trending on artstation, artstationHD, artstationHQ, patreon, 4k, 8k

# Seed torch and keep the returned generator; it is handed to the pipeline call.
generator = torch.manual_seed(seed)

#@markdown ## Upload image
upload = True #@param {type:"boolean"}
loaded = False #@param {type:"boolean"}
#@markdown <summary>Link and filename are ignored if uploading file</summary>
filename = '' #@param {type:"string"}
link = '' #@param {type:"string"}

#@markdown ## Preprocess image
edge_mode = "PIL invert" #@param ["None", "Canny", "Caffe HED", "PIL invert"]

if loaded:
  init_image = PIL.Image.open(filename).convert("RGB")
elif not upload:
  !wget $link
  init_image = PIL.Image.open(filename).convert("RGB")
else:
  uploaded = files.upload()
  init_image = PIL.Image.open(io.BytesIO(uploaded[next(iter(uploaded))])).convert("RGB")

# Scale the image so its shorter side is 512 px, keeping the aspect ratio.
width, height = init_image.size
aspect_ratio = width / height
is_landscape = width > height
new_width = int(512 * aspect_ratio) if is_landscape else 512
new_height = 512 if is_landscape else int(512 / aspect_ratio)
img = init_image.resize((new_width, new_height))

# Cut a centred 512x512 window out of the longer side.
if new_width > new_height:
    left, top = (new_width - 512) / 2, 0
    right, bottom = left + 512, 512
else:
    left, top = 0, (new_height - 512) / 2
    right, bottom = 512, top + 512
img = img.crop((left, top, right, bottom))
init_image = img

if caption:
  # Caption the image with BLIP, conditioned on the text prefix 'a sketch of'.
  inputs = processor(init_image, 'a sketch of', return_tensors="pt").to("cuda")
  out = model.generate(**inputs)
  # Decode the caption and drop the conditioning prefix from it.
  prompt = processor.decode(out[0], skip_special_tokens=True).replace('a sketch of ', '')
  print(prompt)

if open_assistant:
  # The OpenAssistant result is only printed here; `prompt` itself is left unchanged.
  oa_prompt = open_assistant_generator(prompt)
  print(oa_prompt)

if magic_prompt:
  # Let MagicPrompt extend the prompt that is sent to the pipeline.
  prompt = magic_prompt_generator(prompt)
  print(prompt)


# Map each edge mode to its preprocessing step. The lambdas defer the name lookup,
# so a mode whose helper was never defined only fails if that mode is selected.
edge_preprocessors = {
  'Canny': lambda picture: canny(picture),
  'Caffe HED': lambda picture: caffe_hed(picture),
  'PIL invert': lambda picture: ImageOps.invert(picture),
}

# 'None' (or any unlisted mode) leaves the image untouched.
edge_preprocessor = edge_preprocessors.get(edge_mode)
if edge_preprocessor is not None:
  init_image = edge_preprocessor(init_image)

# Generate one image conditioned on the prompt and the preprocessed control image.
out_image = pipe(
    prompt,
    image = init_image,
    num_inference_steps = steps,
    generator = generator,
    negative_prompt = negative_prompt,
    guidance_scale = guidance_scale
).images[0]

# Show the control image followed by the generated result.
display(init_image)
display(out_image)

# Derive the output filename from the prompt. Path separators and non-printable
# characters (e.g. newlines in generated prompts) are replaced so the name stays a
# single valid file name, and an empty prompt falls back to "output" because a bare
# ".png" has no extension for Pillow to infer the format from.
safe_stem = "".join(
    "_" if (ch in "/\\" or not ch.isprintable()) else ch
    for ch in prompt[:200]
)
out_image.save(f"{safe_stem or 'output'}.png")