## Step 1: Download the base model and sample some images from it

In [None]:
%%capture
# Core dependencies, kept commented out (presumably because they are already
# installed in the author's environment). Later cells import `diffusers` and
# run `accelerate launch`, so uncomment these when starting from a clean
# environment.
# !pip install git+https://github.com/huggingface/diffusers.git
# !pip install accelerate
# !pip install datasets
# !pip install bitsandbytes

In [None]:
!pip install xformers
!pip install bitsandbytes

In [None]:
# Base checkpoint to sample from and to fine-tune. MODEL_NAME is read back from
# the environment when the pipeline is loaded and is expanded as $MODEL_NAME by
# the training command. Keep exactly one %env line active.
# %env MODEL_NAME=stabilityai/stable-diffusion-2-1
# %env MODEL_NAME=stabilityai/stable-diffusion-xl-base-1.0
# %env MODEL_NAME=SG161222/RealVisXL_V4.0
%env MODEL_NAME=runwayml/stable-diffusion-v1-5

In [None]:
def plot_images(images):
    """Show ``images`` side by side in a single row with the axes hidden.

    Args:
        images: Sized sequence of images accepted by ``Axes.imshow`` (for
            example PIL images or arrays). An empty sequence draws nothing.
    """
    from matplotlib import pyplot as plt

    if len(images) == 0:
        # plt.subplots() rejects a grid with zero columns; nothing to draw.
        return

    # No separate plt.figure() call: subplots() creates its own figure, and an
    # extra call would only leave an empty figure behind.
    # squeeze=False always returns a 2-D array of axes, so a single image works
    # as well (by default subplots() returns a bare Axes object in that case,
    # which has no .flatten()).
    _, axarr = plt.subplots(1, len(images), figsize=(20, 10), squeeze=False)
    for ax, img in zip(axarr.flatten(), images):
        ax.imshow(img)
        ax.axis('off')
    plt.show()

In [None]:
import os
import torch

from diffusers import StableDiffusionPipeline
from diffusers import DiffusionPipeline

In [None]:
# Load the base pipeline in half precision. os.environ[...] (instead of
# os.getenv) raises a KeyError naming MODEL_NAME straight away when the %env
# cell has not been run, rather than handing None to from_pretrained.
# pipe = StableDiffusionPipeline.from_pretrained(os.getenv('MODEL_NAME'), torch_dtype=torch.float16)
pipe = DiffusionPipeline.from_pretrained(
    os.environ['MODEL_NAME'], torch_dtype=torch.float16
)


In [None]:
# Move the pipeline to the GPU ("cuda" device) before sampling.
pipe = pipe.to("cuda")

In [None]:
# prompt = "isometric view of a skyscraper in the style of a city building game"
# Recipe-style prompt, written as adjacent literals that Python joins into one
# string (name; recipe steps; category).
# NOTE(review): this is a long prompt - confirm it fits the text encoder's
# token limit, otherwise the tail may be truncated.
prompt = (
    "Name: 'Luck Of The Irish' Frozen Lemonade; "
    "Recipie: 3/4 oz. Monin Pistachio Syrup\n"
    "3/4 oz. Monin Ginger Syrup\n"
    "2 oz. fresh lemon sour mix\n"
    "\n"
    "Fill serving glass full of ice.\n"
    "Pour ingredients into blender cup in order listed.\n"
    "Add ice from serving glass, cap, and blend until smooth.\n"
    "Pour back into serving glass, add garnish and serve.; "
    "Category: lemonade"
)
# Ask the base model for six samples of the same prompt.
images = pipe(prompt, num_images_per_prompt=6).images

In [None]:
# Display the samples generated by the base model.
plot_images(images)

**IMPORTANT:** We need to free the GPU memory before the actual training can start. First we delete the Python variables that hold the pipeline and the generated images and run the garbage collector; then we ask torch to empty its CUDA memory cache.

In [None]:
# Drop the references to the pipeline and its samples so that their memory can
# be reclaimed before training starts.
del pipe, images

In [None]:
import gc
# Collect unreachable Python objects first, then ask torch to empty its CUDA
# memory cache.
gc.collect()
torch.cuda.empty_cache()

## Step 2: Fine-tune the model

In [None]:
!git clone https://github.com/huggingface/diffusers.git

In [None]:
# for training in colab
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
%%capture
# No need to train the model for long to see meaningful results.
# %env max_training_epochs = 50

In [None]:
import os
# Sanity check: list the dataset folder whose train/ and test/ sub-folders are
# read by the training command and the test-image cell.
os.listdir("../data/monin/combined_dataset")

In [None]:
# Confirm that PyTorch can see a CUDA device before launching the training.
torch.cuda.is_available()

In [None]:
# The --use_8bit_adam flag is crucial to be able to train on the T4 GPU which has only 15GB of memory
!accelerate launch diffusers/examples/text_to_image/train_text_to_image.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --train_data_dir="../data/monin/combined_dataset/train" \
  --use_ema \
  --max_train_steps=50 \
  # --use_8bit_adam \
  # --enable_xformers_memory_efficient_attention \
  --resolution=512 --center_crop --random_flip \
  --train_batch_size=1 \
  # --gradient_accumulation_steps=4 \
  # --gradient_checkpointing \
  --mixed_precision="fp16" \
  --learning_rate=1e-05 \
  --max_grad_norm=1 \
  --lr_scheduler="constant" --lr_warmup_steps=0 \
  --output_dir="../models/monin" \
  # --push_to_hub

In [None]:
from PIL import Image, ImageOps
from transparent_background import Remover

# Build the background remover once. A default Remover() used to be constructed
# here and was immediately replaced by this one, repeating the set-up work for
# an object that was never used.
remover = Remover(mode='base')

# Test photo used as the source image for the mask and for the edge map.
img = Image.open("../data/monin/combined_dataset/test/1725657501_6166165.png")
# img = img.resize((512, 512))
img = img.convert('RGB')
# type='map' presumably returns the foreground map rather than a cut-out image
# - TODO confirm against the transparent_background documentation.
mask = remover.process(img, type='map')
mask

In [None]:
import cv2

import numpy as np

# Thresholds handed to the Canny edge detector.
low_threshold = 150
high_threshold = 400
# low_threshold = 550
# high_threshold = 650

# Edge map of the source photo.
# image = np.array(background_img)
image = cv2.Canny(np.array(img), low_threshold, high_threshold)
# Add a trailing channel axis and repeat the edge map three times along it.
image = np.concatenate([image[..., np.newaxis]] * 3, axis=2)

# Soften the edges with a Gaussian blur (kernel size (5, 5) and sigma can be
# adjusted as needed).
blurred_image = cv2.GaussianBlur(image, (5, 5), 0)

# PIL version of the blurred edge map, for display and for later cells.
canny_image = Image.fromarray(blurred_image)

canny_image

## Step 3: Sample from the finetuned model

In [None]:
from diffusers import ControlNetModel
import os
import torch

from diffusers import StableDiffusionPipeline
from diffusers import DiffusionPipeline
from diffusers import StableDiffusionControlNetPipeline, StableDiffusionControlNetInpaintPipeline



# Canny ControlNet weights, loaded in half precision.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny",
    # "diffusers/controlnet-canny-sdxl-1.0",
    torch_dtype=torch.float16,
)

# ControlNet inpainting pipeline built on the "sd-model-finetuned-15-50"
# checkpoint and moved to the GPU.
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    # "runwayml/stable-diffusion-v1-5",
    "sd-model-finetuned-15-50",
    # "runwayml/stable-diffusion-inpainting",
    torch_dtype=torch.float16,
    controlnet=controlnet,
).to("cuda")

# prompt = "isometric view of a skyscraper in the style of a city building game"
# Recipe-style prompt, written as adjacent literals that Python joins into one
# string (name; recipe steps; category).
prompt = (
    "Name: 'Luck Of The Irish' Frozen Lemonade; "
    "Recipie: 3/4 oz. Monin Pistachio Syrup\n"
    "3/4 oz. Monin Ginger Syrup\n"
    "2 oz. fresh lemon sour mix\n"
    "\n"
    "Fill serving glass full of ice.\n"
    "Pour ingredients into blender cup in order listed.\n"
    "Add ice from serving glass, cap, and blend until smooth.\n"
    "Pour back into serving glass, add garnish and serve.; "
    "Category: lemonade"
)
negative_prompt = 'low quality, bad quality, sketches'
controlnet_conditioning_scale = 0.5

# Six samples, using the test photo as the image to inpaint, its mask, and the
# blurred Canny edge map as the ControlNet conditioning image.
images = pipe(
    prompt=prompt,
    negative_prompt=negative_prompt,
    image=img,
    control_image=canny_image,
    mask_image=mask,
    num_images_per_prompt=6,
    controlnet_conditioning_scale=controlnet_conditioning_scale,
).images

In [None]:
# Display the samples produced by the fine-tuned ControlNet inpainting pipeline.
plot_images(images)

In [None]:
from huggingface_hub import notebook_login

# Log in to the Hugging Face Hub from inside the notebook; presumably required
# for the push_to_hub call further down.
notebook_login()

In [None]:
# Rebuild the pipeline from the "sd-model-finetuned-15-100" checkpoint, reusing
# the ControlNet that is already loaded. It is not moved to the GPU in this cell.
pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    # "runwayml/stable-diffusion-v1-5",
    "sd-model-finetuned-15-100",
    # "runwayml/stable-diffusion-inpainting",
    controlnet=controlnet,
    torch_dtype=torch.float16,
)

In [None]:
# Upload the pipeline currently bound to `pipe` to the Hugging Face Hub under
# this repository id.
pipe.push_to_hub("ThreeBibas/sd-napitochki-finetune-15-100")