<a href="https://colab.research.google.com/github/Haque360/DivItUp/blob/main/jupyters/colab_example_images_mixing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!git clone https://github.com/TheDenk/images_mixing.git
!pip install -r ./images_mixing/requirements.txt > /dev/null

fatal: destination path 'images_mixing' already exists and is not an empty directory.


## Install dependencies and import modules

In [None]:
!pip install diffusers==0.16.1 transformers==4.38.0 open-clip-torch==2.20.0


In [None]:
!pip install  huggingface_hub==0.25.0

In [None]:
!pip install "jax[cuda12_pip]==0.4.23" -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html

In [None]:
import os
# Make only GPU 0 visible to this process. This is set before `import torch`
# below, presumably so it is in place when CUDA is initialised — confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import sys
# Add the cloned ./images_mixing directory to the module search path so that
# modules inside it can be imported by name.
sys.path.append('./images_mixing')

import torch
import open_clip
from PIL import Image
from open_clip import SimpleTokenizer
from diffusers import DiffusionPipeline
from transformers import CLIPFeatureExtractor, CLIPModel

# NOTE(review): `utils` is presumably ./images_mixing/utils.py, resolved through
# the sys.path entry added above — confirm.
from utils import show_images

## Load additional models: CLIP and CoCa

In [None]:
# The CLIP feature extractor and the CLIP model come from the same checkpoint.
clip_checkpoint = "laion/CLIP-ViT-B-32-laion2B-s34B-b79K"
feature_extractor = CLIPFeatureExtractor.from_pretrained(clip_checkpoint)
clip_model = CLIPModel.from_pretrained(clip_checkpoint, torch_dtype=torch.float16)

# CoCa model, created through open_clip and moved to the GPU.
coca_model = open_clip.create_model('coca_ViT-L-14', pretrained='laion2B-s13B-b90k').to('cuda')
# This only assigns a `dtype` attribute on the model object; no `.half()` or
# `.to(dtype)` call is made here.
# NOTE(review): presumably read by the custom pipeline — confirm.
coca_model.dtype = torch.float16

# Inference-time (is_train=False) image transform built from the CoCa visual
# tower's image size; mean/std fall back to None when the tower does not
# expose `image_mean` / `image_std`.
coca_visual = coca_model.visual
coca_transform = open_clip.image_transform(
    coca_visual.image_size,
    is_train=False,
    mean=getattr(coca_visual, 'image_mean', None),
    std=getattr(coca_visual, 'image_std', None),
)
coca_tokenizer = SimpleTokenizer()

## Create DiffusionPipeline from local .py file

In [None]:
# Extra components handed to the custom pipeline on top of the base checkpoint:
# the CLIP model with its feature extractor, and the CoCa model with its
# tokenizer and image transform (all created in the previous cell).
mixing_components = dict(
    clip_model=clip_model,
    feature_extractor=feature_extractor,
    coca_model=coca_model,
    coca_tokenizer=coca_tokenizer,
    coca_transform=coca_transform,
)

# The pipeline implementation is loaded from the local images_mixing.py file of
# the cloned repository. The original cell also listed
# "stabilityai/stable-diffusion-2-base" as a commented-out alternative checkpoint.
mixing_pipeline = DiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    custom_pipeline="./images_mixing/images_mixing.py",
    torch_dtype=torch.float16,
    **mixing_components,
)
mixing_pipeline = mixing_pipeline.to("cuda")

## Generate

In [None]:
!pip install open_clip_torch transformers==4.30.2

In [None]:
# Seeded CUDA random generator that is passed to the pipeline below.
generator = torch.Generator(device="cuda")
generator.manual_seed(17)

# Example images shipped with the cloned repository, loaded as RGB.
content_path = './images_mixing/images/cat.jpeg'
style_path = './images_mixing/images/bread.png'
content_image = Image.open(content_path).convert("RGB")
style_image = Image.open(style_path).convert("RGB")

mixing_result = mixing_pipeline(
    content_image=content_image,
    style_image=style_image,
    # A prompt left as None is created automatically with CoCa.
    content_prompt=None,
    style_prompt=None,
    num_inference_steps=50,
    # Noise applied to the starting point (the content image):
    # more noise means more new information.
    noise_strength=0.4,
    # Amount of style-image information in the starting point.
    slerp_latent_style_strength=0.25,
    # Amount of style-prompt information used during the diffusion steps.
    slerp_prompt_style_strength=0.99,
    # Amount of style-image information used during the diffusion steps.
    slerp_clip_image_style_strength=0.9,
    guidance_scale=9.0,
    clip_guidance_scale=100,
    batch_size=1,
    generator=generator,
    print_promts=True,
)
pipe_images = mixing_result.images

# Show content image, style image and the first generated image together.
show_images([content_image, style_image, pipe_images[0]], figsize=(16, 8))