In [None]:
# install the packages
!pip install git+https://github.com/huggingface/diffusers.git
!pip install transformers sentencepiece accelerate protobuf

In [None]:
import torch
from diffusers import FluxPipeline
import diffusers
from PIL import Image
import matplotlib.pyplot as plt

In [None]:
# Modify the rope function to handle CUDA device
_flux_module = diffusers.models.transformers.transformer_flux
# Re-running this cell must not wrap the wrapper: if `rope` is already the
# patched function, recover the original it recorded. Otherwise `_flux_rope`
# would point at the wrapper itself and every call would recurse forever.
_flux_rope = getattr(_flux_module.rope, "_wrapped_rope", _flux_module.rope)


def new_flux_rope(pos: torch.Tensor, dim: int, theta: int) -> torch.Tensor:
    """Call the original Flux ``rope``, computing on the CPU for CUDA inputs.

    Args:
        pos: Tensor forwarded to the original ``rope``.
        dim: Forwarded to the original ``rope``; must be even.
        theta: Forwarded to the original ``rope``.

    Returns:
        The original ``rope`` result, placed on the same device as ``pos``
        when ``pos`` is a CUDA tensor.

    Raises:
        AssertionError: If ``dim`` is odd.
    """
    assert dim % 2 == 0, "The dimension must be even."
    if pos.device.type == "cuda":
        # Move tensor to CPU for ROPE computation, then move it back to CUDA
        return _flux_rope(pos.to("cpu"), dim, theta).to(device=pos.device)
    else:
        # Perform ROPE computation directly if tensor is not on CUDA
        return _flux_rope(pos, dim, theta)


# Remember the unpatched function on the wrapper so a re-run can find it.
new_flux_rope._wrapped_rope = _flux_rope
_flux_module.rope = new_flux_rope


In [None]:
# Load the Flux Schnell model in bfloat16, then place the pipeline on the GPU
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell",
    revision='refs/pr/1',
    torch_dtype=torch.bfloat16,
)
pipe = pipe.to("cuda")

In [None]:
# Define the prompt
# This is the textual description that the model will use to generate the image
import base64
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is read in binary mode and the base64 output is decoded to a
    ``str`` using UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode('utf-8')
# Build the prompt from the base64 text of gen_image.png.
# The result gets its own name so the `encode_image` helper stays callable.
# NOTE(review): gen_image.png is only written by the save cell further down,
# so on a fresh run the file must already exist — confirm this is intended.
# NOTE(review): the prompt carries the raw base64 text of the image — confirm
# that is what the text-to-image pipeline is meant to receive.
encoded_image = encode_image("gen_image.png")
prompt = f"{encoded_image} can you make"

# Generate the image: run the pipeline once on the prompt, then keep the
# first entry of the returned images.
generated_images = pipe(
    prompt=prompt,
    height=1024,
    width=1024,
    num_inference_steps=4,
    guidance_scale=0.,
    max_sequence_length=256,
).images
out = generated_images[0]

In [None]:
# Write the generated image to disk
out.save("gen_image.png")

# Reload the saved file and render it on an explicit figure/axes pair
image = Image.open("gen_image.png")
fig, ax = plt.subplots()
ax.imshow(image)
ax.axis('off')  # no ticks or frame around the picture
plt.show()