In [1]:
!pip install tensorflow keras diffusers transformers accelerate



In [2]:
import io
import os

import tensorflow as tf
import torch
from diffusers import StableDiffusionPipeline
from PIL import Image

# Text-to-image generation with Stable Diffusion via Hugging Face diffusers.
# StableDiffusionPipeline is a PyTorch pipeline, so device placement and seeding
# are done through torch (not TensorFlow).

# --- Configuration ---
MODEL_ID = "runwayml/stable-diffusion-v1-5" # A well-known model for this task
PROMPT = "A photorealistic portrait of an old man with a kind smile, detailed eyes, wearing a tweed jacket, soft studio lighting."
FACE_PROMPT = PROMPT # The prompt specific to the face generation
OUTPUT_FILENAME = "generated_face.png"
SEED = 42  # Fixed seed so Restart & Run All reproduces the same image
# Use the GPU when one is visible to PyTorch; otherwise fall back to (much slower) CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# --- 1. Load the Model and Components (VAE, U-Net, Text Encoder) ---
try:
    # StableDiffusionPipeline bundles the components needed for generation:
    # 1. Text Encoder: a transformer that turns the prompt into embeddings.
    # 2. VAE (Variational Autoencoder): encodes/decodes images to/from latent space.
    # 3. U-Net: the denoising network, conditioned on the text embeddings.

    print(f"Loading Stable Diffusion pipeline: {MODEL_ID}...")
    pipeline = StableDiffusionPipeline.from_pretrained(MODEL_ID, use_safetensors=True)

    # from_pretrained() leaves the weights on CPU; move them explicitly so that
    # inference actually runs on the accelerator when one is available.
    pipeline = pipeline.to(DEVICE)

    print("Model components loaded successfully.")

    # --- 2. Generate the Image ---
    print(f"Generating image with prompt: '{FACE_PROMPT}'")

    # The pipeline call performs the full flow:
    # a. The prompt is encoded by the text encoder.
    # b. Starting from random latent noise, the U-Net repeatedly denoises the
    #    latent while being guided by the text embedding (the diffusion process).
    # c. The final latent is decoded by the VAE decoder into the output image.

    # A CPU generator gives the same initial noise regardless of which device
    # the pipeline runs on, so results are comparable across machines.
    generator = torch.Generator(device="cpu").manual_seed(SEED)

    # Note: 'num_inference_steps' controls the quality and speed. ~25-50 is standard.
    image = pipeline(
        prompt=FACE_PROMPT,
        height=512,
        width=512,
        num_inference_steps=30,
        guidance_scale=7.5, # How much the prompt should influence the image
        generator=generator,
    ).images[0]

    # --- 3. Save the Result ---
    print(f"Image generation complete. Saving to {OUTPUT_FILENAME}")
    image.save(OUTPUT_FILENAME)
    print("Successfully saved the generated face image.")

    # To view the result inline in Jupyter, evaluate `image` as the last
    # expression of a following cell (PIL images have a rich repr).

except Exception as e:
    # Print the troubleshooting hints, then re-raise so the cell is marked as
    # failed and the full traceback is visible instead of being swallowed.
    print("\nAn error occurred during execution:")
    print("Please ensure you have sufficient VRAM (typically 8GB+) and that the libraries are installed.")
    print(f"Error details: {e}")
    raise

Loading Stable Diffusion pipeline: runwayml/stable-diffusion-v1-5...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

safety_checker/model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

text_encoder/model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Model components loaded successfully.
Generating image with prompt: 'A photorealistic portrait of an old man with a kind smile, detailed eyes, wearing a tweed jacket, soft studio lighting.'


  0%|          | 0/30 [00:00<?, ?it/s]

Image generation complete. Saving to generated_face.png
Successfully saved the generated face image.
