In [None]:
import os
import logging
from typing import Optional, List, Tuple
from dataclasses import dataclass
from pathlib import Path
from datetime import datetime

import torch
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
from tqdm.auto import tqdm
from accelerate import Accelerator
from diffusers import StableDiffusionXLPipeline

# Configure the root logger at INFO level so progress messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the functions and classes in this file.
logger = logging.getLogger(__name__)

@dataclass
class SDXLConfig:
    """Settings shared by the dataset and the trainer in this file.

    Every field has a default, so ``SDXLConfig()`` is usable as-is; callers
    override individual values by keyword (``main`` overrides ``root_dir``).
    """

    # Hub identifier handed to StableDiffusionXLPipeline.from_pretrained.
    model_id: str = "stabilityai/stable-diffusion-xl-base-1.0"
    # Hub identifier of a VAE checkpoint; the name suggests an fp16-safe
    # replacement for the stock SDXL VAE -- TODO confirm intended use.
    vae_id: str = "madebyollin/sdxl-vae-fp16-fix"
    # Target size handed to transforms.Resize by ImageDataset
    # (presumably (height, width), per torchvision's convention).
    image_size: Tuple[int, int] = (1024, 1024)
    # DataLoader batch size.
    train_batch_size: int = 1
    # Number of full passes over the training set.
    num_train_epochs: int = 5
    # Forwarded to Accelerator(gradient_accumulation_steps=...).
    gradient_accumulation_steps: int = 1
    # AdamW learning rate.
    learning_rate: float = 1e-5
    # Upper bound passed to Accelerator.clip_grad_norm_.
    max_grad_norm: float = 1.0
    # AdamW betas and weight decay.
    adam_beta1: float = 0.9
    adam_beta2: float = 0.999
    adam_weight_decay: float = 1e-2
    # Forwarded to Accelerator(mixed_precision=...).
    mixed_precision: str = "fp16"
    # Checkpoint cadence, counted in training steps.
    save_interval: int = 500
    # Directory under which "checkpoint-<step>" folders are written.
    root_dir: str = "trained_models"
    # Value passed to torch.manual_seed when the trainer is constructed.
    seed: int = 42

class ImageDataset(Dataset):
    """Map-style dataset over the ``*.jpg`` / ``*.png`` files of one folder.

    Each item is a dict with:
      * ``"pixel_values"``: the image after resize, random horizontal flip,
        tensor conversion and normalisation with mean 0.5 / std 0.5;
      * ``"prompt"``: a caption built from the file name without extension.

    Args:
        image_dir: Folder that is scanned (non-recursively) for images.
        config: Settings object; only ``config.image_size`` is read here.
    """

    def __init__(self, image_dir: str, config: SDXLConfig):
        self.image_dir = Path(image_dir)
        self.config = config
        # glob() yields entries in filesystem order, which differs between
        # machines and runs. Sorting pins the index -> file mapping so a
        # seeded, shuffled DataLoader visits the same images in the same order.
        self.image_paths = sorted(
            [*self.image_dir.glob("*.jpg"), *self.image_dir.glob("*.png")]
        )
        self.transform = transforms.Compose([
            transforms.Resize(config.image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5])
        ])

    def __len__(self):
        """Return the number of image files found at construction time."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, convert to RGB and transform the image at position ``idx``."""
        image_path = self.image_paths[idx]
        # The context manager releases the file handle as soon as the RGB
        # copy has been made, instead of relying on garbage collection.
        with Image.open(image_path) as source:
            image = source.convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return {
            "pixel_values": image,
            "prompt": f"A high quality photo of {image_path.stem}"
        }

class SDXLTrainer:
    """Fine-tune the UNet of a Stable Diffusion XL pipeline and sample from it.

    The VAE and both text encoders are kept frozen in fp16; only the UNet is
    optimised. The UNet weights are held in fp32 because the fp16 gradient
    scaler used by ``mixed_precision="fp16"`` cannot unscale fp16 gradients.

    Args:
        config: Settings object (model ids, optimiser hyper-parameters,
            checkpoint cadence, output directory, seed).
    """

    def __init__(self, config: "SDXLConfig"):
        self.config = config
        # Seed first so everything that follows draws from a known RNG state.
        torch.manual_seed(config.seed)
        self.accelerator = Accelerator(
            gradient_accumulation_steps=config.gradient_accumulation_steps,
            mixed_precision=config.mixed_precision
        )
        self._setup_pipeline()

    def _setup_pipeline(self):
        """Load the pipeline, freeze the non-trained parts, build the train scheduler."""
        # Local import: both names come from the already-imported diffusers package.
        from diffusers import AutoencoderKL, DDPMScheduler

        component_overrides = {}
        if self.config.vae_id:
            # Use the configured VAE in place of the one bundled with the base
            # model. The stock SDXL VAE is reported to overflow in fp16, which
            # is presumably why the config names an fp16-fix checkpoint.
            component_overrides["vae"] = AutoencoderKL.from_pretrained(
                self.config.vae_id,
                torch_dtype=torch.float16
            )

        self.pipeline = StableDiffusionXLPipeline.from_pretrained(
            self.config.model_id,
            torch_dtype=torch.float16,
            use_safetensors=True,
            variant="fp16",
            **component_overrides
        ).to(self.accelerator.device)

        # Only the UNet is trained; the conditioning models stay frozen.
        self.pipeline.vae.requires_grad_(False)
        self.pipeline.text_encoder.requires_grad_(False)
        self.pipeline.text_encoder_2.requires_grad_(False)

        # Trainable weights must be fp32: with mixed_precision="fp16" the
        # gradient scaler refuses to unscale fp16 gradients during clipping.
        # NOTE: this roughly doubles the UNet's memory footprint.
        self.pipeline.unet.to(dtype=torch.float32)

        # The pipeline's own scheduler is an inference sampler. Training needs
        # the forward-diffusion add_noise of a DDPM scheduler built from the
        # same beta schedule / prediction type.
        self.noise_scheduler = DDPMScheduler.from_config(
            self.pipeline.scheduler.config
        )

        # Enable optimizations
        self.pipeline.enable_vae_slicing()
        self.pipeline.enable_attention_slicing()

    @staticmethod
    def _make_time_ids(batch_size, height, width, device, dtype):
        """Build SDXL micro-conditioning ids, one row per sample.

        Each row is ``[orig_h, orig_w, crop_top, crop_left, target_h, target_w]``.
        Images reach the trainer already resized and uncropped, so original and
        target size coincide and the crop offset is zero.

        Returns:
            Tensor of shape ``(batch_size, 6)`` on ``device`` with ``dtype``.
        """
        row = [height, width, 0, 0, height, width]
        return torch.tensor([row], device=device, dtype=dtype).repeat(batch_size, 1)

    def train_step(self, batch):
        """Compute the denoising loss for one batch.

        Args:
            batch: Mapping with ``"pixel_values"`` (image tensor batch) and
                ``"prompt"`` (one caption string per image).

        Returns:
            Scalar MSE loss between the UNet prediction and its target.

        Raises:
            ValueError: If the scheduler's prediction type is neither
                ``"epsilon"`` nor ``"v_prediction"``.
        """
        device = self.accelerator.device
        train_dtype = torch.float32
        pixel_values = batch["pixel_values"].to(
            device=device, dtype=self.pipeline.vae.dtype
        )
        batch_size = pixel_values.shape[0]
        height, width = pixel_values.shape[-2:]

        # The frozen models only provide inputs; no autograd graph is needed.
        with torch.no_grad():
            # Convert images to latent space
            latents = self.pipeline.vae.encode(pixel_values).latent_dist.sample()
            latents = latents * self.pipeline.vae.config.scaling_factor

            # SDXL conditions on BOTH text encoders: encode_prompt returns the
            # concatenated per-token states plus the pooled embedding of the
            # second encoder. The negative slots are unused without guidance.
            prompt_embeds, _, pooled_prompt_embeds, _ = self.pipeline.encode_prompt(
                prompt=list(batch["prompt"]),
                device=device,
                num_images_per_prompt=1,
                do_classifier_free_guidance=False
            )

        latents = latents.to(dtype=train_dtype)

        # Add noise
        noise = torch.randn_like(latents)
        timesteps = torch.randint(
            0, self.noise_scheduler.config.num_train_timesteps,
            (batch_size,), device=latents.device
        )
        noisy_latents = self.noise_scheduler.add_noise(latents, noise, timesteps)

        # Choose the regression target that matches how the model was trained.
        prediction_type = self.noise_scheduler.config.prediction_type
        if prediction_type == "epsilon":
            target = noise
        elif prediction_type == "v_prediction":
            target = self.noise_scheduler.get_velocity(latents, noise, timesteps)
        else:
            raise ValueError(f"Unsupported prediction type: {prediction_type}")

        # The SDXL UNet (addition_embed_type == "text_time") requires these
        # extra conditioning inputs and raises a ValueError without them.
        added_cond_kwargs = {
            "text_embeds": pooled_prompt_embeds.to(dtype=train_dtype),
            "time_ids": self._make_time_ids(
                batch_size, height, width, device, train_dtype
            ),
        }

        # Predict noise
        with self.accelerator.autocast():
            model_pred = self.pipeline.unet(
                noisy_latents,
                timesteps,
                encoder_hidden_states=prompt_embeds.to(dtype=train_dtype),
                added_cond_kwargs=added_cond_kwargs
            ).sample

        loss = F.mse_loss(model_pred.float(), target.float())
        return loss

    def train(self, train_dataset):
        """Run the fine-tuning loop over ``train_dataset``.

        Args:
            train_dataset: Map-style dataset yielding dicts with
                ``"pixel_values"`` and ``"prompt"``.

        Raises:
            ValueError: If ``train_dataset`` contains no samples.
        """
        if len(train_dataset) == 0:
            raise ValueError("train_dataset is empty; nothing to train on")

        train_dataloader = DataLoader(
            train_dataset,
            batch_size=self.config.train_batch_size,
            shuffle=True,
            pin_memory=True
        )

        optimizer = torch.optim.AdamW(
            self.pipeline.unet.parameters(),
            lr=self.config.learning_rate,
            betas=(self.config.adam_beta1, self.config.adam_beta2),
            weight_decay=self.config.adam_weight_decay
        )

        # Prepare for training
        self.pipeline.unet, optimizer, train_dataloader = self.accelerator.prepare(
            self.pipeline.unet, optimizer, train_dataloader
        )

        # Training loop
        global_step = 0
        for epoch in range(self.config.num_train_epochs):
            self.pipeline.unet.train()
            for step, batch in enumerate(tqdm(train_dataloader)):
                with self.accelerator.accumulate(self.pipeline.unet):
                    loss = self.train_step(batch)
                    self.accelerator.backward(loss)

                    if self.accelerator.sync_gradients:
                        self.accelerator.clip_grad_norm_(
                            self.pipeline.unet.parameters(),
                            self.config.max_grad_norm
                        )

                    optimizer.step()
                    optimizer.zero_grad()

                global_step += 1

                if (step + 1) % 10 == 0:
                    logger.info(f"Epoch {epoch}, Step {step}: Loss = {loss.item():.4f}")

                # Cadence follows the global step so that datasets shorter
                # than save_interval still reach a checkpoint across epochs.
                if global_step % self.config.save_interval == 0:
                    self._save_checkpoint(global_step)

    def _save_checkpoint(self, step: int):
        """Write the UNet weights to ``<root_dir>/checkpoint-<step>/unet.pt``."""
        save_path = os.path.join(self.config.root_dir, f"checkpoint-{step}")
        # The target folder does not exist yet on the first save.
        os.makedirs(save_path, exist_ok=True)
        # Strip any wrapper added by accelerator.prepare so the state-dict
        # keys match a plain UNet.
        unet = self.accelerator.unwrap_model(self.pipeline.unet)
        self.accelerator.save(
            unet.state_dict(),
            os.path.join(save_path, "unet.pt")
        )

    def generate_image(
        self,
        prompt: str,
        negative_prompt: Optional[str] = None,
        num_inference_steps: int = 50,
        guidance_scale: float = 7.5,
        seed: Optional[int] = None
    ):
        """Sample one image from the current pipeline.

        Args:
            prompt: Text to condition on.
            negative_prompt: Optional text to steer away from.
            num_inference_steps: Number of sampler steps.
            guidance_scale: Classifier-free guidance strength.
            seed: If given, seeds a dedicated generator for this call.

        Returns:
            The first image of the pipeline output.
        """
        if seed is not None:
            generator = torch.Generator(device=self.accelerator.device).manual_seed(seed)
        else:
            generator = None

        # train() leaves the UNet in training mode; sample in eval mode.
        self.pipeline.unet.eval()

        with torch.autocast(self.accelerator.device.type):
            image = self.pipeline(
                prompt=prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                generator=generator
            ).images[0]

        return image

    def benchmark_performance(self, prompts: List[str]):
        """Generate one image per prompt and return ``(prompt, image)`` pairs."""
        results = []
        for prompt in prompts:
            image = self.generate_image(prompt)
            results.append((prompt, image))
            logger.info(f"Generated image for prompt: {prompt}")
        return results

def setup_training_environment():
    """Mount Google Drive, create the working folders and locate training images.

    Returns:
        Tuple ``(root_dir, image_dir, output_dir, image_count)`` where the
        first three are path strings under the mounted Drive and
        ``image_count`` is the number of ``*.jpg`` / ``*.png`` files found
        in ``image_dir``.

    Raises:
        RuntimeError: If the Drive mount point is missing after mounting.
        ValueError: If ``image_dir`` contains no ``*.jpg`` or ``*.png`` file.
    """
    # Mount Google Drive (imported here, where it is needed).
    from google.colab import drive
    drive.mount('/content/drive')

    # Check the mount BEFORE creating anything: os.makedirs below would create
    # '/content/drive/MyDrive' itself and make this check pass unconditionally.
    if not os.path.exists('/content/drive/MyDrive'):
        raise RuntimeError("Google Drive not mounted correctly")

    # Define paths
    root_dir = '/content/drive/MyDrive/SDXL_images'
    image_dir = os.path.join(root_dir, 'training_images')
    output_dir = os.path.join(root_dir, 'generated_images')

    # Create directories
    for dir_path in (root_dir, image_dir, output_dir):
        os.makedirs(dir_path, exist_ok=True)

    # List available images
    image_files = list(Path(image_dir).glob('*.jpg')) + list(Path(image_dir).glob('*.png'))
    if not image_files:
        logger.error(f"No images found in {image_dir}")
        logger.info("Please add .jpg or .png images to the training_images folder")
        logger.info(f"Current directory contents: {os.listdir(image_dir)}")
        raise ValueError(f"No training images found in {image_dir}")

    return root_dir, image_dir, output_dir, len(image_files)

def main():
    """Run the whole workflow: set up folders, fine-tune, sample, save images.

    Any exception raised along the way is logged and then re-raised unchanged.
    """
    try:
        # Resolve the working folders and confirm that training images exist.
        drive_root, training_dir, generated_dir, image_count = setup_training_environment()
        logger.info(f"Found {image_count} training images")

        # Build the settings, then the trainer, then the dataset it consumes.
        config = SDXLConfig(root_dir=drive_root)
        trainer = SDXLTrainer(config)
        trainer.train(ImageDataset(training_dir, config))

        # Sample a fixed set of prompts from the fine-tuned model.
        benchmark_prompts = [
            "A beautiful mountain landscape with a sunset and snow-capped peaks",
            "A futuristic cityscape at night with neon lights",
            "A serene beach with crystal clear water and palm trees"
        ]
        samples = trainer.benchmark_performance(benchmark_prompts)

        # Persist each sample under a timestamped, index-suffixed file name.
        for index, (_prompt, picture) in enumerate(samples):
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            destination = os.path.join(generated_dir, f"generated_{stamp}_{index}.png")
            picture.save(destination)
            logger.info(f"Generated image saved to {destination}")

    except Exception as exc:
        logger.error(f"Error during execution: {str(exc)}")
        raise

# Run the workflow only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

Mounted at /content/drive
Initializing model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

Fetching 19 files:   0%|          | 0/19 [00:00<?, ?it/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/479 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/737 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.78G [00:00<?, ?B/s]

text_encoder_2/config.json:   0%|          | 0.00/575 [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

tokenizer_2/tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

tokenizer_2/special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.68k [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/10.3G [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Pipeline setup complete on device: cuda
Generating image for prompt: A beautiful mountain landscape with a sunset and snow-capped peaks


  0%|          | 0/50 [00:00<?, ?it/s]

  images = (images * 255).round().astype("uint8")


Saving image to: /content/drive/MyDrive/SDXL_images/generated_images/generated_20241215_143206.png
Initializing training components...


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Pipeline setup complete on device: cuda
Loaded dataset with 7 images from /content/drive/MyDrive/SDXL_images/landscape_images
Dataset loaded with 7 images
=== Starting Pipeline Diagnosis ===

1. Testing Dataset...
Dataset size: 7
First item keys: dict_keys(['image', 'prompt', 'path'])
Image shape: torch.Size([3, 512, 512])

2. Testing DataLoader...

3. Testing Pipeline Components...
Latents shape: torch.Size([1, 4, 64, 64])
Text embeddings shape: torch.Size([1, 77, 768])


Traceback (most recent call last):
  File "<ipython-input-1-cee81c310a5a>", line 190, in diagnose_training_pipeline
    noise_pred = model.pipeline.unet(
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1747, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/diffusers/models/unets/unet_2d_condition.py", line 1152, in forward
    aug_emb = self.get_aug_embed(
  File "/usr/local/lib/python3.10/dist-packages/diffusers/models/unets/unet_2d_condition.py", line 976, in get_aug_embed
    raise ValueError(
ValueError: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`
  scaler = torch.cuda.amp.G


Error during diagnosis: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`
Starting epoch 1/5


 14%|█▍        | 1/7 [00:01<00:07,  1.20s/it]

Error during batch 0: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 29%|██▊       | 2/7 [00:02<00:06,  1.29s/it]

Error during batch 1: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 43%|████▎     | 3/7 [00:03<00:04,  1.16s/it]

Error during batch 2: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 57%|█████▋    | 4/7 [00:04<00:02,  1.08it/s]

Error during batch 3: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 71%|███████▏  | 5/7 [00:05<00:02,  1.09s/it]

Error during batch 4: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 86%|████████▌ | 6/7 [00:06<00:01,  1.11s/it]

Error during batch 5: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


100%|██████████| 7/7 [00:07<00:00,  1.10s/it]


Error during batch 6: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`
Epoch 1 completed with average loss: 0.0000, Memory: {'gpu_memory_mb': 8284.4267578125, 'ram_memory_mb': 7526.734375}
Starting epoch 2/5


 14%|█▍        | 1/7 [00:00<00:02,  2.02it/s]

Error during batch 0: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 29%|██▊       | 2/7 [00:01<00:02,  1.69it/s]

Error during batch 1: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 43%|████▎     | 3/7 [00:01<00:02,  1.60it/s]

Error during batch 2: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 57%|█████▋    | 4/7 [00:02<00:02,  1.46it/s]

Error during batch 3: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 71%|███████▏  | 5/7 [00:03<00:01,  1.45it/s]

Error during batch 4: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 86%|████████▌ | 6/7 [00:03<00:00,  1.51it/s]

Error during batch 5: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


100%|██████████| 7/7 [00:04<00:00,  1.50it/s]


Error during batch 6: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`
Epoch 2 completed with average loss: 0.0000, Memory: {'gpu_memory_mb': 8284.0517578125, 'ram_memory_mb': 7526.734375}
Starting epoch 3/5


 14%|█▍        | 1/7 [00:00<00:03,  1.92it/s]

Error during batch 0: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 29%|██▊       | 2/7 [00:01<00:02,  1.94it/s]

Error during batch 1: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 43%|████▎     | 3/7 [00:01<00:01,  2.03it/s]

Error during batch 2: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 57%|█████▋    | 4/7 [00:02<00:01,  1.96it/s]

Error during batch 3: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 71%|███████▏  | 5/7 [00:02<00:01,  1.87it/s]

Error during batch 4: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 86%|████████▌ | 6/7 [00:03<00:00,  1.80it/s]

Error during batch 5: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


100%|██████████| 7/7 [00:03<00:00,  1.92it/s]


Error during batch 6: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`
Epoch 3 completed with average loss: 0.0000, Memory: {'gpu_memory_mb': 8284.9287109375, 'ram_memory_mb': 7526.734375}
Starting epoch 4/5


 14%|█▍        | 1/7 [00:00<00:03,  1.96it/s]

Error during batch 0: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 29%|██▊       | 2/7 [00:00<00:02,  2.12it/s]

Error during batch 1: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 43%|████▎     | 3/7 [00:01<00:02,  1.88it/s]

Error during batch 2: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 57%|█████▋    | 4/7 [00:02<00:01,  1.88it/s]

Error during batch 3: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 71%|███████▏  | 5/7 [00:02<00:01,  1.96it/s]

Error during batch 4: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 86%|████████▌ | 6/7 [00:03<00:00,  1.87it/s]

Error during batch 5: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


100%|██████████| 7/7 [00:03<00:00,  1.92it/s]


Error during batch 6: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`
Epoch 4 completed with average loss: 0.0000, Memory: {'gpu_memory_mb': 8284.80517578125, 'ram_memory_mb': 7526.734375}
Starting epoch 5/5


 14%|█▍        | 1/7 [00:00<00:03,  1.72it/s]

Error during batch 0: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 29%|██▊       | 2/7 [00:01<00:02,  1.75it/s]

Error during batch 1: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 43%|████▎     | 3/7 [00:01<00:02,  1.91it/s]

Error during batch 2: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 57%|█████▋    | 4/7 [00:02<00:01,  1.95it/s]

Error during batch 3: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 71%|███████▏  | 5/7 [00:02<00:01,  1.93it/s]

Error during batch 4: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


 86%|████████▌ | 6/7 [00:03<00:00,  1.88it/s]

Error during batch 5: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`


100%|██████████| 7/7 [00:03<00:00,  1.81it/s]

Error during batch 6: <class 'diffusers.models.unets.unet_2d_condition.UNet2DConditionModel'> has the config param `addition_embed_type` set to 'text_time' which requires the keyword argument `time_ids` to be passed in `added_cond_kwargs`
Epoch 5 completed with average loss: 0.0000, Memory: {'gpu_memory_mb': 8284.92822265625, 'ram_memory_mb': 7526.73828125}





Training report saved at /content/drive/MyDrive/SDXL_images/training_report.json
