## Step 1: Import packages and hydra configurations
- Please use `Python>=3.8` with `PyTorch==1.13.1` and `diffusers==0.23.0`.
- Configurations are governed by [Hydra configs](https://hydra.cc/docs/configure_hydra/intro/).

In [1]:
import sys
sys.path.append('..')
from hydra import compose, initialize
import os
import transformers
import warnings
import json
from accelerate import Accelerator
from accelerate.utils import set_seed
from transformers import AutoTokenizer, PretrainedConfig
import diffusers
from diffusers import (
    AutoencoderKL,
    DDPMScheduler,
    UNet2DConditionModel,
)
from DreamBooth_melo.trainer.melo_trainer import *
from DreamBooth_melo.database.router import *
# Set the http_proxy / https_proxy environment variables for this process to a local address.
# NOTE(review): 127.0.0.1:7890 looks machine-specific — presumably a local proxy used to
# download the pretrained weights; remove or adjust these two lines if nothing listens there.
os.environ['http_proxy'] = '127.0.0.1:7890'
os.environ['https_proxy'] = '127.0.0.1:7890'
import numpy as np
# Load Configurations governed by hydra
# Initialize Hydra with the "config" directory as the config search path.
initialize(version_base=None, config_path="config")
# Compose the configuration named "config"; the result is used by every later cell as `config`.
config = compose(config_name="config")
# Dump the composed configuration as YAML so the effective settings can be inspected.
# NOTE(review): `OmegaConf` is not imported explicitly in this cell — presumably it is
# provided by one of the star imports above; confirm.
print(OmegaConf.to_yaml(config))

alg: melo_diff
train_text_encoder: true
learning_rate: 1.0e-05
validation_prompt: a photo of {} in the Acropolis
task: diffusion
pretrained_model_name_or_path: CompVis/stable-diffusion-v1-4
pretrained_cache_dir: pretrained_stable_diffusion
revision: null
tokenizer_name: null
class_data_dir: class_data
instance_prompt: a photo of
class_prompt: a photo of
with_prior_preservation: false
prior_loss_weight: 0.1
num_class_images: 40
seed: null
resolution: 512
center_crop: false
train_batch_size: 1
sample_batch_size: 4
num_train_epochs: 1
max_train_steps: 800
checkpoints_total_limit: null
resume_from_checkpoint: null
gradient_accumulation_steps: 1
gradient_checkpointing: false
scale_lr: false
lr_scheduler: constant
lr_warmup_steps: 0
lr_num_cycles: 1
lr_power: 1.0
use_8bit_adam: false
dataloader_num_workers: 0
adam_beta1: 0.9
adam_beta2: 0.999
adam_weight_decay: 0.01
adam_epsilon: 1.0e-08
max_grad_norm: 1.0
push_to_hub: false
hub_token: null
hub_model_id: null
allow_tf32: true
num_validation_



### According to [DreamBooth](https://arxiv.org/pdf/2208.12242), prior preservation is needed to prevent language drift. However, because Multi-MELO dynamically activates LoRA blocks, the original knowledge is retained whenever no specific block is activated, so we set `config.with_prior_preservation` to `False`.

In [2]:
def check_config(config):
    """Normalize and validate the DreamBooth-related options on ``config`` in place.

    The function:

    * prefixes ``config.class_data_dir`` with the base directory ``'.'`` when a
      directory is configured,
    * overwrites ``config.local_rank`` with the ``LOCAL_RANK`` environment
      variable when that variable is set and differs from the configured value,
    * checks that the prior-preservation options are consistent, and
    * rejects ``train_text_encoder`` combined with ``pre_compute_text_embeddings``.

    Args:
        config: Mutable configuration object exposing ``class_data_dir``,
            ``class_prompt``, ``with_prior_preservation``, ``train_text_encoder``
            and ``pre_compute_text_embeddings`` as attributes (``local_rank`` is
            only read when ``LOCAL_RANK`` is set in the environment).

    Raises:
        ValueError: If prior preservation is enabled without a class data
            directory or class prompt, or if ``train_text_encoder`` and
            ``pre_compute_text_embeddings`` are both enabled.

    Warns:
        UserWarning: If a class data directory or class prompt is supplied while
            prior preservation is disabled.
    """
    base_dir = '.'
    # Resolve the class-image directory only when one is configured. Joining
    # unconditionally raises TypeError on None and makes the "is None"
    # validation below unreachable.
    if config.class_data_dir is not None:
        config.class_data_dir = os.path.join(base_dir, config.class_data_dir)

    # A launcher-provided LOCAL_RANK takes precedence over the configured value.
    env_local_rank = int(os.environ.get("LOCAL_RANK", -1))
    if env_local_rank != -1 and env_local_rank != config.local_rank:
        config.local_rank = env_local_rank

    if config.with_prior_preservation:
        if config.class_data_dir is None:
            raise ValueError("You must specify a data directory for class images.")
        if config.class_prompt is None:
            raise ValueError("You must specify prompt for class images.")
    else:
        # Class data/prompt are only consumed by prior preservation; warn that they are ignored.
        if config.class_data_dir is not None:
            warnings.warn("You need not use --class_data_dir without --with_prior_preservation.")
        if config.class_prompt is not None:
            warnings.warn("You need not use --class_prompt without --with_prior_preservation.")

    if config.train_text_encoder and config.pre_compute_text_embeddings:
        raise ValueError("`--train_text_encoder` cannot be used with `--pre_compute_text_embeddings`")

def import_model_class_from_model_name_or_path(pretrained_model_name_or_path: str, revision: str):
    """Return the text-encoder class named by a pretrained checkpoint's config.

    The config stored in the checkpoint's ``text_encoder`` subfolder is loaded,
    the first entry of its ``architectures`` list is read, and the matching
    class is imported on demand and returned (the class itself, not an instance).

    Args:
        pretrained_model_name_or_path: Checkpoint identifier or path forwarded
            to ``PretrainedConfig.from_pretrained``.
        revision: Revision forwarded to ``PretrainedConfig.from_pretrained``.

    Returns:
        ``CLIPTextModel``, ``RobertaSeriesModelWithTransformation`` or
        ``T5EncoderModel``, depending on the architecture name.

    Raises:
        ValueError: If the architecture name is none of the three above.
    """
    encoder_config = PretrainedConfig.from_pretrained(
        pretrained_model_name_or_path, subfolder="text_encoder", revision=revision
    )
    model_class = encoder_config.architectures[0]

    # Each import stays inside its branch so only the required class is loaded.
    if model_class == "CLIPTextModel":
        from transformers import CLIPTextModel
        return CLIPTextModel

    if model_class == "RobertaSeriesModelWithTransformation":
        from diffusers.pipelines.alt_diffusion.modeling_roberta_series import (
            RobertaSeriesModelWithTransformation,
        )
        return RobertaSeriesModelWithTransformation

    if model_class == "T5EncoderModel":
        from transformers import T5EncoderModel
        return T5EncoderModel

    raise ValueError(f"{model_class} is not supported.")

## Step 2: Check the imported configurations

In [3]:
LOG.info("*MELO* Dreambooth")
# Validate (and normalize in place) the composed configuration before anything reads it.
check_config(config)

# Plain-dict snapshots of the top-level config and of its `model` sub-config.
DIFF_CONFIG = dict(config)
MELO_CONFIG = dict(config.model)

# Echo only the settings worth eyeballing before a run.
diff_config_keys = ['class_prompt', 'with_prior_preservation', 'prior_loss_weight', 'learning_rate']
for k in diff_config_keys:
    print(f'[-DIFF CONFIG-]  {k}: {DIFF_CONFIG[k]}')

melo_config_keys = ['UNET_TARGET_MODULES', 'TEXT_ENCODER_TARGET_MODULES']
for k in melo_config_keys:
    print(f'[-MELO CONFIG-]  {k}: {MELO_CONFIG[k]}')

# Record the working directory on the config; `open_dict` is used so the new key can be added.
base_dir = '.'
with open_dict(config):
    config.base_dir = base_dir


[-DIFF CONFIG-]  class_prompt: a photo of
[-DIFF CONFIG-]  with_prior_preservation: False
[-DIFF CONFIG-]  prior_loss_weight: 0.1
[-DIFF CONFIG-]  learning_rate: 1e-05
[-MELO CONFIG-]  UNET_TARGET_MODULES: ['to_q', 'to_v', 'query', 'value']
[-MELO CONFIG-]  TEXT_ENCODER_TARGET_MODULES: ['q_proj', 'v_proj']




## Step 3: Edit 3 personal objects

In [4]:
# Accelerate wrapper configured from the composed settings.
accelerator = Accelerator(
    mixed_precision=config.mixed_precision,
    gradient_accumulation_steps=config.gradient_accumulation_steps,
)

# Training the text encoder with gradient accumulation is rejected when more than one process is used.
if config.train_text_encoder:
    if config.gradient_accumulation_steps > 1 and accelerator.num_processes > 1:
        raise ValueError(
            "Gradient accumulation is not supported when training the text encoder in distributed training. "
            "Please set gradient_accumulation_steps to 1. This feature will be supported in the future."
        )

# Only the local main process keeps warning/info logs; every other process reports errors only.
if not accelerator.is_local_main_process:
    transformers.logging.set_verbosity_error()
    diffusers.utils.logging.set_verbosity_error()
else:
    transformers.logging.set_verbosity_warning()
    diffusers.utils.logging.set_verbosity_info()

# Seed the run only when a seed was configured.
if config.seed is not None:
    set_seed(config.seed)

# ---------------------------------------------------------------------------
# Load model
# ---------------------------------------------------------------------------
# The text-encoder class is chosen from the checkpoint's own text_encoder config.
text_encoder_cls = import_model_class_from_model_name_or_path(
    config.pretrained_model_name_or_path,
    config.revision,
)

# Scheduler and the pretrained sub-models, each read from its own subfolder of the checkpoint.
noise_scheduler = DDPMScheduler.from_pretrained(
    config.pretrained_model_name_or_path,
    subfolder="scheduler",
)
text_encoder = text_encoder_cls.from_pretrained(
    config.pretrained_model_name_or_path,
    subfolder="text_encoder",
    revision=config.revision,
)
vae = AutoencoderKL.from_pretrained(
    config.pretrained_model_name_or_path,
    subfolder="vae",
    revision=config.revision,
)
unet = UNet2DConditionModel.from_pretrained(
    config.pretrained_model_name_or_path,
    subfolder="unet",
    revision=config.revision,
)

# ---------------------------------------------------------------------------
# Load tokenizer
# ---------------------------------------------------------------------------
# An explicitly configured tokenizer name takes precedence; otherwise the tokenizer
# stored in the checkpoint's "tokenizer" subfolder is used.
if config.tokenizer_name:
    tokenizer = AutoTokenizer.from_pretrained(
        config.tokenizer_name,
        revision=config.revision,
        use_fast=False,
    )
elif config.pretrained_model_name_or_path:
    tokenizer = AutoTokenizer.from_pretrained(
        config.pretrained_model_name_or_path,
        subfolder="tokenizer",
        revision=config.revision,
        use_fast=False,
    )

# ---------------------------------------------------------------------------
# Algorithm initialization
# ---------------------------------------------------------------------------
# The module `algs.<config.alg>` is imported by name and the class whose name is
# `config.alg` upper-cased is taken from it.
alg_module = importlib.import_module(f'algs.{config.alg}')
AlgClass = getattr(alg_module, config.alg.upper())
alg = AlgClass(accelerator, tokenizer, noise_scheduler, vae, unet, text_encoder, config)

# ---------------------------------------------------------------------------
# Data info
# ---------------------------------------------------------------------------
data_info_path = os.path.join(base_dir, "data","data.json")
with open(data_info_path, 'r') as data_file:
    data_info = json.load(data_file)

# Only the first three subjects in data.json are edited in this step (not the full key set).
subject_list = list(data_info.keys())[:3]
# Take as many entries from the rare-token array as there are subjects.
rare_tokens_path = os.path.join(base_dir, "data/rare_tokens/rare_tokens.npy")
identifier_list = np.load(rare_tokens_path)[:len(subject_list)]

# ---------------------------------------------------------------------------
# Trainer
# ---------------------------------------------------------------------------
# NOTE(review): the fifth positional argument is passed as None; its meaning is not visible here.
trainer = dream_trainer(config, alg, accelerator, tokenizer, None, data_info, subject_list, identifier_list)
# Clear the CUDA caching allocator before starting the edit run.
torch.cuda.empty_cache()
trainer.run_edit()

You are using a model of type clip_text_model to instantiate a model of type . This is not supported for all configurations of models and can yield errors.
{'timestep_spacing', 'prediction_type', 'sample_max_value', 'variance_type', 'thresholding', 'clip_sample_range', 'dynamic_thresholding_ratio'} was not found in config. Values will be initialized to default values.
{'norm_num_groups', 'force_upcast'} was not found in config. Values will be initialized to default values.
{'time_cond_proj_dim', 'resnet_skip_time_act', 'encoder_hid_dim_type', 'class_embeddings_concat', 'dropout', 'resnet_time_scale_shift', 'dual_cross_attention', 'mid_block_type', 'projection_class_embeddings_input_dim', 'addition_embed_type', 'num_attention_heads', 'addition_embed_type_num_heads', 'mid_block_only_cross_attention', 'num_class_embeds', 'time_embedding_dim', 'reverse_transformer_layers_per_block', 'conv_out_kernel', 'addition_time_embed_dim', 'cross_attention_norm', 'upcast_attention', 'time_embedding_ty