<a href="https://colab.research.google.com/github/Ray-wind/Ray-wind.github.io/blob/master/AIbeauty.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install pinned versions of the libraries used by this notebook.
!pip install -q transformers==4.31.0 accelerate==0.21.0 diffusers==0.20.0 huggingface_hub==0.16.4 omegaconf==2.3.0
# Download Beautiful Realistic Asians V7 and convert it into a format usable by diffusers.
!wget https://civitai.com/api/download/models/177164 --content-disposition
# Fetch the conversion script from the diffusers v0.20.0 tag (same version as installed above).
!wget https://raw.githubusercontent.com/huggingface/diffusers/v0.20.0/scripts/convert_original_stable_diffusion_to_diffusers.py
!python convert_original_stable_diffusion_to_diffusers.py \
--checkpoint_path /content/beautifulRealistic_v7.safetensors \
--dump_path /content/beautifulRealistic_v7/ \
--from_safetensors

# Remove the downloaded checkpoint, the conversion script, and the sample_data directory.
!rm -rf beautifulRealistic_v7.safetensors
!rm -rf convert_original_stable_diffusion_to_diffusers.py
!rm -rf sample_data

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m29.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.5/79.5 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m68.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m63.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.0/117.0 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.p

In [2]:
# 標準ライブラリ
import sys
import os
import shutil
import time
import json
import zipfile

# サードパーティのライブラリ
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

# Google Colab 関連
from google.colab import files as colab_files

# Hugging Face Transformers & Diffusers
import diffusers
import transformers

# Device and dtype selection
def get_device_and_dtype():
    """Pick the torch device and tensor dtype to run on.

    Returns:
        A ``(device, dtype)`` tuple: the CUDA device with ``torch.float16``
        when CUDA is available, otherwise the CPU device with
        ``torch.float32``.
    """
    has_gpu = torch.cuda.is_available()
    device_type, dtype = ("cuda", torch.float16) if has_gpu else ("cpu", torch.float32)
    return torch.device(device_type), dtype

# Pipeline construction
def setup_pipeline(model_path, clip_skip, torch_dtype):
    """Load the diffusion pipeline, optionally with a shortened CLIP text encoder.

    Args:
        model_path: Passed to ``DiffusionPipeline.from_pretrained``.
        clip_skip: When greater than 1, a text encoder configured with
            ``12 - (clip_skip - 1)`` hidden layers is loaded from
            ``runwayml/stable-diffusion-v1-5`` and handed to the pipeline.
            Otherwise the pipeline is loaded without a ``text_encoder``
            override.
        torch_dtype: dtype forwarded to every ``from_pretrained`` call.

    Returns:
        The loaded pipeline, created with ``safety_checker=None``.
    """
    pipeline_kwargs = {"torch_dtype": torch_dtype, "safety_checker": None}

    if clip_skip > 1:
        # Load the text encoder first, exactly as the pipeline will receive it.
        pipeline_kwargs["text_encoder"] = transformers.CLIPTextModel.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            subfolder="text_encoder",
            num_hidden_layers=12 - (clip_skip - 1),
            torch_dtype=torch_dtype,
        )

    return diffusers.DiffusionPipeline.from_pretrained(model_path, **pipeline_kwargs)

# Prompt embeddings
def get_embeddings(pipe, prompt, negative_prompt, split_character=",", device_name=torch.device("cpu")):
    """Encode a prompt pair in tokenizer-sized chunks and concatenate the results.

    Both prompts are tokenized without truncation and padded to the token
    length of the longer one, so the two returned tensors always have the same
    sequence length. The ids are then fed to ``pipe.text_encoder`` in windows
    of ``pipe.tokenizer.model_max_length`` tokens and the per-window outputs
    are concatenated along dimension 1.

    Args:
        pipe: Object exposing ``tokenizer`` and ``text_encoder``.
        prompt: Positive prompt text.
        negative_prompt: Negative prompt text.
        split_character: Unused. Kept only for backward compatibility; the
            longer prompt used to be guessed by counting segments separated by
            this character, which could pick the wrong one.
        device_name: Device the token ids are moved to before encoding.

    Returns:
        A ``(prompt_embeds, negative_prompt_embeds)`` tuple of tensors.
    """
    tokenizer = pipe.tokenizer
    max_length = tokenizer.model_max_length

    def token_count(text):
        # Length of the untruncated, unpadded token sequence.
        return tokenizer(text, return_tensors="pt", truncation=False).input_ids.shape[-1]

    # Compare real token counts: a prompt with few separators can still be the
    # longer one, and padding to the shorter length would leave the two id
    # tensors with different lengths.
    shape_max_length = max(token_count(prompt), token_count(negative_prompt))

    def padded_ids(text):
        return tokenizer(
            text,
            return_tensors="pt",
            truncation=False,
            padding="max_length",
            max_length=shape_max_length,
        ).input_ids.to(device_name)

    input_ids = padded_ids(prompt)
    negative_ids = padded_ids(negative_prompt)

    # Encode window by window; gradients are not needed for inference.
    concat_embeds = []
    neg_embeds = []
    with torch.no_grad():
        for i in range(0, shape_max_length, max_length):
            concat_embeds.append(pipe.text_encoder(input_ids[:, i: i + max_length])[0])
            neg_embeds.append(pipe.text_encoder(negative_ids[:, i: i + max_length])[0])

    return torch.cat(concat_embeds, dim=1), torch.cat(neg_embeds, dim=1)
# Image generation and saving
def generate_and_save_images(pipe, prompt_embeds, negative_prompt_embeds, seeds, use_prompt_embeddings, guidance_scale, num_inference_steps, width, height, device_name):
    """Run the pipeline once per seed and write the images under ``images/``.

    Each image is saved as ``images/image_<n>.png`` as soon as it is produced,
    where ``n`` counts the images in generation order. Saving immediately
    means images already generated survive a failure on a later seed and do
    not have to be kept in memory.

    Args:
        pipe: Callable pipeline whose result exposes an ``images`` list.
        prompt_embeds: Positive conditioning. Passed as ``prompt`` when
            ``use_prompt_embeddings`` is ``False``, otherwise as
            ``prompt_embeds``.
        negative_prompt_embeds: Negative conditioning, passed as
            ``negative_prompt`` or ``negative_prompt_embeds`` by the same
            rule.
        seeds: Iterable of integer seeds; one pipeline call is made per seed.
        use_prompt_embeddings: Selects the keyword pair described above.
        guidance_scale: Forwarded to the pipeline.
        num_inference_steps: Forwarded to the pipeline.
        width: Forwarded to the pipeline.
        height: Forwarded to the pipeline.
        device_name: Unused; kept so existing callers keep working.
    """
    # Only an explicit ``False`` selects the raw-prompt keywords.
    if use_prompt_embeddings is False:
        prompt_kwargs = {
            "prompt": prompt_embeds,
            "negative_prompt": negative_prompt_embeds,
        }
    else:
        prompt_kwargs = {
            "prompt_embeds": prompt_embeds,
            "negative_prompt_embeds": negative_prompt_embeds,
        }

    os.makedirs("images", exist_ok=True)

    saved = 0
    for seed in seeds:
        new_images = pipe(
            width=width,
            height=height,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
            num_images_per_prompt=1,
            generator=torch.manual_seed(seed),
            **prompt_kwargs,
        ).images

        for img in new_images:
            img.save(f"images/image_{saved}.png")
            saved += 1

# ZIP archive creation and download
def create_and_download_zip():
    """Bundle everything under ``images`` into ``images.zip`` and download it.

    Every file found by walking ``images`` is added to the archive under its
    path relative to the parent of ``images``. The archive is then passed to
    ``colab_files.download`` and the ``images`` directory is removed.
    """
    archive_root = os.path.join("images", '..')

    with zipfile.ZipFile("images.zip", "w") as archive:
        for folder, _, names in os.walk("images"):
            for name in names:
                source = os.path.join(folder, name)
                archive.write(source, os.path.relpath(source, archive_root))

    colab_files.download("images.zip")
    shutil.rmtree("images")

# Entry point
def main():
    """Read ``config.json``, generate one image per seed, and download them.

    The configuration supplies the prompts, ``clip_skip``, the model path, the
    seed range (``start_idx`` .. ``start_idx + batch_size - 1``) and the
    sampling parameters. The pipeline is moved to the selected device and its
    scheduler is replaced by an Euler-ancestral scheduler built from the
    existing scheduler config.

    When ``use_prompt_embeddings`` is ``False`` the raw prompt strings are
    handed to the pipeline; otherwise the prompts are first encoded with
    ``get_embeddings``.
    """
    # Prompts may contain non-ASCII text, so do not rely on the locale default.
    with open('config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)

    # Unpack the settings.
    prompt = config["prompt"].strip()
    negative_prompt = config["negative_prompt"].strip()
    clip_skip = config["clip_skip"]
    model_path = config["model_path"]
    use_prompt_embeddings = config["use_prompt_embeddings"]
    start_idx = config["start_idx"]
    batch_size = config["batch_size"]
    seeds = list(range(start_idx, start_idx + batch_size))
    num_inference_steps = config["num_inference_steps"]
    guidance_scale = config["guidance_scale"]
    width = config["width"]
    height = config["height"]

    device_name, torch_dtype = get_device_and_dtype()
    pipe = setup_pipeline(model_path, clip_skip, torch_dtype)
    pipe = pipe.to(device_name)
    pipe.scheduler = diffusers.EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

    # generate_and_save_images passes these as `prompt=` / `negative_prompt=`
    # when use_prompt_embeddings is False, and the pipeline only accepts text
    # there, so embeddings must be computed only when they will be used as
    # embeddings. The condition mirrors the one used by that function.
    if use_prompt_embeddings is False:
        prompt_inputs, negative_prompt_inputs = prompt, negative_prompt
    else:
        prompt_inputs, negative_prompt_inputs = get_embeddings(
            pipe, prompt, negative_prompt, ",", device_name
        )

    generate_and_save_images(
        pipe,
        prompt_inputs,
        negative_prompt_inputs,
        seeds,
        use_prompt_embeddings,
        guidance_scale,
        num_inference_steps,
        width,
        height,
        device_name,
    )

    create_and_download_zip()

In [3]:
# Run the whole workflow: read config.json, generate the images, and download them as a ZIP.
main()

Downloading (…)_encoder/config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

Some weights of the model checkpoint at runwayml/stable-diffusion-v1-5 were not used when initializing CLIPTextModel: ['text_model.encoder.layers.11.self_attn.q_proj.bias', 'text_model.encoder.layers.11.self_attn.v_proj.weight', 'text_model.encoder.layers.11.self_attn.out_proj.bias', 'text_model.encoder.layers.11.self_attn.k_proj.weight', 'text_model.encoder.layers.11.self_attn.q_proj.weight', 'text_model.encoder.layers.11.layer_norm2.weight', 'text_model.encoder.layers.11.self_attn.k_proj.bias', 'text_model.encoder.layers.11.layer_norm2.bias', 'text_model.encoder.layers.11.mlp.fc1.bias', 'text_model.encoder.layers.11.self_attn.out_proj.weight', 'text_model.encoder.layers.11.mlp.fc2.weight', 'text_model.encoder.layers.11.layer_norm1.bias', 'text_model.encoder.layers.11.mlp.fc1.weight', 'text_model.encoder.layers.11.layer_norm1.weight', 'text_model.encoder.layers.11.self_attn.v_proj.bias', 'text_model.encoder.layers.11.mlp.fc2.bias']
- This IS expected if you are initializing CLIPTextMo

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
Token indices sequence length is longer than the specified maximum sequence length for this model (227 > 77). Running this sequence through the model will result in indexing errors


  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

  0%|          | 0/40 [00:00<?, ?it/s]

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>