In [8]:
import torchvision
from torchvision import transforms
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
import matplotlib.pyplot as plt
import cv2
from diffusers import UNet2DModel
from diffusers import AutoencoderKL
from diffusers import DDPMScheduler, DPMSolverMultistepScheduler, DDIMScheduler
import torch.nn.functional as F
import numpy as np
from tqdm import tqdm
from diffusers.pipeline_utils import DiffusionPipeline
from diffusers import DDPMPipeline
from diffusers import StableDiffusionPipeline, StableDiffusionDepth2ImgPipeline
from PIL import Image
import json
import create_init_unet
import pathlib
import copy
import wandb
from pipeline_ddpm_sketch2img import DDPMSketch2ImgPipeline
from datasets import FashionMNISTDataset
device = "cuda"
VAE_SCALE_FACTOR = 0.18215

In [2]:
dataset = FashionMNISTDataset("./data/FashionMNIST")

In [18]:
scheduler = DDPMScheduler.from_pretrained("./model/from_init_test/scheduler")

In [51]:
pil_list = [Image.open("./data/dog.png"), Image.open("./data/dog.7.jpg")]
tesnsor = transforms.functional.pil_to_tensor(pil_list[0])
print(tesnsor[0])
tesnsor = tesnsor.int()
print(tesnsor[0])
print(tesnsor.dtype)
img = transforms.functional.to_pil_image(tesnsor.to(torch.uint16))
img

tensor([[159, 156, 153,  ..., 153, 152, 157],
        [156, 152, 154,  ..., 151, 150, 155],
        [155, 154, 155,  ..., 152, 154, 155],
        ...,
        [122, 121, 116,  ...,  92,  88,  95],
        [124, 114, 125,  ..., 101, 104,  92],
        [122, 112, 116,  ..., 102, 107, 102]], dtype=torch.uint8)
tensor([[159, 156, 153,  ..., 153, 152, 157],
        [156, 152, 154,  ..., 151, 150, 155],
        [155, 154, 155,  ..., 152, 154, 155],
        ...,
        [122, 121, 116,  ...,  92,  88,  95],
        [124, 114, 125,  ..., 101, 104,  92],
        [122, 112, 116,  ..., 102, 107, 102]], dtype=torch.int32)
torch.int32


AttributeError: module 'torch' has no attribute 'uint16'

In [15]:
print(pipe)

StableDiffusionDepth2ImgPipeline {
  "_class_name": "StableDiffusionDepth2ImgPipeline",
  "_diffusers_version": "0.11.1",
  "depth_estimator": [
    "transformers",
    "DPTForDepthEstimation"
  ],
  "feature_extractor": [
    "transformers",
    "DPTImageProcessor"
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}



In [3]:
def train(dataset):
    # define configs
    save_path = "./model/from_init_test_train_func"
    pretrained_model_name_or_path = "./model/init_s2i_fmnist_5epochs"
    num_epochs = 1
    batch_size = 64
    lr = 1e-5
    grad_accumulation_steps = 2
    train_dataset_rate = 0.01
    device = "cuda"
    wandb_project_name = "ddpm_train_test"

    # wandb initizalize
    config = dict(
        batch_size=batch_size,
        lr=lr,
        grad_accumulation_steps=grad_accumulation_steps,
        num_epochs=num_epochs,
        device=device,
        train_dataset_rate=train_dataset_rate,
    )
    wandb.init(project=wandb_project_name, config=config)

    # load pipe
    pipe = DDPMPipeline.from_pretrained(
        pretrained_model_name_or_path=pretrained_model_name_or_path
    ).to(device)

    # split dataset
    dataset_len = int(len(dataset) * train_dataset_rate)
    dataset, _ = torch.utils.data.random_split(
        dataset, [dataset_len, len(dataset) - dataset_len]
    )

    # train
    dataloader = DataLoader(dataset, batch_size, shuffle=True)
    optimizer = torch.optim.AdamW(pipe.unet.parameters(), lr=lr)

    for epoch in range(num_epochs):
        print(epoch)
        for step, (image, sketch) in enumerate(tqdm(dataloader)):
            bs = image.shape[0]

            # prepare valiables
            image = image.to(device)
            sketch = sketch.to(device)
            noise = torch.randn_like(image).to(device)
            timesteps = torch.randint(
                0, pipe.scheduler.num_train_timesteps, (bs,), device=device
            ).long()

            # set up model input
            noisy_image = pipe.scheduler.add_noise(image, noise, timesteps)
            model_input = torch.cat([noisy_image, sketch], dim=1).to(device)

            # prediction
            noise_pred = pipe.unet(model_input, timesteps).sample

            # backward
            loss = F.mse_loss(noise_pred, noise)
            loss.backward(loss)

            # optimizer's step
            if (step + 1) % grad_accumulation_steps == 0:
                optimizer.step()
                optimizer.zero_grad()

            # logging
            wandb.log({"loss": loss.item()})
    # save
    pipe.save_pretrained(save_path)

    # finish wandb
    wandb.finish()
train(dataset)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mizumisatoshi05[0m. Use [1m`wandb login --relogin`[0m to force relogin


0


100%|██████████| 10/10 [00:22<00:00,  2.24s/it]


0,1
loss,█▃▃▃▄▄▂▅▃▁

0,1
loss,0.03502


In [4]:
pipe = DDPMSketch2ImgPipeline.from_pretrained("./model/from_init_test_train_func")

In [7]:
pipe.unet.conv_in.weight[0][1]

tensor([[2.5832e-06, 1.3508e-05, 2.9377e-05],
        [1.8843e-05, 3.2196e-05, 3.0885e-05],
        [3.6792e-05, 2.4276e-05, 2.9926e-05]], grad_fn=<SelectBackward0>)