In [39]:
# Step 1: Imports
import torch
import peft
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

from diffusers import StableDiffusionPipeline
from peft import LoraConfig, get_peft_model, PeftModel
#from diffusers.models.attention_processor import LoRAAttnProcessor
# diffusers-> Stable Diffusion library
# LoRAAttnProcessor -> lightweight fine-tuning
# LoRA fine-tuning with Modern Diffusers PEFT (Parameter Efficient Fine-Tuning)
print("PyTorch version:",torch.__version__)
print("PEFT VERSION:", peft.__version__)

PyTorch version: 2.9.1+cpu
PEFT VERSION: 0.18.0


In [31]:
# Step 2 - Load Stable Diffusion
model_id = "runwayml/stable-diffusion-v1-5"

pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float32
)

pipe = pipe.to("cpu")
pipe.safety_checker = None
print("diff model", model_id, pipe)

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

diff model runwayml/stable-diffusion-v1-5 StableDiffusionPipeline {
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.36.0",
  "_name_or_path": "runwayml/stable-diffusion-v1-5",
  "feature_extractor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "image_encoder": [
    null,
    null
  ],
  "requires_safety_checker": true,
  "safety_checker": [
    null,
    null
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}



In [22]:
# Step 3 - Test Base Model
prompt = "seamless knitted textile pattern, flat surface"

image = pipe(
    prompt=prompt,
    num_inference_steps=30,
    guidance_scale=7.5
).images[0]

image.show()
# if image is showing then everything is working

  0%|          | 0/30 [00:00<?, ?it/s]

In [32]:
# Step 4 - Create Dataset Loader class
class KnitDataset(Dataset):
    def __init__(self, image_dir, caption_dir):
        self.image_dir = image_dir
        self.caption_dir = caption_dir

        self.images = sorted([
            f for f in os.listdir(image_dir)
            if f.endswith(".png") or f.endswith(".jpg")
        ])

        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor()
        ])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]

        img_path = os.path.join(self.image_dir, img_name)
        img = Image.open(img_path).convert("RGB")
        img = self.transform(img)

        caption_path = os.path.join(
            self.caption_dir,
            img_name.replace(".png", ".txt")
        )

        with open(caption_path, "r") as f:
            caption = f.read().strip()

        return img, caption
print("Loader class:", img)

Loader class: tensor([[[[0.6863, 0.6863, 0.6863,  ..., 0.6863, 0.6863, 0.6863],
          [0.6863, 0.6863, 0.6863,  ..., 0.6863, 0.6863, 0.6863],
          [0.6863, 0.6863, 0.6863,  ..., 0.6863, 0.6863, 0.6863],
          ...,
          [0.8431, 0.8431, 0.8431,  ..., 0.8431, 0.8431, 0.8431],
          [0.8431, 0.8431, 0.8431,  ..., 0.8431, 0.8431, 0.8431],
          [0.8431, 0.8431, 0.8431,  ..., 0.8431, 0.8431, 0.8431]],

         [[0.8510, 0.8510, 0.8510,  ..., 0.8510, 0.8510, 0.8510],
          [0.8510, 0.8510, 0.8510,  ..., 0.8510, 0.8510, 0.8510],
          [0.8510, 0.8510, 0.8510,  ..., 0.8510, 0.8510, 0.8510],
          ...,
          [0.9255, 0.9255, 0.9255,  ..., 0.9255, 0.9255, 0.9255],
          [0.9255, 0.9255, 0.9255,  ..., 0.9255, 0.9255, 0.9255],
          [0.9255, 0.9255, 0.9255,  ..., 0.9255, 0.9255, 0.9255]],

         [[1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
          [1.0000, 1.0000, 1

In [33]:
# STEP 5 - Load Dataset
dataset = KnitDataset("dataset", "dataset/captions")
print("data loaded", dataset)

data loaded <__main__.KnitDataset object at 0x7f3a305fb230>


In [34]:
# Step 6 - Attach LoRA Adapters
lora_config = LoraConfig(
    r=8,                         # rank
    lora_alpha=16,
    target_modules=["to_q", "to_k", "to_v", "to_out.0"],  # attention layers
    lora_dropout=0.1,
    bias="none",
)

pipe.unet = get_peft_model(pipe.unet, lora_config)
pipe.unet.print_trainable_parameters()

print("attach succesfully LoRA this keep original weights frozen & adds small trainble adapters & allow learning new concepts(knits)")

trainable params: 1,594,368 || all params: 861,115,332 || trainable%: 0.1852
attach succesfully LoRA this keep original weights frozen & adds small trainble adapters & allow learning new concepts(knits)


In [35]:
# STEP 7 - Optimizer
optimizer = torch.optim.AdamW(
    pipe.unet.parameters(),
    lr=1e-4
)
print("adding optimizer")

adding optimizer


In [36]:
# STEP 8 - Training Loop
pipe.unet.train()

for epoch in range(3):
    print(f"\nEpoch {epoch+1}")

    for img, text in dataset:
        img = img.unsqueeze(0)

        latents = pipe.vae.encode(img).latent_dist.sample()
        latents = latents * 0.18215

        noise = torch.randn_like(latents)
        timesteps = torch.randint(0, 1000, (1,), device=latents.device).long()

        noisy_latents = pipe.scheduler.add_noise(latents, noise, timesteps)

        text_inputs = pipe.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        encoder_hidden_states = pipe.text_encoder(
            text_inputs.input_ids
        )[0]

        noise_pred = pipe.unet(
            noisy_latents,
            timesteps,
            encoder_hidden_states
        ).sample

        loss = torch.nn.functional.mse_loss(noise_pred, noise)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print("Loss:", loss.item())



Epoch 1
Loss: 0.12019162625074387

Epoch 2
Loss: 0.032951563596725464

Epoch 3
Loss: 0.12864385545253754


In [41]:
# Step 9 - Save LoRA Weights
pipe.unet.save_pretrained("knitfr_lora")
print("saving model")

saving model


In [None]:
# Step 10 - Use Trained Model
#pipe.load_lora_weights("knit_lora_front", prefix=None)
pipe.unet = PeftModel.from_pretrained(pipe.unet, "knitfr_lora")
pipe.unet.eval()

image = pipe(
    "<knitstyle> flat knit fabric texture, seamless textile, top view",
    negative_prompt="person, human, body, mannequin, model, face, arms",
    num_inference_steps=40,
    guidance_scale=8.5
).images[0]

image.show()




  0%|          | 0/40 [00:00<?, ?it/s]