In [1]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
steubk_wikiart_path = kagglehub.dataset_download('steubk/wikiart')

print('Data source import complete.')


Using Colab cache for faster access to the 'wikiart' dataset.
Data source import complete.


In [2]:
import os
import json
from PIL import Image
from tqdm.auto import tqdm

# --- 1. Cấu hình ---
BASE_INPUT_DIR = "/kaggle/input/wikiart"

BASE_OUTPUT_DIR = "/kaggle/working/lora_dataset"

# Kích thước ảnh mong muốn (như trong kế hoạch)
IMAGE_SIZE = 512

# Số lượng ảnh lấy từ mỗi style
NUM_IMAGES_PER_STYLE = 100

# Định nghĩa 5 style và caption "kích hoạt"
# Bạn có thể đổi caption nếu muốn
STYLES_TO_PROCESS = {
    # Tên thư mục gốc : "Caption bạn muốn dùng để trigger"
    "Contemporary_Realism": "a painting in contemporary realism style",
    "New_Realism": "a painting in new realism style",
    "Synthetic_Cubism": "a painting in synthetic cubism style",
    "Analytical_Cubism": "a painting in analytical cubism style",
    "Action_painting": "a painting in action painting style",
}

print(f"Bắt đầu xử lý dataset cho {len(STYLES_TO_PROCESS)} styles...")
print(f"Output sẽ được lưu tại: {BASE_OUTPUT_DIR}")
print(f"Kích thước ảnh: {IMAGE_SIZE}x{IMAGE_SIZE}")
print(f"Số lượng ảnh/style: {NUM_IMAGES_PER_STYLE}")
print("="*40)


# --- 2. Hàm xử lý ảnh (Resize và Center Crop) ---
def process_image(img, target_size=512):
    """
    Resize ảnh sao cho cạnh ngắn nhất bằng target_size,
    sau đó center crop về target_size x target_size.
    """
    img = img.convert("RGB") # Đảm bảo ảnh là RGB

    # Tính toán kích thước mới
    width, height = img.size
    short, long = (width, height) if width < height else (height, width)

    new_short = target_size
    new_long = int(target_size * long / short)
    new_size = (new_short, new_long) if width < height else (new_long, new_short)

    # Resize
    img = img.resize(new_size, Image.Resampling.LANCZOS)

    # Center Crop
    left = (new_size[0] - target_size) / 2
    top = (new_size[1] - target_size) / 2
    right = (new_size[0] + target_size) / 2
    bottom = (new_size[1] + target_size) / 2

    img = img.crop((left, top, right, bottom))
    return img


# --- 3. Vòng lặp xử lý chính ---
for style_folder_name, style_caption in STYLES_TO_PROCESS.items():

    print(f"\nĐang xử lý style: {style_folder_name}")

    # 1. Tạo thư mục nguồn và đích
    source_dir = os.path.join(BASE_INPUT_DIR, style_folder_name)
    dest_dir = os.path.join(BASE_OUTPUT_DIR, style_folder_name)
    os.makedirs(dest_dir, exist_ok=True)

    # 2. Kiểm tra thư mục nguồn
    if not os.path.exists(source_dir):
        print(f"  [CẢNH BÁO] Không tìm thấy thư mục nguồn, bỏ qua: {source_dir}")
        continue

    # 3. Lấy danh sách ảnh
    try:
        image_files = [f for f in os.listdir(source_dir)
                       if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

        if not image_files:
            print(f"  [CẢNH BÁO] Không tìm thấy ảnh nào trong: {source_dir}")
            continue

        print(f"  Tìm thấy {len(image_files)} ảnh. Đang chọn {NUM_IMAGES_PER_STYLE} ảnh...")
        images_to_process = image_files[:NUM_IMAGES_PER_STYLE]

    except Exception as e:
        print(f"  [LỖI] Không thể đọc thư mục: {e}")
        continue

    # 4. Xử lý ảnh và tạo metadata
    metadata_list = [] # Lưu metadata cho file .jsonl

    for i, file_name in enumerate(tqdm(images_to_process, desc=f"  Xử lý {style_folder_name}")):
        source_path = os.path.join(source_dir, file_name)

        # Đặt tên file mới cho sạch
        new_file_name = f"style_{i:04d}.png"
        dest_path = os.path.join(dest_dir, new_file_name)

        try:
            with Image.open(source_path) as img:
                # Xử lý (resize/crop)
                processed_img = process_image(img, IMAGE_SIZE)

                # Lưu ảnh đã xử lý (dưới dạng PNG cho nhất quán)
                processed_img.save(dest_path, "PNG")

                # Thêm vào danh sách metadata
                metadata_entry = {
                    "file_name": new_file_name,
                    "text": style_caption
                }
                metadata_list.append(metadata_entry)

        except Exception as e:
            print(f"  [LỖI] Xử lý thất bại ảnh {file_name}: {e}")

    # 5. Ghi file metadata.jsonl
    metadata_path = os.path.join(dest_dir, "metadata.jsonl")
    try:
        with open(metadata_path, 'w', encoding='utf-8') as f:
            for entry in metadata_list:
                f.write(json.dumps(entry) + '\n')
        print(f"  Đã xử lý {len(metadata_list)} ảnh. Đã lưu metadata tại: {metadata_path}")
    except Exception as e:
        print(f"  [LỖI] Không thể ghi file metadata: {e}")

print("\n" + "="*40)
print("Xử lý toàn bộ dataset thành công!")
print(f"Dataset của bạn đã sẵn sàng tại: {BASE_OUTPUT_DIR}")

Bắt đầu xử lý dataset cho 5 styles...
Output sẽ được lưu tại: /kaggle/working/lora_dataset
Kích thước ảnh: 512x512
Số lượng ảnh/style: 100

Đang xử lý style: Contemporary_Realism
  Tìm thấy 481 ảnh. Đang chọn 100 ảnh...


  Xử lý Contemporary_Realism:   0%|          | 0/100 [00:00<?, ?it/s]

  Đã xử lý 100 ảnh. Đã lưu metadata tại: /kaggle/working/lora_dataset/Contemporary_Realism/metadata.jsonl

Đang xử lý style: New_Realism
  Tìm thấy 314 ảnh. Đang chọn 100 ảnh...


  Xử lý New_Realism:   0%|          | 0/100 [00:00<?, ?it/s]

  Đã xử lý 100 ảnh. Đã lưu metadata tại: /kaggle/working/lora_dataset/New_Realism/metadata.jsonl

Đang xử lý style: Synthetic_Cubism
  Tìm thấy 216 ảnh. Đang chọn 100 ảnh...


  Xử lý Synthetic_Cubism:   0%|          | 0/100 [00:00<?, ?it/s]

  Đã xử lý 100 ảnh. Đã lưu metadata tại: /kaggle/working/lora_dataset/Synthetic_Cubism/metadata.jsonl

Đang xử lý style: Analytical_Cubism
  Tìm thấy 110 ảnh. Đang chọn 100 ảnh...


  Xử lý Analytical_Cubism:   0%|          | 0/100 [00:00<?, ?it/s]

  Đã xử lý 100 ảnh. Đã lưu metadata tại: /kaggle/working/lora_dataset/Analytical_Cubism/metadata.jsonl

Đang xử lý style: Action_painting
  Tìm thấy 98 ảnh. Đang chọn 100 ảnh...


  Xử lý Action_painting:   0%|          | 0/98 [00:00<?, ?it/s]

  Đã xử lý 98 ảnh. Đã lưu metadata tại: /kaggle/working/lora_dataset/Action_painting/metadata.jsonl

Xử lý toàn bộ dataset thành công!
Dataset của bạn đã sẵn sàng tại: /kaggle/working/lora_dataset


In [3]:
# !pip install xformers

In [4]:
import wandb
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler, AutoPipelineForText2Image
from huggingface_hub import model_info

In [5]:
!pip install git+https://github.com/huggingface/diffusers
!pip install accelerate wand
!pip install -r https://raw.githubusercontent.com/huggingface/diffusers/main/examples/text_to_image/requirements.txt

!accelerate config default
# accelerate configuration saved at $HOME/.cache/huggingface/accelerate/default_config.yaml



Collecting git+https://github.com/huggingface/diffusers
  Cloning https://github.com/huggingface/diffusers to /tmp/pip-req-build-j1r3i009
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers /tmp/pip-req-build-j1r3i009
  Resolved https://github.com/huggingface/diffusers to commit 01a56927f1603f1e89d1e5ada74d2aa75da2d46b
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: diffusers
  Building wheel for diffusers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for diffusers: filename=diffusers-0.36.0.dev0-py3-none-any.whl size=4460436 sha256=f8b7756f82d155413f1af287bd5f18071f67ad7d9338377e9e8fa75f15b1b29b
  Stored in directory: /tmp/pip-ephem-wheel-cache-7src0ppo/wheels/90/d4/44/a58bc00fb405fefb633b0d9d2307f6e3aec6cc1775d82555d3
Successfully built diffusers
Installing collected packa

Collecting wand
  Downloading Wand-0.6.13-py2.py3-none-any.whl.metadata (4.0 kB)
Downloading Wand-0.6.13-py2.py3-none-any.whl (143 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.8/143.8 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: wand
Successfully installed wand-0.6.13
Collecting ftfy (from -r https://raw.githubusercontent.com/huggingface/diffusers/main/examples/text_to_image/requirements.txt (line 5))
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading ftfy-6.3.1-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.8/44.8 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ftfy
Successfully installed ftfy-6.3.1
accelerate configuration saved at /root/.cache/huggingface/accelerate/default_config.yaml


In [6]:
!wget https://raw.githubusercontent.com/huggingface/diffusers/main/examples/text_to_image/train_text_to_image_lora.py -O /kaggle/working/train_text_to_image_lora.py

--2025-11-16 09:40:53--  https://raw.githubusercontent.com/huggingface/diffusers/main/examples/text_to_image/train_text_to_image_lora.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 41276 (40K) [text/plain]
Saving to: ‘/kaggle/working/train_text_to_image_lora.py’


2025-11-16 09:40:53 (5.59 MB/s) - ‘/kaggle/working/train_text_to_image_lora.py’ saved [41276/41276]



In [None]:
!accelerate launch /kaggle/working/train_text_to_image_lora.py \
 --pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5 \
 --dataset_name=/kaggle/working/lora_dataset/Contemporary_Realism \
 --output_dir=/kaggle/working/lora_models/Contemporary_Realism \
 --caption_column=text \
 --resolution=512 \
 --center_crop \
 --random_flip \
 --train_batch_size=2 \
 --gradient_accumulation_steps=4 \
 --max_train_steps=1500 \
 --learning_rate=1e-4 \
 --lr_scheduler=cosine \
 --lr_warmup_steps=0 \
 --max_grad_norm=1 \
 --rank=4 \
 --mixed_precision=fp16 \
 --gradient_checkpointing \
#  --enable_xformers_memory_efficient_attention \
 --report_to=tensorboard \
 --seed=42 \
 --checkpointing_steps=500

2025-11-16 09:41:21.037196: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763286081.056874    2060 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763286081.062784    2060 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763286081.077618    2060 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1763286081.077647    2060 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1763286081.077651    2060 computation_placer.cc:177] computation placer alr