In [1]:
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import torch

model_name = "runwayml/stable-diffusion-v1-5"

pipeline = StableDiffusionPipeline.from_pretrained(
    model_name,
    torch_dtype=torch.float16
).to("cuda")

# Replace scheduler for faster sampling later
pipeline.scheduler = DPMSolverMultistepScheduler.from_config(pipeline.scheduler.config)




Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!


In [2]:
import os

for p in ["HF_HOME","HUGGINGFACE_HUB_CACHE","TRANSFORMERS_CACHE","XDG_CACHE_HOME"]:
    print(p, "=", os.getenv(p))


HF_HOME = D:\HF_MODELS
HUGGINGFACE_HUB_CACHE = D:\HF_MODELS
TRANSFORMERS_CACHE = D:\HF_MODELS
XDG_CACHE_HOME = D:\HF_MODELS


In [3]:
pipeline.save_pretrained("my_custom_sd15")

In [None]:
from torchvision import transforms
from datasets import load_dataset
from torch.utils.data import DataLoader
import torch

dataset = load_dataset("imagefolder", data_dir=r"D:\SEBASTIAN\AOL_AI\condition_classifier\archive")
label_names = dataset["train"].features["label"].names

# 2. Define Image Transforms
train_resize = transforms.Compose([
    transforms.Resize(512, interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.CenterCrop(512),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]), 
])

# 3. Define the Corrected Preprocessor
def preprocess_for_model(examples):
    # Image processing
    images = [image.convert("RGB") for image in examples["image"]]
    examples["pixel_values"] = [train_resize(image) for image in images]

    # Text processing (The part that was breaking)
    prompts = []
    for label_idx in examples["label"]:
        raw_label = label_names[label_idx]
        
        if raw_label == "brokenphone":
            p = "a photo of a broken smartphone screen with cracks"
        elif raw_label == "phone":
            p = "a photo of a clean smartphone screen in good condition"
        else:
            p = f"a photo of a {raw_label}"
        prompts.append(p)

    # Tokenize: WE USE THE LIST 'prompts', NOT examples["prompt"]
    examples["input_ids"] = pipeline.tokenizer(
        prompts, 
        padding="max_length", 
        truncation=True, 
        max_length=pipeline.tokenizer.model_max_length,
        return_tensors="pt"
    ).input_ids
    
    return examples

# 4. Apply Transform
train_dataset = dataset["train"].with_transform(preprocess_for_model)

# 5. RE-CREATE THE DATALOADER (Crucial Step!)
# If you don't run this, the loop uses the old broken dataset
def collate_fn(examples):
    pixel_values = torch.stack([ex["pixel_values"] for ex in examples])
    input_ids = torch.stack([ex["input_ids"] for ex in examples])
    return {"pixel_values": pixel_values, "input_ids": input_ids}

train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True, collate_fn=collate_fn)

‚ôªÔ∏è  Reloading dataset...


Resolving data files:   0%|          | 0/48 [00:00<?, ?it/s]

‚úÖ COMPLETE RESET. You can now run the Training Loop safely.


In [5]:
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from peft import LoraConfig
import torch
from torch.optim import AdamW

unet = pipeline.unet

# ---------------------------------------------------------
# NEW: Configure LoRA using PEFT (The modern way)
# ---------------------------------------------------------
# We freeze the UNet first
unet.requires_grad_(False)

# Define the LoRA configuration
lora_config = LoraConfig(
    r=8,                        # Rank (Same as your lora_rank)
    lora_alpha=8,               # Alpha usually equals rank or rank*2
    init_lora_weights="gaussian",
    target_modules=["to_k", "to_q", "to_v", "to_out.0"], # Target attention layers
)

# Attach the LoRA adapter to the UNet
# This automatically handles the layer injection you were trying to do manually
unet.add_adapter(lora_config)

# ---------------------------------------------------------
# Set up the Optimizer
# ---------------------------------------------------------
# The add_adapter method automatically sets requires_grad=True 
# for the LoRA layers. We just need to filter them for the optimizer.

lora_layers = filter(lambda p: p.requires_grad, unet.parameters())

# Check if we actually found parameters
trainable_params = list(lora_layers)

optimizer = AdamW(trainable_params, lr=1e-4)

print(f"\nüöÄ Trainable LoRA parameters: {sum(p.numel() for p in trainable_params):,}")
print(f"üß© LoRA layers active")


üöÄ Trainable LoRA parameters: 1,594,368
üß© LoRA layers active


In [6]:
from torch.optim import AdamW

# 1. FORCE UNFREEZE LoRA LAYERS
# We manually iterate through the model to fix the 'requires_grad' status
trainable_count = 0

print("üîß Scanning model to fix LoRA layers...")

for name, param in unet.named_parameters():
    # If the layer belongs to LoRA, we unlock it
    if "lora" in name:
        param.requires_grad = True
        # also cast to float32 to fix the 'unscale FP16' error
        param.data = param.data.to(torch.float32) 
        trainable_count += 1
    else:
        # Ensure everything else is frozen
        param.requires_grad = False

print(f"‚úÖ Found and unfrozen {trainable_count} LoRA layers.")

# 2. CREATE OPTIMIZER (Safely)
# We convert to a list first to ensure it's not empty before passing to AdamW
params_to_train = list(filter(lambda p: p.requires_grad, unet.parameters()))

if len(params_to_train) == 0:
    raise ValueError("‚ö†Ô∏è CRITICAL: LoRA adapter is missing! Please re-run the 'unet.add_adapter' block.")

optimizer = AdamW(params_to_train, lr=1e-4)
print(f"üöÄ Optimizer ready with {len(params_to_train)} tensors to update.")

üîß Scanning model to fix LoRA layers...
‚úÖ Found and unfrozen 256 LoRA layers.
üöÄ Optimizer ready with 256 tensors to update.


In [7]:
for param in unet.parameters():
    if param.requires_grad:
        param.data = param.data.to(torch.float32)

In [8]:
import diffusers
print(diffusers.__version__)


0.35.2


In [10]:
from diffusers import DDPMScheduler

# Define the scheduler needed for training
noise_scheduler = DDPMScheduler.from_config(pipeline.scheduler.config)

print("‚úÖ Scheduler is defined. Now run the training loop.")

‚úÖ Scheduler is defined. Now run the training loop.


In [None]:
from diffusers import AutoencoderKL
from torchvision import transforms
import torch
import tqdm
from torch.cuda.amp import GradScaler
from torch.optim import AdamW
import torch.nn.functional as F

device = "cuda"

vae = pipeline.vae.to(device).eval()
unet = pipeline.unet.to(device).train()  # train mode but frozen weights except LoRA



weight_dtype = torch.float16  # We use mixed precision to save memory

# Move models to GPU
unet.to(device)
vae.to(device, dtype=weight_dtype)
text_encoder = pipeline.text_encoder.to(device, dtype=weight_dtype)

# Ensure only LoRA params are training
unet.train()
vae.eval()
text_encoder.eval()

# Re-define optimizer just in case (Standard LoRA settings)
# We filter for parameters that have requires_grad=True (which is just the LoRA layers)
trainable_params = filter(lambda p: p.requires_grad, unet.parameters())
optimizer = AdamW(trainable_params, lr=1e-4)

# Scaler prevents "Underflow" errors in float16 training
scaler = GradScaler()

# --- 2. TRAINING LOOP ---
num_epochs = 5  # You can increase this if results aren't strong enough
global_step = 0

print(f"üöÄ Starting Training for {num_epochs} epochs...")

for epoch in range(num_epochs):
    # Wrap dataloader in tqdm for a progress bar
    progress_bar = tqdm.tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs}")
    
    for batch in progress_bar:
        optimizer.zero_grad()

        # A. PREPARE DATA (Images -> Latents)
        # Move pixels to GPU and cast to float16
        pixel_values = batch["pixel_values"].to(device, dtype=weight_dtype)
        
        with torch.no_grad():
            # Compress image to latents (64x64)
            latents = vae.encode(pixel_values).latent_dist.sample()
            latents = latents * vae.config.scaling_factor # 0.18215

            # Encode text prompts
            input_ids = batch["input_ids"].to(device)
            encoder_hidden_states = text_encoder(input_ids)[0]

        # B. NOISE GENERATION
        # Sample noise to add to the latents
        noise = torch.randn_like(latents)
        bsz = latents.shape[0]
        
        # Sample random timestep (how much noise to add)
        timesteps = torch.randint(
            0, noise_scheduler.config.num_train_timesteps, (bsz,), device=device
        ).long()

        # Add noise to latents (Forward Diffusion)
        noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

        # C. PREDICTION & LOSS
        # Use autocast for mixed precision (speed + memory safe)
        with torch.cuda.amp.autocast():
            # Predict the noise residual
            model_pred = unet(
                noisy_latents, 
                timesteps, 
                encoder_hidden_states
            ).sample
            
            # Calculate loss (Compare predicted noise vs actual noise)
            loss = F.mse_loss(model_pred.float(), noise.float(), reduction="mean")

        # D. BACKPROPAGATION
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        # Update progress bar
        progress_bar.set_postfix(loss=loss.item())
        global_step += 1

  scaler = GradScaler()


üöÄ Starting Training for 5 epochs...


  with torch.cuda.amp.autocast():
Epoch 1/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 48/48 [00:26<00:00,  1.78it/s, loss=0.0968] 
Epoch 2/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 48/48 [00:26<00:00,  1.84it/s, loss=0.0203] 
Epoch 3/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 48/48 [00:28<00:00,  1.68it/s, loss=0.00567]
Epoch 4/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 48/48 [00:29<00:00,  1.60it/s, loss=0.114]  
Epoch 5/5: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 48/48 [00:29<00:00,  1.62it/s, loss=0.0389] 

‚úÖ Training Finished!





In [12]:
from peft.utils import get_peft_model_state_dict

# 1. Extract the trained LoRA weights explicitly
# Since we used 'add_adapter', we need to grab the weights manually from the unet
unet_lora_layers = get_peft_model_state_dict(unet)

# 2. Save using the pipeline helper
save_path = "my_custom_lora_weights"

pipeline.save_lora_weights(
    save_directory=save_path,
    unet_lora_layers=unet_lora_layers,
    safe_serialization=True
)

print(f"üíæ Success! LoRA weights saved to folder: {save_path}")

üíæ Success! LoRA weights saved to folder: my_custom_lora_weights


In [13]:
label_names

['brokenphone', 'phone']

In [14]:
# Load the weights we just saved
pipeline.load_lora_weights("my_custom_lora_weights")

# 2. Setup Output Folder
output_dir = "synthetic_data"
os.makedirs(output_dir, exist_ok=True)

def generate_batch(class_name, prompt, total_count=500, batch_size=4):
    print(f"Generating {total_count} images for '{class_name}'...")
    
    # Calculate how many batches we need
    num_batches = total_count // batch_size
    
    for i in range(num_batches):
        # Create a list of prompts for the batch (e.g., 4 prompts at once)
        batch_prompts = [prompt] * batch_size
        
        # Run inference
        with torch.no_grad():
            images = pipeline(
                batch_prompts,
                num_inference_steps=30,
                guidance_scale=7.5
            ).images
        
        # Save images immediately and free memory
        for idx, img in enumerate(images):
            # Unique filename: ClassName_BatchNum_ImageNum.png
            filename = f"{output_dir}/{class_name}_{i}_{idx}.png"
            img.save(filename)
            
    print(f"Finished generating {class_name}")

# 3. Run Generation
# Use the exact class names from your training data
# Adjust batch_size=1 if you get CUDA Out of Memory
generate_batch(
    class_name="brokenphone", 
    prompt="a photo of a broken smartphone screen with cracks", 
    total_count=200, 
    batch_size=4
)

# 2. For the Good class
generate_batch(
    class_name="phone", 
    prompt="a photo of a clean smartphone screen in good condition", 
    total_count=200, 
    batch_size=4
)



üöÄ Generating 200 images for 'brokenphone'...


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

‚úÖ Finished generating brokenphone
üöÄ Generating 200 images for 'phone'...


  0%|          | 0/30 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

‚úÖ Finished generating phone


In [15]:
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch
import shutil

# Setup CLIP
print("‚è≥ Loading CLIP for filtering...")
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to("cuda")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")

def get_clip_score(image, prompt):
    inputs = clip_processor(
        text=[prompt], 
        images=image, 
        return_tensors="pt", 
        padding=True
    ).to("cuda")
    
    with torch.no_grad():
        outputs = clip_model(**inputs)
    
    # This acts as a similarity score (higher is better)
    return outputs.logits_per_image.item()

# Setup paths
source_folder = "synthetic_data"
filtered_folder = "filtered_data"
os.makedirs(filtered_folder, exist_ok=True)

threshold = 22 # CLIP scores usually range 20-30 for good matches. Start strict.

print("üîç Starting Filtering Process...")

for img_name in os.listdir(source_folder):
    if not img_name.endswith(".png"): continue
    
    img_path = os.path.join(source_folder, img_name)
    
    try:
        image = Image.open(img_path).convert("RGB")
        
        # Extract class name from filename (assuming format "ClassA_0_1.png")
        class_name = img_name.split("_")[0] 
        prompt = f"a photo of {class_name}"
        
        score = get_clip_score(image, prompt)
        
        if score > threshold:
            # Copy good images to the new folder
            shutil.copy(img_path, os.path.join(filtered_folder, img_name))
            # print(f"‚úÖ Kept {img_name} (Score: {score:.2f})")
        else:
            # print(f"‚ùå Dropped {img_name} (Score: {score:.2f})")
            pass

    except Exception as e:
        print(f"Error processing {img_name}: {e}")

print(f"üéâ Filtering complete! Check the '{filtered_folder}' directory.")

‚è≥ Loading CLIP for filtering...


config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/1.71G [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


preprocessor_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/905 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/389 [00:00<?, ?B/s]

üîç Starting Filtering Process...
üéâ Filtering complete! Check the 'filtered_data' directory.


[]

dict_keys(['down_blocks.0.attentions.0.transformer_blocks.0.attn1.processor', 'down_blocks.0.attentions.0.transformer_blocks.0.attn2.processor', 'down_blocks.0.attentions.1.transformer_blocks.0.attn1.processor', 'down_blocks.0.attentions.1.transformer_blocks.0.attn2.processor', 'down_blocks.1.attentions.0.transformer_blocks.0.attn1.processor', 'down_blocks.1.attentions.0.transformer_blocks.0.attn2.processor', 'down_blocks.1.attentions.1.transformer_blocks.0.attn1.processor', 'down_blocks.1.attentions.1.transformer_blocks.0.attn2.processor', 'down_blocks.2.attentions.0.transformer_blocks.0.attn1.processor', 'down_blocks.2.attentions.0.transformer_blocks.0.attn2.processor', 'down_blocks.2.attentions.1.transformer_blocks.0.attn1.processor', 'down_blocks.2.attentions.1.transformer_blocks.0.attn2.processor', 'up_blocks.1.attentions.0.transformer_blocks.0.attn1.processor', 'up_blocks.1.attentions.0.transformer_blocks.0.attn2.processor', 'up_blocks.1.attentions.1.transformer_blocks.0.attn1.pr