In [None]:
# Mount Google Drive inside the Colab runtime; the dataset paths used by the
# following cells live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os
import json
import base64

# -----------------------------------------------
# 1. Set the folder paths
# -----------------------------------------------
# Folder containing your original images
INPUT_FOLDER = "/content/drive/MyDrive/new dataset/new dataset"
# Output JSON file path
JSON_OUTPUT = "/content/dataset.json"

# -----------------------------------------------
# 2. Create a dictionary to hold file names and image data
# -----------------------------------------------
# Maps each image filename to the base64 text of its raw bytes.
# Note: every encoded image is held in memory until the JSON file is written.
image_data_dict = {}

# os.listdir() returns entries in arbitrary order; sorting keeps the key order
# of the generated JSON identical from run to run.
for filename in sorted(os.listdir(INPUT_FOLDER)):
    # Keep only entries whose (case-insensitive) extension is an image type
    if not filename.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):
        continue

    file_path = os.path.join(INPUT_FOLDER, filename)

    # A sub-directory could carry an image-like name; open() would fail on it
    if not os.path.isfile(file_path):
        continue

    # Read the image in binary mode and encode it in base64
    with open(file_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode("utf-8")

    # Save the encoded string in the dictionary with the filename as the key
    image_data_dict[filename] = encoded_string

# -----------------------------------------------
# 3. Write the dictionary to a JSON file
# -----------------------------------------------
with open(JSON_OUTPUT, "w") as json_file:
    json.dump(image_data_dict, json_file, indent=4)

print(f"JSON file saved at: {JSON_OUTPUT}")


JSON file saved at: /content/dataset.json


In [None]:
import os
import cv2
import numpy as np
from albumentations import (
    Compose, HorizontalFlip, RandomRotate90, ShiftScaleRotate,
    RandomBrightnessContrast, Resize
)
from PIL import Image

# Where the source images are read from, and where the resized/augmented
# copies are written.
INPUT_FOLDER = "/content/drive/MyDrive/new dataset/new dataset"
AUGMENTED_FOLDER = "/content/floorplan_augmented"

# Make sure the destination directory is present before anything is saved.
os.makedirs(AUGMENTED_FOLDER, exist_ok=True)

# Augmentation pipeline. The resize step always runs; each of the remaining
# transforms is applied independently with probability 0.5.
augmentations = Compose(
    [
        Resize(512, 512),
        HorizontalFlip(p=0.5),
        RandomRotate90(p=0.5),
        ShiftScaleRotate(
            shift_limit=0.0625,
            scale_limit=0.1,
            rotate_limit=15,
            p=0.5,
        ),
        RandomBrightnessContrast(p=0.5),
    ]
)

# Function to save images using PIL
def save_image(image_array, save_path):
    """Write an image array to ``save_path`` through PIL and report the outcome.

    Failures are not propagated: any exception raised while converting or
    saving is printed and the function returns normally.

    Args:
        image_array: Array accepted by ``Image.fromarray``.
        save_path: Destination file path passed to the PIL image's ``save``.

    Returns:
        None.
    """
    try:
        Image.fromarray(image_array).save(save_path)
    except Exception as err:
        print(f"❌ Error saving {save_path}: {err}")
    else:
        print(f"✅ Saved: {save_path}")

# Function to augment a single image
def augment_image(image_path, save_path, num_augmented=3, orig_size=(512, 512)):
    """Save a resized copy of one image plus several randomly augmented variants.

    The resized copy is written as ``<name>_orig<ext>`` and the variants as
    ``<name>_aug1<ext>`` ... ``<name>_augN<ext>`` inside ``save_path``.

    Args:
        image_path: Path of the source image; read with ``cv2.imread``.
        save_path: Directory that receives the output files.
        num_augmented: Number of augmented variants to generate.
        orig_size: ``(width, height)`` of the un-augmented reference copy.

    Returns:
        None. If the image cannot be read, an error is printed and nothing
        is written.
    """
    # cv2.imread signals an unreadable/missing file by returning None
    image = cv2.imread(image_path)
    if image is None:
        print(f"❌ Error loading image: {image_path}")
        return

    # Convert BGR (OpenCV format) to RGB
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    filename, ext = os.path.splitext(os.path.basename(image_path))

    # The "_orig" copy must only be resized. Running it through the full
    # `augmentations` pipeline would randomly flip/rotate/recolour it, so the
    # file labelled as the original would itself be an augmentation.
    # NOTE(review): bilinear interpolation is meant to mirror the pipeline's
    # resize step — confirm it matches the albumentations default.
    resized = cv2.resize(image, tuple(orig_size), interpolation=cv2.INTER_LINEAR)
    orig_save_path = os.path.join(save_path, f"{filename}_orig{ext}")
    save_image(resized, orig_save_path)

    # Generate and save augmented versions (each call draws fresh random transforms)
    for i in range(num_augmented):
        augmented = augmentations(image=image)['image']
        aug_save_path = os.path.join(save_path, f"{filename}_aug{i+1}{ext}")
        save_image(augmented, aug_save_path)

# Collect every entry of the input folder whose extension marks it as an image
image_files = []
for entry_name in os.listdir(INPUT_FOLDER):
    if entry_name.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp', '.gif')):
        image_files.append(entry_name)

if image_files:
    print(f"📸 Found {len(image_files)} images. Starting augmentation...")

    # One resized copy plus three augmented variants per source image
    for img_file in image_files:
        img_path = os.path.join(INPUT_FOLDER, img_file)
        augment_image(img_path, AUGMENTED_FOLDER, num_augmented=3)

    print("✅ Data augmentation completed! Augmented images are saved in:", AUGMENTED_FOLDER)
else:
    print("❌ No images found in the input folder. Check the path.")


📸 Found 13 images. Starting augmentation...
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1c_orig.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1c_aug1.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1c_aug2.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1c_aug3.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1e_orig.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1e_aug1.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1e_aug2.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1e_aug3.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1k_orig.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1k_aug1.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1k_aug2.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1k_aug3.jpg
✅ Saved: /content/floorplan_augmented/apartment-floor-plan-1f_orig.jpg
✅ Saved: /content/floorplan_augme

In [None]:
import os
# Raise the Hugging Face Hub network timeouts to 60 seconds.
# These must be set before huggingface_hub / diffusers are imported, because
# the values are read from the environment when the library is loaded.
os.environ["HF_HUB_ETAG_TIMEOUT"] = "60"       # metadata (ETag) requests
os.environ["HF_HUB_DOWNLOAD_TIMEOUT"] = "60"   # file downloads
# NOTE(review): this name does not appear among the documented huggingface_hub
# environment variables; kept only so existing behaviour is not removed — confirm.
os.environ["HUGGINGFACE_HUB_HTTP_TIMEOUT"] = "60"  # Increase timeout to 60 seconds


In [None]:
# Imported in this cell so it runs on a fresh kernel: no earlier cell brings
# `torch` or `ControlNetModel` into scope, which is what produced the
# recorded NameError.
import torch
from diffusers import ControlNetModel

# Half-precision ControlNet weights, loaded strictly from the local cache.
# With local_files_only=True this raises if the model was never downloaded.
controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/sd-controlnet-canny",
    torch_dtype=torch.float16,
    local_files_only=True,  # Use local cache only
)


NameError: name 'ControlNetModel' is not defined

In [None]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Compose, ToTensor, Normalize, Resize
from PIL import Image
from diffusers import ControlNetModel, StableDiffusionPipeline, DDPMScheduler
import torch.nn.functional as F
from torch.cuda.amp import autocast, GradScaler

# ----------------------------
# 1. Custom Dataset Definition
# ----------------------------
# Folder the augmentation cell writes its resized/augmented images to; it is
# used below as the training-set directory.
AUGMENTED_FOLDER = "/content/floorplan_augmented"

class FloorPlanDatasetFolder(Dataset):
    """Dataset over the image files of one folder, each paired with a text prompt.

    Every item is a dict ``{"image": ..., "prompt": ...}`` where the prompt is
    ``"Floor plan: <filename without extension>"``.

    Args:
        folder: Directory containing the image files.
        transform: Optional callable applied to the loaded RGB PIL image.

    Raises:
        ValueError: If ``folder`` is not an existing directory, or if it
            contains no file with a supported image extension.
    """

    # Extensions (lower-case) that mark a file as an image
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    def __init__(self, folder, transform=None):
        self.folder = folder

        # isdir() is False both for a missing path and for a non-directory
        if not os.path.isdir(folder):
            raise ValueError(f"Dataset folder '{folder}' does not exist or is not a directory.")

        # os.listdir() order is arbitrary: sort so that index -> file is stable
        # across runs, and skip directories that merely have an image-like name.
        self.filenames = sorted(
            f for f in os.listdir(folder)
            if f.lower().endswith(self.IMAGE_EXTENSIONS)
            and os.path.isfile(os.path.join(folder, f))
        )

        # Check if dataset is empty
        if len(self.filenames) == 0:
            raise ValueError(f"No valid image files found in '{folder}'. Please check the dataset.")

        self.transform = transform

    def __len__(self):
        """Return the number of image files found in the folder."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load item ``idx`` and return ``{"image": image, "prompt": prompt}``.

        Raises:
            ValueError: If the image file cannot be opened or converted to RGB.
        """
        filename = self.filenames[idx]
        image_path = os.path.join(self.folder, filename)

        # Load image safely; the context manager closes the underlying file
        # handle once the RGB copy has been produced.
        try:
            with Image.open(image_path) as raw_image:
                image = raw_image.convert("RGB")
        except Exception as e:
            # Chain the cause so the original traceback is not lost
            raise ValueError(f"Error loading image {image_path}: {e}") from e

        if self.transform is not None:
            image = self.transform(image)

        # The prompt is derived purely from the file name
        prompt = f"Floor plan: {os.path.splitext(filename)[0]}"
        return {"image": image, "prompt": prompt}

# ----------------------------
# 2. Image Transformations
# ----------------------------
# Preprocessing applied to every PIL image: resize, convert to a tensor, then
# normalise each channel with mean 0.5 / std 0.5.
# Note: `Compose` and `Resize` here are the torchvision classes imported in this
# cell; the augmentation cell imports the same names from albumentations.
transform = Compose(
    [
        Resize((512, 512)),
        ToTensor(),
        Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

# Build the dataset, reporting (rather than propagating) a construction error
try:
    dataset = FloorPlanDatasetFolder(folder=AUGMENTED_FOLDER, transform=transform)
except ValueError as e:
    print(f"Dataset Error: {e}")
    dataset = None  # Avoid crashing later
else:
    print(f"Loaded dataset with {len(dataset)} images.")

# Stop here when no usable dataset is available; otherwise batch it
if not dataset:
    raise RuntimeError("Dataset could not be loaded. Check dataset folder and file format.")
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

# ----------------------------
# 3. Load Pre-trained Models and Scheduler
# ----------------------------
# Load models in float32 instead of float16 for training
# NOTE(review): `controlnet` is still loaded so the name stays defined, but the
# training loop below never uses it — confirm whether ControlNet training was intended.
controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-canny")

# `controlnet` is deliberately NOT passed here: the recorded run shows
# StableDiffusionPipeline reporting it as an unexpected keyword argument, so
# passing it had no effect on the pipeline.
pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
pipe = pipe.to("cuda")

# Use DDPM noise scheduler (supplies add_noise() and the training timestep count)
noise_scheduler = DDPMScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
)

# ----------------------------
# 4. Fine-Tuning with Stable Diffusion Loss (Fixed)
# ----------------------------
# Only the UNet is optimised. The VAE and text encoder are used purely as
# frozen encoders, so switch their gradients off explicitly.
pipe.vae.requires_grad_(False)
pipe.text_encoder.requires_grad_(False)

# Recompute UNet activations during the backward pass instead of storing them,
# trading compute for lower peak GPU memory.
# NOTE(review): the recorded run ended in CUDA OOM on a ~15 GiB GPU. This lowers
# activation memory, but fp32 weights + gradients + AdamW state for the whole
# UNet may still not fit — consider a smaller batch size if it persists.
pipe.unet.enable_gradient_checkpointing()

optimizer = torch.optim.AdamW(pipe.unet.parameters(), lr=5e-6)  # Reduced LR for stability

# Loss scaler for mixed-precision training. Prefer the device-agnostic class
# (the torch.cuda.amp one emits a FutureWarning in the recorded run) and fall
# back to the imported one on PyTorch builds that lack it.
if hasattr(torch.amp, "GradScaler"):
    scaler = torch.amp.GradScaler("cuda")
else:
    scaler = GradScaler()

num_epochs = 1  # Adjust as needed
pipe.unet.train()
pipe.vae.eval()
pipe.text_encoder.eval()

for epoch in range(num_epochs):
    for batch_idx, batch in enumerate(dataloader):
        images = batch["image"].to("cuda")
        prompts = batch["prompt"]

        # ----------------------------
        # a. Encode Images into Latents with Autocast
        # ----------------------------
        with torch.no_grad(), torch.autocast(device_type="cuda"):
            latents = pipe.vae.encode(images).latent_dist.sample()
            latents = latents * pipe.vae.config.scaling_factor

        # ----------------------------
        # b. Sample Noise and Timesteps
        # ----------------------------
        # randn_like already allocates on the device of `latents`
        noise = torch.randn_like(latents)
        batch_size = latents.shape[0]
        # Read the step count from the scheduler config; direct attribute access
        # triggers the deprecation warning seen in the recorded run.
        timesteps = torch.randint(
            0, noise_scheduler.config.num_train_timesteps, (batch_size,), device="cuda"
        ).long()

        # Add noise to latents
        noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)

        # ----------------------------
        # c. Tokenize Prompts and Get Text Embeddings
        # ----------------------------
        tokenized = pipe.tokenizer(
            prompts, padding="max_length", truncation=True, max_length=77, return_tensors="pt"
        )
        input_ids = tokenized.input_ids.to("cuda")
        with torch.no_grad(), torch.autocast(device_type="cuda"):
            text_embeddings = pipe.text_encoder(input_ids).last_hidden_state

        # ----------------------------
        # d. Predict the Noise using UNet with Gradient Scaling
        # ----------------------------
        optimizer.zero_grad()
        with torch.autocast(device_type="cuda"):
            noise_pred = pipe.unet(noisy_latents, timesteps, encoder_hidden_states=text_embeddings).sample

            # Check for NaNs before computing loss
            if torch.isnan(noise_pred).any():
                print(f"Warning: NaN detected in UNet output. Skipping batch {batch_idx}")
                continue

            # Compute MSE loss in float32 between predicted and true noise
            loss = F.mse_loss(noise_pred.float(), noise.float())

        # ----------------------------
        # e. Backpropagation with Gradient Clipping
        # ----------------------------
        scaler.scale(loss).backward()
        # Unscale first so the clipping threshold applies to the true gradients
        scaler.unscale_(optimizer)
        torch.nn.utils.clip_grad_norm_(pipe.unet.parameters(), max_norm=1.0)
        scaler.step(optimizer)
        scaler.update()

        if batch_idx % 5 == 0:  # Print every 5 batches
            print(f"Epoch {epoch}, Batch {batch_idx}, Loss: {loss.item()}")

# Save the fine-tuned model (relative path: written to the current working directory)
pipe.save_pretrained("fine_tuned_floorplan_model")
print("Fine-tuning completed and model saved.")


Loaded dataset with 52 images.


Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

Keyword arguments {'controlnet': ControlNetModel(
  (conv_in): Conv2d(4, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (time_proj): Timesteps()
  (time_embedding): TimestepEmbedding(
    (linear_1): Linear(in_features=320, out_features=1280, bias=True)
    (act): SiLU()
    (linear_2): Linear(in_features=1280, out_features=1280, bias=True)
  )
  (controlnet_cond_embedding): ControlNetConditioningEmbedding(
    (conv_in): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (blocks): ModuleList(
      (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (3): Conv2d(32, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (4): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (5): Conv2d(96, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    )
   

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  scaler = GradScaler()  # Helps with mixed precision training
  with torch.no_grad(), autocast(enabled=True):
  deprecate("direct config name access", "1.0.0", deprecation_message, standard_warn=False)
  with torch.no_grad(), autocast(enabled=True):
  with autocast(enabled=True):


OutOfMemoryError: CUDA out of memory. Tried to allocate 58.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 16.12 MiB is free. Process 20413 has 14.72 GiB memory in use. Of the allocated memory 14.43 GiB is allocated by PyTorch, and 156.28 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)