In [43]:
import pandas as pd
from datasets import load_dataset
import numpy as np
from transformers import CLIPProcessor, CLIPModel
from sklearn.cluster import KMeans
import cv2
import os

In [34]:

# Load the "train" split of the Francesco/furniture-ngpea dataset via `datasets.load_dataset`
dataset = load_dataset("Francesco/furniture-ngpea", split="train")

In [35]:
# Convert the loaded dataset to a pandas DataFrame. The guard keeps this cell
# re-runnable: once `dataset` is already a DataFrame it has no `to_pandas`
# method, so an unguarded second run would raise AttributeError.
if hasattr(dataset, "to_pandas"):
    dataset = dataset.to_pandas()

In [36]:
# (rows, columns) of the converted DataFrame
dataset.shape

(454, 5)

In [37]:
# Column names available in the DataFrame
dataset.columns

Index(['image_id', 'image', 'width', 'height', 'objects'], dtype='str')

In [38]:
# Preview the first rows of the DataFrame
dataset.head()

Unnamed: 0,image_id,image,width,height,objects
0,406,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,640,640,"{'id': [406], 'area': [219402], 'bbox': [[142...."
1,164,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,640,640,"{'id': [164], 'area': [28743], 'bbox': [[268.0..."
2,329,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,640,640,"{'id': [329], 'area': [206784], 'bbox': [[42.0..."
3,379,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,640,640,"{'id': [379], 'area': [230545], 'bbox': [[2.0,..."
4,60,{'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...,640,640,"{'id': [60], 'area': [207142], 'bbox': [[25.0,..."


Detecting the room type

In [72]:
import torch
import clip
from PIL import Image
import numpy as np
import os

# Load the CLIP model once per kernel session. Later cells read `device`,
# `model` and `preprocess`, so they must be defined on a fresh kernel; the
# guard skips the reload when this cell is re-run.
if not all(name in globals() for name in ("device", "model", "preprocess")):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model, preprocess = clip.load("ViT-B/32", device=device)
    print(f"CLIP loaded on {device}")


In [73]:

# Candidate room categories that each image is scored against
room_labels = [
    "living room", "bedroom", "kitchen", "bathroom",
    "dining room", "home office", "hallway", "study room",
    "balcony", "garage", "gaming room",
]

# The label embeddings do not depend on the image, so tokenize, encode and
# L2-normalise them a single time here.
text_tokens = clip.tokenize(room_labels).to(device)
with torch.no_grad():
    text_features = model.encode_text(text_tokens)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)


In [97]:
def infer_room_type(image_path, top_k=1):
    """Score an image against ``room_labels`` with CLIP and print the best labels.

    Relies on the module-level ``preprocess``, ``model``, ``device``,
    ``text_features`` and ``room_labels`` objects created in earlier cells.

    Args:
        image_path: Path of the image file to classify.
        top_k: Number of highest-scoring labels to print and return.

    Returns:
        A list of ``(score, label)`` tuples ordered from highest to lowest
        score, where ``score`` is the softmax probability multiplied by 100.
        If the image cannot be opened, the string ``"Error: <message>"`` is
        returned instead of raising (kept for backward compatibility), so
        callers should check the type of the result.
    """
    try:
        # The context manager makes sure the underlying file is closed once
        # the pixels have been converted to an in-memory RGB image.
        with Image.open(image_path) as raw_img:
            img = raw_img.convert("RGB")
    except Exception as e:
        return f"Error: {e}"

    # unsqueeze(0) adds the leading batch dimension before moving to the device
    img_input = preprocess(img).unsqueeze(0).to(device)

    with torch.no_grad():
        img_features = model.encode_image(img_input)
        img_features /= img_features.norm(dim=-1, keepdim=True)  # L2-normalise

        # Both sides are unit-normalised, so the matrix product is a cosine
        # similarity; it is scaled by 100 and passed through a softmax over labels.
        similarity = 100.0 * img_features @ text_features.T
        probs = similarity.softmax(dim=-1).cpu().numpy()[0]

    # Indices of the top_k labels, highest probability first
    top_idx = np.argsort(probs)[::-1][:top_k]

    print("Top match:")
    results = []
    for idx in top_idx:
        label = room_labels[idx]
        print(f"{label}")
        results.append((probs[idx] * 100, label))

    return results



Extracting the colour palette

In [98]:
# Extract dominant color palette from the image
def extract_color_palette(image_path, n_colors=6, resize_to=(400, 400)):
    """Return the dominant colours of an image, most frequent first.

    The image is resized, converted to RGB and its pixels are clustered with
    K-means; each cluster centre becomes one palette entry.

    Args:
        image_path: Path of the image file to analyse.
        n_colors: Number of K-means clusters to fit.
        resize_to: Target size passed to ``cv2.resize`` before clustering.

    Returns:
        A list of ``(r, g, b)`` tuples of plain Python ints, ordered by the
        number of pixels assigned to each cluster (largest first). Clusters
        that received no pixels are omitted.

    Raises:
        FileNotFoundError: If ``image_path`` does not exist.
        ValueError: If OpenCV cannot decode the file.
    """
    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    # Load image with OpenCV (BGR format)
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError("Failed to load image")

    # Resize for faster clustering
    img = cv2.resize(img, resize_to)

    # Convert BGR to RGB
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Reshape to (pixels, 3) for K-means
    pixels = img_rgb.reshape(-1, 3).astype(np.float32)

    # Run K-means clustering (fixed random_state for repeatable palettes)
    kmeans = KMeans(n_clusters=n_colors, n_init=10, random_state=42)
    kmeans.fit(pixels)

    # Cluster centres are the palette colours
    colors = kmeans.cluster_centers_.astype(int)

    # np.unique only reports cluster ids that actually received pixels, so
    # positions in `counts` are not guaranteed to equal cluster ids.
    cluster_ids, counts = np.unique(kmeans.labels_, return_counts=True)

    # Sort by cluster size (most dominant first) and map the sorted positions
    # back to real cluster ids before indexing the colour table.
    order = np.argsort(-counts)  # negative for descending
    sorted_colors = colors[cluster_ids[order]]

    # Plain ints keep the palette free of NumPy scalar types
    return [tuple(int(channel) for channel in color) for color in sorted_colors]


In [99]:
# Render the colour palette as a strip of swatches, then save or display it
def show_palette(palette, block_size=60, save_path=None):
    """Draw one square swatch per palette colour, side by side.

    Args:
        palette: Sequence of ``(r, g, b)`` colours.
        block_size: Side length in pixels of each swatch.
        save_path: If truthy, the strip is saved there and a confirmation is
            printed; otherwise the strip is shown with ``Image.show()``.
    """
    strip = np.zeros((block_size, block_size * len(palette), 3), dtype=np.uint8)

    # Fill each block_size-wide column band with its colour
    for position, (r, g, b) in enumerate(palette):
        left = position * block_size
        strip[:, left:left + block_size] = [r, g, b]

    swatch_img = Image.fromarray(strip)

    if not save_path:
        swatch_img.show()  # Opens in default image viewer
        return

    swatch_img.save(save_path)
    print(f"Palette saved to: {save_path}")



Main method

In [100]:
# Demo: classify one sample image and save its colour palette
if __name__ == "__main__":
    image = os.path.join("images", "img2.jpeg")

    if not os.path.exists(image):
        print("Image not found!")
    else:
        try:
            room_result = infer_room_type(image, top_k=1)
            # infer_room_type signals an unreadable image by returning an
            # "Error: ..." string instead of raising; print it so the failure
            # is not silently dropped.
            if isinstance(room_result, str):
                print(room_result)

            palette = extract_color_palette(image, n_colors=6)
            show_palette(palette, block_size=80, save_path="palette_output.jpg")

        except Exception as e:
            print(f"Error: {e}")


Top match:
bedroom
Palette saved to: palette_output.jpg
