<a href="https://colab.research.google.com/github/IamLucifer0312/Furniture-Recognition-CNN/blob/henry-task-3/Task3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

###Task 3: (only for those aim HD) The extension of the model in Task 2, the recommended furniture items must be in the same interior styles with the style of the input images. In order to fulfill this task, you are required to build a model to recognize the style of a furniture item.

### First, mount the Google Drive to start downloading necessary files


In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


### Now download and extract the original dataset

In [2]:
import gdown

# Google Drive file ID (from the shared link)
file_id = "1h2SIWZZUESO6xygzF4iMkK-tVc4mZ27M"
file_name = "Furniture_Data.zip"
file_path = "/content/" + file_name

# Download the file
gdown.download(f"https://drive.google.com/uc?id={file_id}", file_name, quiet=False)
print(f"Downloaded: {file_path}")


Downloading...
From (original): https://drive.google.com/uc?id=1h2SIWZZUESO6xygzF4iMkK-tVc4mZ27M
From (redirected): https://drive.google.com/uc?id=1h2SIWZZUESO6xygzF4iMkK-tVc4mZ27M&confirm=t&uuid=094b12e5-e76a-43c0-8a81-095d7f8d801b
To: /content/Furniture_Data.zip
100%|██████████| 2.49G/2.49G [00:31<00:00, 78.0MB/s]

Downloaded: /content/Furniture_Data.zip





In [3]:
!unzip -q $file_path -d /content/extracted_folder

### Define the categories


In [4]:
categories = ['beds', 'chairs', 'dressers', 'lamps', 'sofas', 'tables']

### Now we remove all duplicated images in all classes using hash. This reduces the time to convert the dataset to features, and avoid recommending similar images

In [5]:
import hashlib
from PIL import Image
import cv2
import random
import os

# Define the original dataset path
original_dataset_path = '/content/extracted_folder/Furniture_Data'

# Function to compute image hash
def get_image_hash(image_path):
    """Generate an MD5 hash for an image file."""
    with open(image_path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()

# Store unique images and duplicates
hashes = {}
deleted_count = 0



# Loop through all categories and subfolders
for category in os.listdir(original_dataset_path):
    category_path = os.path.join(original_dataset_path, category)

    if os.path.isdir(category_path):  # Ensure it's a directory
        for style in os.listdir(category_path):
            style_path = os.path.join(category_path, style)

            if os.path.isdir(style_path):  # Ensure it's a directory
                for image in os.listdir(style_path):
                    img_path = os.path.join(style_path, image)
                    img_hash = get_image_hash(img_path)

                    if img_hash in hashes:
                        os.remove(img_path)  # Delete duplicate image
                        deleted_count += 1
                        print(f"Deleted: {img_path}")
                    else:
                        hashes[img_hash] = img_path  # Store first occurrence


# Report results
print(f"\n Removed {deleted_count} duplicate images.")

Deleted: /content/extracted_folder/Furniture_Data/dressers/Craftsman/28454craftsman-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Craftsman/dressers999879876546516546.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/5364farmhouse-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/4903farmhouse-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/34112farmhouse-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/5469farmhouse-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/4710farmhouse-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/34079farmhouse-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/33597farmhouse-dressers.jpg
Deleted: /content/extracted_folder/Furniture_Data/dressers/Farmhouse/34077farmhouse-dressers.jpg
Deleted: /content/extracted_fol

### Define the style categories

In [6]:
style_categories = ['Transitional',
 'Industrial',
 'Mediterranean',
 'Tropical',
 'Contemporary',
 'Farmhouse',
 'Scandinavian',
 'Craftsman',
 'Beach',
 'Rustic',
 'Victorian',
 'Modern',
 'Southwestern',
 'Asian',
 'Traditional',
 'Midcentury',
 'Eclectic']


### The original dataset is in the format of nested folder:
 Folder:
  -- Category 1:
    -- Style 1
    -- Style 2
    ...
 ...
### So now we need to reconstruct it to only containing category subfolders, which is more suitable for this task.

In [7]:
import os
import shutil

# Define paths
new_dataset_path = 'Task3_Data'          # Path to the new flattened dataset

# Create the new folder structure
for category in style_categories:
    os.makedirs(os.path.join(new_dataset_path, category), exist_ok=True)

# Copy images to the new structure
for category in categories:
    category_path = os.path.join(original_dataset_path, category)
    for style in os.listdir(category_path):
        style_path = os.path.join(category_path, style)
        if os.path.isdir(style_path):
          for image in os.listdir(style_path):
              src = os.path.join(style_path, image)
              dst = os.path.join(new_dataset_path, style, image)
              shutil.copy(src, dst)

print("Restructuring complete!")

Restructuring complete!


In [10]:

# Zip the directory
shutil.make_archive(new_dataset_path, 'zip', new_dataset_path)

print(f"Restructuring complete! Zipped as {new_dataset_path}" + ".zip")

Restructuring complete! Zipped as Task3_Data.zip


### Download and load the new classification model for task 3

In [11]:
import gdown

# Google Drive file ID (from the shared link)
file_id = "14YWE-zPXWuIzp9CPB6GalFkeMLMnU1AO"
file_name = "classification_style_model.h5"
file_path = "/content/" + file_name

# Download the file
gdown.download(f"https://drive.google.com/uc?id={file_id}", file_name, quiet=False)
print(f"Downloaded: {file_path}")


Downloading...
From (original): https://drive.google.com/uc?id=14YWE-zPXWuIzp9CPB6GalFkeMLMnU1AO
From (redirected): https://drive.google.com/uc?id=14YWE-zPXWuIzp9CPB6GalFkeMLMnU1AO&confirm=t&uuid=d9028451-8e3d-48c1-8807-4b65477a3bd6
To: /content/classification_style_model.h5
100%|██████████| 320M/320M [00:02<00:00, 113MB/s]

Downloaded: /content/classification_style_model.h5





In [12]:
import keras
from tensorflow.keras.models import Model

model = keras.saving.load_model("/content/classification_style_model.h5")


model.summary()




Similar to task 2, this model has no input class, and handle the classification task with the last two layers. Therefore, we can get the embedded vector of images by removing them.


### Use the first layer as the input layer (Images need to be preprocessed to the right shape before feeding to the mode), then slice the last two layers by taking the previous Dense layer as output.

In [15]:
from tensorflow.keras.layers import Lambda

# Get the first layer as input
input_layer = model.layers[0].input  # Explicitly using the first layer's input

# Identify the last convolutional or pooling layer before Flatten
output_layer = model.get_layer("dense_6").output  # Extract features before Flatten

# Add L2 normalization with explicit output shape
normalized_output = Lambda(lambda x: tf.nn.l2_normalize(x, axis=1),
                           name="l2_normalization",
                           output_shape=(output_layer.shape[-1],))(output_layer)

# Create the final feature extraction model
feature_extractor = Model(inputs=input_layer, outputs=normalized_output)

# Print summary to verify
feature_extractor.summary()

### Define the function to normalize and preprocess images

In [16]:
import numpy as np
import tensorflow as tf

def preprocess_image(image_path):
    # Load image
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)  # Decode as RGB

    # Resize to (224, 224)
    image = tf.image.resize(image, (224, 224))

    # Normalize to [0, 1] range
    image = image / 255.0

    # Add batch dimension
    image = np.expand_dims(image, axis=0)  # Shape becomes (1, 224, 224, 3)

    return image


### Define the function to extract embeddings from image paths and flatten the result (for easy comparison)

In [17]:
def extract_features(image_path):
  image_arr = preprocess_image(image_path)
  features = feature_extractor.predict(image_arr, verbose = 0)
  return features.flatten()


### Extract image paths from the database

In [20]:
# Initialize an empty list to store image paths
image_paths = []  # A 2D list of image paths by styles

# Loop through all categories and subfolders
for style in os.listdir(new_dataset_path):
    style_path = os.path.join(new_dataset_path, style)

    if os.path.isdir(style_path):  # Ensure it's a directory
      images_set = [] # Create a list of images by one style
      for image in os.listdir(style_path):
          image_path = os.path.join(style_path, image)
          images_set.append(image_path)  # Add to list of images
      image_paths.append(images_set) # Add to list of styles
# Print sample image paths
print("Total styles found:", len(image_paths))
print("Sample paths:", image_paths[0][:5])  # Print first 5 paths of the first style


Total styles found: 17
Sample paths: ['Task3_Data/Scandinavian/2628scandinavian-platform-beds.jpg', 'Task3_Data/Scandinavian/23380scandinavian-table-lamps.jpg', 'Task3_Data/Scandinavian/21292scandinavian-dining-tables.jpg', 'Task3_Data/Scandinavian/21949scandinavian-dining-chairs.jpg', 'Task3_Data/Scandinavian/7512scandinavian-table-lamps.jpg']


### Run through all images in the dataset and extract their features, then put them all into a file

If you just want to test the code, then skip the next row and download the features from my Google Drive as upcoming instructions to save time.

In [21]:
# Define the folder to store embeddings
save_folder = "furniture_embeddings"

# Create the folder if it doesn't exist
os.makedirs(save_folder, exist_ok=True)

# Extract embeddings from images
for i in range(len(style_categories)):
  feature_vectors = []
  for img_path in image_paths[i]:
      features = extract_features(img_path)  # Extract feature vector (implement this function)
      feature_vectors.append(features)

  # Convert to NumPy array
  feature_vectors = np.array(feature_vectors)

  # Save embeddings inside the folder
  file_path = os.path.join(save_folder, f"{style_categories[i]}_furniture_features.npy")
  np.save(file_path, feature_vectors)

  print(f"✅ {style_categories[i]} embeddings saved in {file_path}!")


✅ Transitional embeddings saved in furniture_embeddings/Transitional_furniture_features.npy!
✅ Industrial embeddings saved in furniture_embeddings/Industrial_furniture_features.npy!
✅ Mediterranean embeddings saved in furniture_embeddings/Mediterranean_furniture_features.npy!
✅ Tropical embeddings saved in furniture_embeddings/Tropical_furniture_features.npy!
✅ Contemporary embeddings saved in furniture_embeddings/Contemporary_furniture_features.npy!
✅ Farmhouse embeddings saved in furniture_embeddings/Farmhouse_furniture_features.npy!
✅ Scandinavian embeddings saved in furniture_embeddings/Scandinavian_furniture_features.npy!
✅ Craftsman embeddings saved in furniture_embeddings/Craftsman_furniture_features.npy!
✅ Beach embeddings saved in furniture_embeddings/Beach_furniture_features.npy!
✅ Rustic embeddings saved in furniture_embeddings/Rustic_furniture_features.npy!
✅ Victorian embeddings saved in furniture_embeddings/Victorian_furniture_features.npy!
✅ Modern embeddings saved in fu

In [22]:
# Zip the directory
shutil.make_archive(save_folder, 'zip', save_folder)

print(f"Zipped as {save_folder}" + ".zip")

Zipped as furniture_embeddings.zip


### Load the image features file and double-check the shape and size

If you performed feature extraction on your own, skip the next two cells. If you wanted to save time, run those rows to download my features.

In [None]:
import gdown
# Google Drive file ID (from the shared link)
file_id = "1G_EFOdo0tqnL_UWJQJa0Ot41jhomalT5"
file_name = "furniture_features.zip"
file_path = "/content/" + file_name

# Download the file
gdown.download(f"https://drive.google.com/uc?id={file_id}", file_name, quiet=False)
print(f"Downloaded: {file_path}")


Downloading...
From: https://drive.google.com/uc?id=1G_EFOdo0tqnL_UWJQJa0Ot41jhomalT5
To: /content/furniture_features.zip
100%|██████████| 21.9M/21.9M [00:00<00:00, 76.3MB/s]

Downloaded: /content/furniture_features.zip





In [None]:
!unzip -q $file_path -d /content/

In [38]:
import numpy as np

embedding_list = []

for style_embedding in os.listdir(save_folder):

  # Load the .npy file
  style_embedding = np.load(f"{save_folder}/{style_embedding}")
  embedding_list.append(style_embedding)

# Print shape and size
for embedding in embedding_list:
  print(embedding.shape)
  print("Size:", embedding.size)    # Total number of elements


(846, 512)
Size: 433152
(6712, 512)
Size: 3436544
(362, 512)
Size: 185344
(2332, 512)
Size: 1193984
(3279, 512)
Size: 1678848
(1374, 512)
Size: 703488
(6535, 512)
Size: 3345920
(2474, 512)
Size: 1266688
(414, 512)
Size: 211968
(1683, 512)
Size: 861696
(17451, 512)
Size: 8934912
(1819, 512)
Size: 931328
(16343, 512)
Size: 8367616
(4873, 512)
Size: 2494976
(2642, 512)
Size: 1352704
(1825, 512)
Size: 934400
(14201, 512)
Size: 7270912


In [48]:
i = 0
names = []
for style_embedding in os.listdir(save_folder):
  names.append(style_embedding)


for style in os.listdir(new_dataset_path):
  style_path = os.path.join(new_dataset_path, style)
  num_items = len(os.listdir(style_path))
  num_vectors = embedding_list[i].shape[0]

  if num_items == num_vectors:
    print(f"✅{style} num of items is correct")
  else:
    print(f"❌{style} num of items is NOT correct: {num_items}")
    print(f"     Compared to {names[i]} of {num_vectors}")

  i += 1

for style_embedding in os.listdir(save_folder):
  print(style_embedding)

❌Scandinavian num of items is NOT correct: 414
     Compared to Contemporary_furniture_features.npy of 846
❌Craftsman num of items is NOT correct: 2474
     Compared to Mediterranean_furniture_features.npy of 6712
❌Midcentury num of items is NOT correct: 6712
     Compared to Beach_furniture_features.npy of 362
❌Victorian num of items is NOT correct: 1819
     Compared to Southwestern_furniture_features.npy of 2332
❌Tropical num of items is NOT correct: 846
     Compared to Farmhouse_furniture_features.npy of 3279
❌Farmhouse num of items is NOT correct: 3279
     Compared to Rustic_furniture_features.npy of 1374
❌Traditional num of items is NOT correct: 16343
     Compared to Asian_furniture_features.npy of 6535
❌Beach num of items is NOT correct: 2642
     Compared to Industrial_furniture_features.npy of 2474
❌Southwestern num of items is NOT correct: 362
     Compared to Transitional_furniture_features.npy of 414
❌Eclectic num of items is NOT correct: 1374
     Compared to Modern_fur

IndexError: list index out of range

In [52]:
!unzip -q $furniture_embeddings.zip -d /content/

unzip:  cannot find or open .zip, .zip.zip or .zip.ZIP.
