In [1]:
import tensorflow as tf
import pandas as pd
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np
import cv2
import os
import random
import matplotlib.pyplot as plt
from PIL import Image
import hashlib
from pathlib import Path

In [2]:
# Root directory of the raw furniture image dataset
dataset_folder = (
    "Project_ML/Classification-Furniture-Images-main/"
    "Data/Raw/Furniture_Data"
)


In [3]:
# Size passed to PIL's resize() for every image that gets loaded
desired_size = (256, 256)

# Collected (category, style, image) tuples, filled in by the loading loop
image_data = list()

# MD5 digests of the images kept so far; used to skip duplicates
image_hashes = set()

# Name of the dataset's top-level directory
main_folder_name = os.path.basename(dataset_folder)

In [4]:
# Walk the dataset tree and collect every distinct image as a
# (category, style, image) tuple in `image_data`.
# Duplicates are detected by hashing the resized pixel data.
for root, dirs, files in os.walk(dataset_folder):
    for parent_folder in dirs:
        parent_folder_path = os.path.join(root, parent_folder)

        # These labels depend only on the folder, so compute them once per
        # folder instead of once per image: the directory containing the
        # folder gives the category (e.g. "beds"), the folder itself the
        # style (e.g. "Asian").
        parent_folder_dir = os.path.dirname(parent_folder_path)
        parent_folder_name = os.path.basename(parent_folder_dir)
        subfolder_name = os.path.basename(parent_folder_path)

        for filename in os.listdir(parent_folder_path):
            # Skip macOS Finder metadata files
            if filename == ".DS_Store":
                continue

            file_path = os.path.join(parent_folder_path, filename)

            # Sub-directories are handled when os.walk reaches them
            if os.path.isdir(file_path):
                continue

            try:
                # The context manager closes the underlying file as soon as
                # the pixels have been read, so handles are not leaked while
                # scanning a large dataset.
                with Image.open(file_path) as img:
                    # Force 3-channel RGB so grayscale / RGBA / palette files
                    # produce arrays with the same shape as everything else.
                    resized_img = img.convert("RGB").resize(desired_size)

                image_hash = hashlib.md5(resized_img.tobytes()).hexdigest()

                # Ignore duplicates
                if image_hash not in image_hashes:
                    image_hashes.add(image_hash)
                    image_data.append((parent_folder_name, subfolder_name, resized_img))

            except Exception as e:
                # Best effort: report unreadable files and keep going
                print(f"Error loading image {file_path}: {e}")

In [5]:
# Number of stored images, then number of distinct hashes (one per line)
print(len(image_data), len(image_hashes), sep="\n")

85165
85165


In [6]:
# Tabular view of the loaded samples: category, style and the PIL image object
df = pd.DataFrame(
    data=image_data,
    columns=['Cat', 'Style', 'Img'],
)
df.head()

Unnamed: 0,Cat,Style,Img
0,beds,Asian,<PIL.Image.Image image mode=RGB size=256x256 a...
1,beds,Asian,<PIL.Image.Image image mode=RGB size=256x256 a...
2,beds,Asian,<PIL.Image.Image image mode=RGB size=256x256 a...
3,beds,Asian,<PIL.Image.Image image mode=RGB size=256x256 a...
4,beds,Asian,<PIL.Image.Image image mode=RGB size=256x256 a...


In [59]:
# Persist only the label columns. The 'Img' column holds in-memory PIL objects,
# which CSV can only store as their repr text (not pixel data), so it is
# dropped instead of being written as unusable strings.
df.drop(columns=['Img']).to_csv(
    "Project_ML/Classification-Furniture-Images-main/Data/Raw/Furniture_Data.csv",
    index=False,
)


In [49]:
# Categories kept for this experiment
desired_cat = ["dressers", "sofas"]

# Subset of the loaded samples whose category is one of the selected ones
mini_dataset = [
    (cat_name, style_name, picture)
    for cat_name, style_name, picture in image_data
    if cat_name in desired_cat
]

In [50]:
# Number of samples left after filtering by category
len(mini_dataset)

11891

In [51]:
class CustomDataset(tf.keras.utils.Sequence):
    """Keras ``Sequence`` yielding batches of (images, encoded category labels).

    Each element of ``data`` is a ``(category, style, image)`` tuple whose
    ``image`` is a PIL image. The style is ignored for now; only the category
    is used as the target.

    Note: every instance fits its own ``LabelEncoder``, so integer codes are
    only comparable between instances when each one was built from data that
    contains the same set of categories.
    """

    def __init__(self, data, batch_size, image_size=(299, 299)):
        """
        Args:
            data: sequence of ``(category, style, image)`` tuples.
            batch_size: number of samples per batch.
            image_size: size every image is resized to before batching.
                Defaults to 299x299, the input size the InceptionV3 models
                in this notebook are built with.
        """
        # Newer Keras releases expect the base initializer to be called;
        # on older releases this is a harmless no-op.
        super().__init__()
        self.data = data
        self.batch_size = batch_size
        self.image_size = image_size
        self.indexes = np.arange(len(self.data))

        # Categories are strings (e.g. "dressers", "sofas"), so encode them
        # as integers for the sparse categorical loss.
        self.label_encoder = LabelEncoder()
        self.labels = [category for category, _, _ in self.data]
        self.labels_encoded = self.label_encoder.fit_transform(self.labels)

    def __len__(self):
        """Return the number of batches, counting a final partial batch."""
        # Ceiling division: flooring would silently drop the trailing samples
        # (and report 0 batches when there are fewer samples than batch_size).
        return (len(self.data) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Return batch ``idx`` as ``(images, labels)`` NumPy arrays.

        Images are resized to ``image_size`` and scaled to [0, 1] as float32;
        labels are the integer-encoded categories of the same samples.
        """
        batch_indexes = self.indexes[idx * self.batch_size:(idx + 1) * self.batch_size]

        # float32 halves the batch memory compared with the float64 that
        # dividing a uint8 array by a Python float would produce.
        images = [
            np.asarray(self.data[i][2].resize(self.image_size), dtype=np.float32) / 255.0
            for i in batch_indexes
        ]
        labels = self.labels_encoded[batch_indexes]
        return np.array(images), np.array(labels)

In [52]:
# 60% train / 20% validation / 20% test.
# A fixed seed makes the split reproducible across kernel restarts, and
# stratifying on the category keeps the class proportions equal in every split
# (each split later fits its own label encoder, so all must see every category).
SPLIT_SEED = 42

train_data, remaining_data = train_test_split(
    mini_dataset,
    test_size=0.4,
    shuffle=True,
    random_state=SPLIT_SEED,
    stratify=[category for category, _, _ in mini_dataset],
)
val_data, test_data = train_test_split(
    remaining_data,
    test_size=0.5,
    shuffle=True,
    random_state=SPLIT_SEED,
    stratify=[category for category, _, _ in remaining_data],
)

In [53]:
# Every split is served in mini-batches of this many samples
batch_size = 32

# One batch generator per split, all using the same batch size
train_generator, val_generator, test_generator = (
    CustomDataset(split, batch_size)
    for split in (train_data, val_data, test_data)
)

In [13]:
from keras.applications import InceptionV3

In [14]:
# InceptionV3 backbone with ImageNet weights and no classification head
inc = InceptionV3(
    include_top=False,
    weights="imagenet",
    input_shape=(299, 299, 3),
)

In [15]:
# Freeze the backbone so its weights are not updated during training
for inc_layer in inc.layers:
    inc_layer.trainable = False

In [16]:
from tensorflow.keras.layers import Flatten

# Flatten the backbone's output feature maps into one vector per image
x = Flatten()(inc.output)

In [17]:
from tensorflow.keras.layers import Dense
# One softmax unit per selected category. The previous hard-coded 10 did not
# match the number of classes in the data fed to the model.
pred = Dense(len(desired_cat), activation="softmax")(x)

In [18]:
from keras.models import Model

In [19]:
# End-to-end model: backbone input -> softmax predictions
model_inc = Model(inc.input, pred)

In [20]:
# Print the layer-by-layer summary of the model
model_inc.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 149, 149, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization (BatchNorm  (None, 149, 149, 32  96         ['conv2d[0][0]']                 
 alization)                     )                                                             

In [21]:
# Labels are integer class ids, hence the sparse variant of the loss
model_inc.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [54]:
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras import layers, models

# Pretrained InceptionV3 backbone (ImageNet weights), classifier head removed
base_model = InceptionV3(
    input_shape=(299, 299, 3),
    include_top=False,
    weights='imagenet',
)

# Keep every backbone layer fixed so only the new head is trained
for layer in base_model.layers:
    layer.trainable = False

# New head: flatten the backbone features, then one softmax unit per
# selected category
x = layers.Flatten()(base_model.output)
output = layers.Dense(units=len(desired_cat), activation='softmax')(x)

# Assemble the full classifier
model = models.Model(base_model.input, output)





In [55]:
# Print the layer-by-layer summary of the model
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_8 (InputLayer)           [(None, 299, 299, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d_658 (Conv2D)            (None, 149, 149, 32  864         ['input_8[0][0]']                
                                )                                                                 
                                                                                                  
 batch_normalization_658 (Batch  (None, 149, 149, 32  96         ['conv2d_658[0][0]']             
 Normalization)                 )                                                           

In [56]:
# Labels are integer class ids, hence the sparse variant of the loss
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [57]:
# Train for a single epoch on the training batches and evaluate on the
# validation batches at the end of the epoch
history = model.fit(
    x=train_generator,
    epochs=1,
    validation_data=val_generator,
)



In [47]:
# Per-epoch training accuracy, then per-epoch training loss (one per line)
print(history.history['accuracy'], history.history['loss'], sep="\n")

[0.0]
[0.0]
