In [1]:
import os
import random
import pandas as pd
from PIL import Image
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models

In [None]:
import os
import pandas as pd

coords = True

rooms_folder = "Livingroom"
furniture_folder = "furniture_output"

def list_images(folder):
    return [f for f in os.listdir(folder) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]

room_files = list_images(rooms_folder)

room_path_dict = {}
for room_file in room_files:
    base, ext = os.path.splitext(room_file)
    room_path_dict[base.lower()] = os.path.join(rooms_folder, room_file)

room_to_furniture = {os.path.splitext(f)[0].lower(): [] for f in room_files}

furniture_files = list_images(furniture_folder)

furniture_rows = []

for f_file in furniture_files:
    base_name, ext = os.path.splitext(f_file)
    
    if coords:
        # Expected format: {num}_{x_min}_{x_max}_{y_min}_{y_max}_{room image name}

        parts = base_name.split("_", maxsplit=5)
        if len(parts) < 6:
            print(f"Skipping file with unexpected format: {f_file}")
            continue
        num, x_min, x_max, y_min, y_max, room_img_name = parts
        
        # Re-append the original extension to the room image 
        room_img_name = room_img_name + ext
    else:
        
        parts = base_name.split("_", maxsplit=1)
        if len(parts) < 2:
            print(f"Skipping file with unexpected format: {f_file}")
            continue
        num, room_img_name = parts
        room_img_name = room_img_name + ext
        x_min = x_max = y_min = y_max = None
    

    furniture_path = os.path.join(furniture_folder, f_file)
    
   
    room_base = os.path.splitext(room_img_name)[0].lower()
    
  
    if room_base in room_path_dict:
     
        room_to_furniture[room_base].append(furniture_path)
        
        # Create a row 
        furniture_rows.append({
            "furniture": furniture_path,
            "from_image": room_path_dict[room_base],
            "xmin": float(x_min) if x_min is not None else None,
            "xmax": float(x_max) if x_max is not None else None,
            "ymin": float(y_min) if y_min is not None else None,
            "ymax": float(y_max) if y_max is not None else None,
        })
    else:
        print(f"Room image with base name '{room_img_name}' for furniture file {f_file} not found in {rooms_folder}")

# Create the df DataFrame
df_rows = []
for room_file in room_files:
    base, ext = os.path.splitext(room_file)
    base_lower = base.lower()
    df_rows.append({
        "room_image": os.path.join(rooms_folder, room_file),
        "furniture_arr": room_to_furniture.get(base_lower, [])
    })

df = pd.DataFrame(df_rows)
furniture_df = pd.DataFrame(furniture_rows)


print("df:")
print(df.head())
print("\nfurniture_df:")
print(furniture_df.head())


df:
                   room_image  \
0     Livingroom\living_1.jpg   
1    Livingroom\living_10.jpg   
2   Livingroom\living_100.jpg   
3  Livingroom\living_1000.jpg   
4  Livingroom\living_1002.jpg   

                                       furniture_arr  
0  [furniture_output\0_67_174_112_175_living_1.pn...  
1  [furniture_output\0_145_185_66_161_living_10.p...  
2  [furniture_output\0_52_203_108_174_living_100....  
3  [furniture_output\0_68_126_113_181_living_1000...  
4  [furniture_output\0_43_73_101_160_living_1002....  

furniture_df:
                                         furniture  \
0  furniture_output\0_0_155_136_175_living_937.png   
1   furniture_output\0_0_173_129_223_living_16.png   
2  furniture_output\0_0_17_122_158_living_1319.png   
3    furniture_output\0_0_43_101_199_living_81.png   
4  furniture_output\0_0_45_141_204_living_1318.png   

                   from_image  xmin   xmax   ymin   ymax  
0   Livingroom\living_937.jpg   0.0  155.0  136.0  175.0  
1    Livi

In [3]:
furniture_df

Unnamed: 0,furniture,from_image,xmin,xmax,ymin,ymax
0,furniture_output\0_0_155_136_175_living_937.png,Livingroom\living_937.jpg,0.0,155.0,136.0,175.0
1,furniture_output\0_0_173_129_223_living_16.png,Livingroom\living_16.jpg,0.0,173.0,129.0,223.0
2,furniture_output\0_0_17_122_158_living_1319.png,Livingroom\living_1319.jpg,0.0,17.0,122.0,158.0
3,furniture_output\0_0_43_101_199_living_81.png,Livingroom\living_81.jpg,0.0,43.0,101.0,199.0
4,furniture_output\0_0_45_141_204_living_1318.png,Livingroom\living_1318.jpg,0.0,45.0,141.0,204.0
...,...,...,...,...,...,...
4672,furniture_output\9_131_182_199_223_living_671.png,Livingroom\living_671.jpg,131.0,182.0,199.0,223.0
4673,furniture_output\9_1_39_151_223_living_1174.png,Livingroom\living_1174.jpg,1.0,39.0,151.0,223.0
4674,furniture_output\9_43_63_165_200_living_539.png,Livingroom\living_539.jpg,43.0,63.0,165.0,200.0
4675,furniture_output\9_53_81_106_129_living_483.png,Livingroom\living_483.jpg,53.0,81.0,106.0,129.0


In [4]:
df = df[df["furniture_arr"].map(lambda x: len(x) > 0)]

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split




# Split df into training (80%) and testing (20%) sets.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)


train_room_paths = set(train_df["room_image"])
test_room_paths = set(test_df["room_image"])
train_furniture_df = furniture_df[furniture_df["from_image"].isin(train_room_paths)]


test_furniture_df = furniture_df[furniture_df["from_image"].isin(test_room_paths)]
print("Train df size:", len(train_df))
print("Test df size:", len(test_df))
print("Train furniture_df size:", len(train_furniture_df))
print("Test furniture_df size:", len(test_furniture_df))


Train df size: 1003
Test df size: 251
Train furniture_df size: 3771
Test furniture_df size: 906


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


img_size = 224  
transform = transforms.Compose([
   transforms.Resize((img_size, img_size)),
   transforms.ToTensor(),
   transforms.Normalize(mean=[0.485, 0.456, 0.406],
                        std=[0.229, 0.224, 0.225])
])
import random
from PIL import Image, ImageDraw
class RoomFurnitureDataset(Dataset):
   def __init__(self, df, furniture_df, transform=None):
       """
       df: DataFrame with columns 'room_image' and 'furniture_arr'
       furniture_df: DataFrame with columns 'furniture', 'from_image', 'xmin', 'xmax', 'ymin', 'ymax'
       transform: Image transformations (applied after masking)
       """
       self.df = df.reset_index(drop=True)
       self.furniture_df = furniture_df.reset_index(drop=True)
       self.transform = transform

   def __len__(self):
       return len(self.df)
  
   def __getitem__(self, idx):
       # Loading
       room_path = self.df.loc[idx, "room_image"]
       room_img = Image.open(room_path).convert("RGB")
      
  
       furniture_list = self.df.loc[idx, "furniture_arr"]
       pos_furniture_path = random.choice(furniture_list)
       pos_img = Image.open(pos_furniture_path).convert("RGB")
      
  
       pos_rows = self.furniture_df[self.furniture_df["furniture"] == pos_furniture_path]
       if len(pos_rows) > 0:
           pos_row = pos_rows.iloc[0]
           x_min, x_max = pos_row["xmin"], pos_row["xmax"]
           y_min, y_max = pos_row["ymin"], pos_row["ymax"]
          
           # Compute the center of the bounding box.
           center_x = (x_min + x_max) / 2.0
           center_y = (y_min + y_max) / 2.0
          
           # Compute the max dimension of the bounding box.
           box_width = x_max - x_min
           box_height = y_max - y_min
           max_dim = max(box_width, box_height)
          
          
           multiplier = random.uniform(1.0, 1.5)
           mask_size = multiplier * max_dim
          
        
           img_width, img_height = room_img.size
          
          
           left = center_x - mask_size/2.0
           top = center_y - mask_size/2.0
           right = center_x + mask_size/2.0
           bottom = center_y + mask_size/2.0
          
           
           left = max(0, left)
           top = max(0, top)
           right = min(img_width, right)
           bottom = min(img_height, bottom)
          
          
           draw = ImageDraw.Draw(room_img)
           draw.rectangle([left, top, right, bottom], fill="black")
       else:
           
           print(f"Warning: No bounding box found for {pos_furniture_path}")
      
   
       if self.transform:
           room_img = self.transform(room_img)
      
    
       if self.transform:
           pos_img = self.transform(pos_img)
      
       
       room_id = self.df.loc[idx, "room_image"]
       neg_candidates = self.furniture_df[self.furniture_df["from_image"] != room_id]
       neg_row = neg_candidates.sample(1).iloc[0]
       neg_furniture_path = neg_row["furniture"]
       neg_img = Image.open(neg_furniture_path).convert("RGB")
       if self.transform:
           neg_img = self.transform(neg_img)
      
       return room_img, pos_img, neg_img






In [23]:
train_dataset = RoomFurnitureDataset(train_df, train_furniture_df, transform=transform)
test_dataset = RoomFurnitureDataset(test_df, test_furniture_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False, num_workers=0)

In [None]:
import torch.nn as nn
import torchvision.models as models

class EmbeddingNet(nn.Module):
    def __init__(self, embed_dim=128, lock_base=True):
        super(EmbeddingNet, self).__init__()
        # pretrained ResNet18
        self.base_model = models.resnet18(pretrained=True)
        
        
        if lock_base:
            for param in self.base_model.parameters():
                param.requires_grad = False
        
        num_features = self.base_model.fc.in_features
        self.base_model.fc = nn.Identity() 
        
       
        self.fc = nn.Sequential(
            nn.Linear(num_features, 256),
            nn.ReLU(),
            nn.Linear(256, embed_dim)
        )
    
    def forward(self, x):
        x = self.base_model(x)
        x = self.fc(x)
        # L2
        x = nn.functional.normalize(x, p=2, dim=1)
        return x




In [26]:
room_model = EmbeddingNet().to(device)
furniture_model = EmbeddingNet().to(device)

room_model

EmbeddingNet(
  (base_model): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, t

In [None]:
margin = 1.0
triplet_loss = nn.TripletMarginLoss(margin=margin, p=2)


optimizer = optim.Adam(list(room_model.parameters()) + list(furniture_model.parameters()), lr=1e-4)

In [None]:
num_epochs = 10

for epoch in range(num_epochs):
    
    room_model.train()
    furniture_model.train()
    train_running_loss = 0.0
    train_correct = 0
    train_samples = 0

    for batch_idx, (room_imgs, pos_imgs, neg_imgs) in enumerate(train_loader):
        room_imgs = room_imgs.to(device)
        pos_imgs = pos_imgs.to(device)
        neg_imgs = neg_imgs.to(device)
        
       
        room_embeds = room_model(room_imgs)
        pos_embeds = furniture_model(pos_imgs)
        neg_embeds = furniture_model(neg_imgs)
        
        loss = triplet_loss(room_embeds, pos_embeds, neg_embeds)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_running_loss += loss.item()
        
        
        d_pos = torch.norm(room_embeds - pos_embeds, dim=1)
        d_neg = torch.norm(room_embeds - neg_embeds, dim=1)
        correct = (d_pos < d_neg).sum().item()
        train_correct += correct
        train_samples += room_imgs.size(0)
        
        if batch_idx % 10 == 0:
            batch_acc = correct / room_imgs.size(0)
            print(f"Epoch {epoch+1}/{num_epochs} - Batch {batch_idx}/{len(train_loader)}: Train Loss = {loss.item():.4f} | Batch Acc = {batch_acc:.4f}")
    
    train_epoch_loss = train_running_loss / len(train_loader)
    train_epoch_acc = train_correct / train_samples
    
   
    room_model.eval()
    furniture_model.eval()
    test_running_loss = 0.0
    test_correct = 0
    test_samples = 0
    
    with torch.no_grad():
        for batch_idx, (room_imgs, pos_imgs, neg_imgs) in enumerate(test_loader):
            room_imgs = room_imgs.to(device)
            pos_imgs = pos_imgs.to(device)
            neg_imgs = neg_imgs.to(device)
            
            room_embeds = room_model(room_imgs)
            pos_embeds = furniture_model(pos_imgs)
            neg_embeds = furniture_model(neg_imgs)
            
            loss = triplet_loss(room_embeds, pos_embeds, neg_embeds)
            test_running_loss += loss.item()
            
            # Compute distances and accuracy for the batch
            d_pos = torch.norm(room_embeds - pos_embeds, dim=1)
            d_neg = torch.norm(room_embeds - neg_embeds, dim=1)
            correct = (d_pos < d_neg).sum().item()
            test_correct += correct
            test_samples += room_imgs.size(0)
    
    test_epoch_loss = test_running_loss / len(test_loader)
    test_epoch_acc = test_correct / test_samples

    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_epoch_loss:.4f} | Train Acc: {train_epoch_acc:.4f} | Test Loss: {test_epoch_loss:.4f} | Test Acc: {test_epoch_acc:.4f}")

print("Training and testing complete.")


Epoch 1/10 - Batch 0/63: Train Loss = 0.4889 | Batch Acc = 0.8750
Epoch 1/10 - Batch 10/63: Train Loss = 0.5754 | Batch Acc = 0.6875
Epoch 1/10 - Batch 20/63: Train Loss = 0.6460 | Batch Acc = 0.6250
Epoch 1/10 - Batch 30/63: Train Loss = 0.5570 | Batch Acc = 0.8125
Epoch 1/10 - Batch 40/63: Train Loss = 0.4884 | Batch Acc = 0.7500
Epoch 1/10 - Batch 50/63: Train Loss = 0.6987 | Batch Acc = 0.6875
Epoch 1/10 - Batch 60/63: Train Loss = 0.6102 | Batch Acc = 0.6875
Epoch 1/10 - Train Loss: 0.5064 | Train Acc: 0.8365 | Test Loss: 0.5378 | Test Acc: 0.8167
Epoch 2/10 - Batch 0/63: Train Loss = 0.5876 | Batch Acc = 0.8125
Epoch 2/10 - Batch 10/63: Train Loss = 0.6327 | Batch Acc = 0.6875
Epoch 2/10 - Batch 20/63: Train Loss = 0.5485 | Batch Acc = 0.8750
Epoch 2/10 - Batch 30/63: Train Loss = 0.2668 | Batch Acc = 1.0000
Epoch 2/10 - Batch 40/63: Train Loss = 0.4735 | Batch Acc = 0.8125
Epoch 2/10 - Batch 50/63: Train Loss = 0.4632 | Batch Acc = 0.8750
Epoch 2/10 - Batch 60/63: Train Loss = 0

In [31]:
torch.save(room_model, "room_model_837%_312am.pth")
torch.save(room_model, "furniture_model_837%_312am.pth")

print("Full models saved successfully!")


Full models saved successfully!


In [None]:
import torch
import torch.nn as nn
import torchvision.models as models


class EmbeddingNet(nn.Module):
   def __init__(self, embed_dim=128):
       super(EmbeddingNet, self).__init__()
       self.base_model = models.resnet18(pretrained=True)
       num_features = self.base_model.fc.in_features
       self.base_model.fc = nn.Identity()
       self.fc = nn.Sequential(
           nn.Linear(num_features, 256),
           nn.ReLU(),
           nn.Linear(256, embed_dim)
       )
  
   def forward(self, x):
       x = self.base_model(x)
       x = self.fc(x)
       x = nn.functional.normalize(x, p=2, dim=1)
       return x



checkpoint = torch.load('furniture_model_837%_312am.pth', map_location=torch.device('cpu'), weights_only=False)
#Initializiing it
model = EmbeddingNet()
model.load_state_dict(checkpoint)
model.eval()








TypeError: Expected state_dict to be dict-like, got <class '__main__.EmbeddingNet'>.