In [None]:
import pandas as pd
from PIL import Image
import os

In [None]:
import torch
# Report the installed PyTorch build and the CUDA / cuDNN versions it exposes,
# so that GPU availability is visible before any training cell is run.
print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("cuDNN version:", torch.backends.cudnn.version())


In [None]:
import pandas as pd
import os

# Root folder that holds the annotation text files (and, later, the image paths).
data_folder = '../datasets'

# Category vocabulary. The first line of the file is skipped (presumably a row
# count -- confirm against the file) so that the following line supplies the
# column names, including 'category_name'.
# `sep=r'\s+'` replaces the deprecated `delim_whitespace=True` and matches the
# separator used by the other annotation readers in this notebook.
category_df = pd.read_csv(
    os.path.join(data_folder, 'list_category_cloth.txt'),
    skiprows=[0],
    sep=r'\s+',
)

# 1-based category id -> category name, in file order.
category_map = dict(enumerate(category_df['category_name'], start=1))

# Parse the attribute vocabulary by hand: every non-blank line is split on
# whitespace, the last token is kept as the attribute type and all preceding
# tokens are re-joined with single spaces to form the attribute name.
attr_list_path = os.path.join(data_folder, 'list_attr_cloth.txt')
with open(attr_list_path, 'r') as f:
    # The first two lines are skipped and never parsed.
    next(f)
    next(f)
    attributes = [
        [" ".join(tokens[:-1]), tokens[-1]]
        for tokens in map(str.split, f)
        if tokens
    ]

attribute_df = pd.DataFrame(attributes, columns=['attribute_name', 'attribute_type'])

# 1-based attribute id -> attribute name, in file order.
attribute_map = {
    position: attr_label
    for position, attr_label in enumerate(attribute_df['attribute_name'], start=1)
}

attribute_df.head()

In [None]:
import pandas as pd
import os

# Annotation files: one maps each image path to a split name, the other maps
# each image path to a category id.
eval_file = os.path.join(data_folder, 'list_eval_partition.txt')
cate_file = os.path.join(data_folder, 'list_category_img.txt')

# Fail loudly when an input is missing. Merely printing an error would leave
# `df_partition` / `df_category` undefined (or stale from an earlier run), and
# the cells that depend on them would then fail in a much less obvious way.
missing_files = [path for path in (eval_file, cate_file) if not os.path.exists(path)]
if missing_files:
    raise FileNotFoundError(
        "ERROR: Could not find 'list_eval_partition.txt' or 'list_category_img.txt' "
        f"(missing: {missing_files})"
    )

# Both files share the same layout: two leading lines that are skipped,
# followed by whitespace-separated rows without a usable header line.
# The separator is a raw string so that `\s` is not treated as a (invalid)
# Python string escape.
df_partition = pd.read_csv(
    eval_file, sep=r'\s+', skiprows=[0, 1], header=None, names=['image_path', 'split']
)
df_category = pd.read_csv(
    cate_file, sep=r'\s+', skiprows=[0, 1], header=None, names=['image_path', 'category_id']
)

# Inner join on the image path: only images present in both files are kept.
# Despite its name, `train_df` contains every split, not only training rows.
train_df = pd.merge(df_partition, df_category, on='image_path')

print(f"\nSuccessfully loaded {len(train_df)} images.")
print("Split breakdown:")
print(train_df['split'].value_counts())

print("\nSample Data:")
print(train_df.head())

In [None]:
# Image paths of the training subset, one path per line.
# `sep=r'\s+'` replaces the deprecated `delim_whitespace=True`.
train_path_df = pd.read_csv(
    os.path.join(data_folder, 'train.txt'),
    sep=r'\s+',
    header=None,
    names=['image_path']
)

# Per-image attribute rows: 26 whitespace-separated columns are read; the first
# is named 'image_path_copy' and the remaining ones are labelled 1..25.
# NOTE(review): the first column is discarded below on the assumption that it
# repeats the image path. If 'train_attr.txt' actually holds only attribute
# flags, this silently throws away the first attribute -- confirm against the file.
train_attr_df = pd.read_csv(
    os.path.join(data_folder, 'train_attr.txt'),
    sep=r'\s+',
    header=None,
    names=['image_path_copy'] + list(range(1, 26))
)

# The two files are paired purely by row position, so they must have the same
# number of rows; otherwise pd.concat would silently pad the shorter one with NaN.
if len(train_path_df) != len(train_attr_df):
    raise ValueError(
        f"train.txt has {len(train_path_df)} rows but train_attr.txt has "
        f"{len(train_attr_df)}; the files cannot be aligned row by row."
    )

train_attr_full_df = pd.concat([train_path_df, train_attr_df], axis=1)

train_attr_full_df = train_attr_full_df.drop(columns=['image_path_copy'])


print("\nSuccessfully merged training paths with attributes:")
print(train_attr_full_df.head())

print("\nShape of the final attribute table:", train_attr_full_df.shape)

# Attach the category id of every training image; the inner join drops any
# training path that has no entry in the category table.
final_train_df = pd.merge(train_attr_full_df, df_category, on='image_path', how='inner')

print("Final Training Data Ready.")
print(len(final_train_df))
print( final_train_df.columns.tolist())
final_train_df.head()

In [None]:
import torch
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image

class FashionDataset(Dataset):
    """Image dataset yielding ``(image, category_label, attribute_labels)``.

    Args:
        dataframe: Table with an ``'image_path'`` column, a ``'category_id'``
            column and the attribute columns.
        root_dir: Directory that every ``image_path`` is joined onto.
        transform: Optional callable applied to the loaded PIL image.
        attribute_columns: Optional iterable of column labels to use as
            attribute targets. When omitted, the label slice ``1:25`` is used
            (both ends inclusive), which is the original behaviour.

    Each item is a tuple of the (optionally transformed) image, the category
    as a ``torch.long`` tensor holding ``category_id - 1``, and the attribute
    row as a ``torch.float32`` tensor.
    """

    def __init__(self, dataframe, root_dir, transform=None, attribute_columns=None):
        self.df = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = self.df['image_path'].values
        # Shift ids down by one so that the smallest id (1) becomes index 0.
        self.categories = self.df['category_id'].values - 1
        if attribute_columns is None:
            attribute_frame = self.df.loc[:, 1:25]
        else:
            attribute_frame = self.df.loc[:, list(attribute_columns)]
        self.attributes = attribute_frame.values.astype('float32')

    def __len__(self):
        """Return the number of rows in the backing dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx``; unreadable images become a black 224x224 image."""
        img_name = os.path.join(self.root_dir, self.image_paths[idx])
        try:
            # The context manager releases the file handle as soon as the
            # pixel data has been converted.
            with Image.open(img_name) as opened:
                image = opened.convert('RGB')
        except OSError:
            # OSError already covers FileNotFoundError. A missing or corrupt
            # file is replaced by a black placeholder so one bad image does
            # not abort a whole epoch; its labels are still returned.
            image = Image.new('RGB', (224, 224), (0, 0, 0))
        if self.transform:
            image = self.transform(image)
        category_label = torch.tensor(self.categories[idx], dtype=torch.long)
        attribute_labels = torch.tensor(self.attributes[idx], dtype=torch.float32)
        return image, category_label, attribute_labels

# Preprocessing shared by training and indexing: resize to 224x224, convert to
# a tensor, then normalise each channel with the mean/std values conventionally
# used with torchvision's pretrained image models.
transform_pipeline = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# Training dataset over the merged path / attribute / category table.
dataset = FashionDataset(final_train_df, data_folder, transform=transform_pipeline)
print(f"Dataset created with {len(dataset)} samples.")

In [None]:
# Sanity check: fetch the first sample and show what the dataset yields.
img, cat, attr = dataset[0]
print("Image shape: " + str(img.shape))
print("Category: " + str(cat))
print("Attributes shape: " + str(attr.shape))

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models

class MultiTaskResNet(nn.Module):
    """ResNet-50 trunk shared by a category head and an attribute head.

    The backbone's final ``fc`` layer is replaced by ``nn.Identity`` so the
    backbone returns its feature vector (width ``fc.in_features``); two
    independent linear layers then map those features to the task outputs.

    Args:
        num_categories: Output width of the category head.
        num_attributes: Output width of the attribute head.
        weights: Forwarded to ``models.resnet50``. Defaults to ``'DEFAULT'``
            (pretrained weights); pass ``None`` to build an uninitialised
            trunk, e.g. right before ``load_state_dict``.
    """

    def __init__(self, num_categories=50, num_attributes=25, weights='DEFAULT'):
        super().__init__()
        self.backbone = models.resnet50(weights=weights)
        # Read the feature width before discarding the original classifier.
        n_inputs = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        self.category_head = nn.Linear(n_inputs, num_categories)
        self.attribute_head = nn.Linear(n_inputs, num_attributes)

    def forward(self, x):
        """Return ``(category_output, attribute_output)`` as raw linear outputs."""
        features = self.backbone(x)
        cat_output = self.category_head(features)
        attr_output = self.attribute_head(features)
        return cat_output, attr_output

# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f" Training on Device: {device}")

# Two-headed network, moved to the chosen device.
model = MultiTaskResNet(num_categories=50, num_attributes=25).to(device)

# One loss per head: cross-entropy on the category outputs and
# BCE-with-logits on the attribute outputs.
criterion_cat, criterion_attr = nn.CrossEntropyLoss(), nn.BCEWithLogitsLoss()

# A single Adam optimiser updates every parameter of the model.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Batches of 32 samples, reshuffled every epoch.
train_loader = DataLoader(dataset, shuffle=True, batch_size=32)

num_epochs = 3

print(f"\n Starting Training for {num_epochs} Epochs...")

# The per-epoch average divides by the number of batches, so an empty loader
# is rejected up front instead of surfacing later as a ZeroDivisionError.
steps_per_epoch = len(train_loader)
if steps_per_epoch == 0:
    raise ValueError("train_loader yields no batches; nothing to train on.")

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for i, (images, cat_labels, attr_labels) in enumerate(train_loader):
        images = images.to(device)
        cat_labels = cat_labels.to(device)
        attr_labels = attr_labels.to(device)

        optimizer.zero_grad()

        cat_preds, attr_preds = model(images)

        loss_cat = criterion_cat(cat_preds, cat_labels)
        loss_attr = criterion_attr(attr_preds, attr_labels)

        # Both task losses are summed with equal weight.
        total_loss = loss_cat + loss_attr

        total_loss.backward()

        optimizer.step()

        # Read the scalar once and reuse it for both the running sum and the log line.
        batch_loss = total_loss.item()
        running_loss += batch_loss

        if (i + 1) % 100 == 0:
            print(f"  > Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{steps_per_epoch}], Loss: {batch_loss:.4f}")

    avg_loss = running_loss / steps_per_epoch
    # The leading emoji was mis-encoded in the original message; restored here.
    print(f"🏁 Epoch {epoch+1} Finished. Average Loss: {avg_loss:.4f}")

print("\n Training Complete!")

# Persist only the parameters; the later cells rebuild the architecture and load them.
torch.save(model.state_dict(), "fashion_model_v1.pth")
print(" Model saved to 'fashion_model_v1.pth'")

In [None]:
from torch.utils.data import DataLoader

# Every image listed in the partition file is indexed, regardless of split.
# The separator is a raw string so that `\s` is not treated as a (invalid)
# Python string escape.
full_catalog_df = pd.read_csv(
    os.path.join(data_folder, 'list_eval_partition.txt'),
    sep=r'\s+',
    skiprows=[0, 1],
    header=None,
    names=['image_path', 'split']
)

print(f"Total images to index: {len(full_catalog_df)}")

class FashionInferenceDataset(Dataset):
    """Label-free image dataset yielding ``(image, image_path)`` pairs.

    Args:
        dataframe: Table with an ``'image_path'`` column.
        root_dir: Directory that every ``image_path`` is joined onto.
        transform: Optional callable applied to the loaded PIL image.

    The relative path is returned alongside the image so that downstream
    results can be matched back to their source file.
    """

    def __init__(self, dataframe, root_dir, transform=None):
        self.df = dataframe
        self.root_dir = root_dir
        self.transform = transform
        self.image_paths = self.df['image_path'].values

    def __len__(self):
        """Return the number of rows in the backing dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx``; unreadable files become a black 224x224 image."""
        img_name = os.path.join(self.root_dir, self.image_paths[idx])
        try:
            with Image.open(img_name) as opened:
                image = opened.convert('RGB')
        except OSError:
            # Only I/O and decoding failures are tolerated. A bare `except:`
            # would also swallow KeyboardInterrupt and genuine programming errors.
            image = Image.new('RGB', (224, 224), (0, 0, 0))

        if self.transform:
            image = self.transform(image)

        return image, self.image_paths[idx]

# Dataset over the full catalogue, using the same preprocessing as training.
inference_dataset = FashionInferenceDataset(
    full_catalog_df,
    data_folder,
    transform=transform_pipeline
)

# shuffle=False keeps batches in catalogue order, so the collected paths and
# features stay aligned row by row.
inference_loader = DataLoader(inference_dataset, batch_size=64, shuffle=False)
# The leading emoji was mis-encoded in the original message; restored here.
print("✅ Inference Loader Ready!")

In [None]:
import torch
import torch.nn as nn
from torchvision import models
import numpy as np
import pickle
from tqdm import tqdm
import os

class MultiTaskResNet(nn.Module):
    """Two-headed ResNet-50 used for indexing.

    Built with ``weights=None``: no pretrained parameters are fetched here,
    the trained ones are expected to arrive through ``load_state_dict``.
    Attribute names (``backbone``, ``category_head``, ``attribute_head``)
    determine the state-dict keys and must not change.
    """

    def __init__(self, num_categories=50, num_attributes=25):
        super().__init__()
        trunk = models.resnet50(weights=None)
        feature_dim = trunk.fc.in_features
        # Drop the stock classifier so the trunk emits its feature vector.
        trunk.fc = nn.Identity()
        self.backbone = trunk
        self.category_head = nn.Linear(feature_dim, num_categories)
        self.attribute_head = nn.Linear(feature_dim, num_attributes)

    def forward(self, x):
        """Return the category outputs and attribute outputs for batch ``x``."""
        shared = self.backbone(x)
        return self.category_head(shared), self.attribute_head(shared)

# Checkpoint produced by the training cell, and the pickle file the index is written to.
MODEL_PATH = "fashion_model_v1.pth"
OUTPUT_INDEX_PATH = "fashion_index_v1.pkl"

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f" Device set to: {device}")

# Guard clause: stop before building anything if the checkpoint is absent.
if not os.path.exists(MODEL_PATH):
    raise FileNotFoundError(f" Error: {MODEL_PATH} not found. Did you run the training cell?")

model = MultiTaskResNet(num_categories=50, num_attributes=25)

# The leading emoji was mis-encoded in the original message; restored here.
print(f"📂 Loading weights from '{MODEL_PATH}'...")
# NOTE(review): torch.load unpickles the file; only load checkpoints you created
# or trust.
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model = model.to(device)
# eval() switches layers such as batch-norm to inference behaviour.
model.eval()
print(" Model loaded and ready for indexing.")

print("Starting Feature Extraction...")
all_features, all_paths = [], []

# Only the backbone is run (the two heads are bypassed); its outputs are
# collected batch by batch together with the matching image paths.
with torch.no_grad():
    for images, paths in tqdm(inference_loader, desc="Indexing"):
        images = images.to(device)
        features = model.backbone(images)
        all_paths += paths
        all_features += [features.cpu().numpy()]

print("\n Stacking features...")
# Join the per-batch arrays along the first axis into one matrix.
final_embeddings = np.concatenate(all_features)

# Paths and embeddings are stored side by side; row i of the matrix belongs to paths[i].
index_data = dict(paths=all_paths, embeddings=final_embeddings)

with open(OUTPUT_INDEX_PATH, "wb") as f:
    pickle.dump(index_data, f)

print(f"SUCCESS! Index saved to '{OUTPUT_INDEX_PATH}'")
print(f"Matrix Shape: {final_embeddings.shape}")

In [None]:
import torch
import torch.nn as nn
from torchvision import models, transforms
from PIL import Image
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import os

# Artifacts written by the training and indexing cells, plus the dataset root
# that the stored relative image paths are joined onto when displaying matches.
MODEL_PATH = "fashion_model_v1.pth"
INDEX_PATH = "fashion_index_v1.pkl"
DATA_FOLDER = '../datasets' 

class MultiTaskResNet(nn.Module):
    """ResNet-50 feature extractor with a category head and an attribute head.

    The trunk is created with ``weights=None``; trained parameters are expected
    to be supplied afterwards via ``load_state_dict``.
    """

    def __init__(self, num_categories=50, num_attributes=25):
        nn.Module.__init__(self)
        self.backbone = models.resnet50(weights=None)
        # Width of the features that used to feed the stock classifier.
        head_in = self.backbone.fc.in_features
        # Replace the classifier with a pass-through layer.
        self.backbone.fc = nn.Identity()
        self.category_head = nn.Linear(in_features=head_in, out_features=num_categories)
        self.attribute_head = nn.Linear(in_features=head_in, out_features=num_attributes)

    def forward(self, x):
        """Run the shared trunk once and feed its output to both heads."""
        embedding = self.backbone(x)
        outputs = (self.category_head(embedding), self.attribute_head(embedding))
        return outputs

# Rebuild the network, restore the trained parameters and switch to inference mode.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = MultiTaskResNet()
state_dict = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(state_dict)
model = model.to(device).eval()

print("Loading Search Index...")
# NOTE(review): pickle.load executes arbitrary code from the file; only open
# index files that this notebook produced or that you otherwise trust.
with open(INDEX_PATH, "rb") as f:
    index_data = pickle.load(f)

# Cosine-distance nearest-neighbour lookup over the stored embeddings,
# returning five neighbours per query by default.
search_engine = NearestNeighbors(metric="cosine", n_neighbors=5).fit(index_data["embeddings"])
print("Search Engine Ready!")

def search_similar_images(query_image_path, top_k=None):
    """Show the query image next to its nearest neighbours from the index.

    Args:
        query_image_path: Path of the image to search with.
        top_k: Number of neighbours to retrieve. ``None`` (the default) uses
            the ``n_neighbors`` value the search engine was constructed with.

    Returns:
        None. A matplotlib figure is displayed. If the query image cannot be
        opened, an error line is printed and nothing is plotted.
    """
    # Same preprocessing steps as the pipeline used to build the index.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    try:
        with Image.open(query_image_path) as opened:
            image = opened.convert('RGB')
    except OSError:
        # Covers missing files and undecodable images, without also swallowing
        # KeyboardInterrupt or programming errors as a bare `except:` would.
        print(f"Error: Could not open {query_image_path}")
        return

    # Add a batch dimension and embed the query with the backbone only.
    img_tensor = transform(image).unsqueeze(0).to(device)
    with torch.no_grad():
        query_vector = model.backbone(img_tensor).cpu().numpy()

    distances, indices = search_engine.kneighbors(query_vector, n_neighbors=top_k)
    neighbour_ids = indices[0]
    neighbour_dists = distances[0]

    # One panel for the query plus one per neighbour actually returned, so the
    # layout no longer breaks when the neighbour count is not exactly five.
    # The width scales so that six panels keep the original 20-inch figure.
    n_panels = len(neighbour_ids) + 1
    fig, axes = plt.subplots(1, n_panels, figsize=(20 * n_panels / 6, 5), squeeze=False)
    axes = axes[0]
    for ax in axes:
        # Hide ticks everywhere, including panels whose image fails to load.
        ax.axis("off")

    axes[0].imshow(image)
    axes[0].set_title("Your Query")

    for rank, (idx, dist) in enumerate(zip(neighbour_ids, neighbour_dists), start=1):
        match_path = index_data["paths"][idx]
        full_path = os.path.join(DATA_FOLDER, match_path)
        try:
            with Image.open(full_path) as match_img:
                axes[rank].imshow(match_img)
        except OSError:
            print(f"Could not load match: {full_path}")
            continue
        axes[rank].set_title(f"Match {rank}\nDist: {dist:.3f}")
    plt.show()

# Demo query: use the catalogue image stored at position 110 of the index's
# path list. The image is itself part of the index, so it is presumably
# returned as its own closest match.
test_image = os.path.join(DATA_FOLDER, index_data["paths"][110]) 
search_similar_images(test_image)

In [None]:
# Demo query with the catalogue image at position 6 of the index's path list.
test_image = os.path.join(DATA_FOLDER, index_data["paths"][6]) 
search_similar_images(test_image)

In [None]:
# Demo query with the catalogue image at position 64 of the index's path list.
test_image = os.path.join(DATA_FOLDER, index_data["paths"][64]) 
search_similar_images(test_image)

In [None]:
# Demo query with the catalogue image at position 43644 of the index's path
# list; raises IndexError if the index holds fewer entries.
test_image = os.path.join(DATA_FOLDER, index_data["paths"][43644]) 
search_similar_images(test_image)

In [None]:
# Demo query with the catalogue image at position 93644 of the index's path
# list; raises IndexError if the index holds fewer entries.
test_image = os.path.join(DATA_FOLDER, index_data["paths"][93644]) 
search_similar_images(test_image)