In [None]:
import pandas as pd
from PIL import Image
import os

In [None]:
import pandas as pd
import os

# Root folder holding the annotation text files and the images they reference.
data_folder = '../datasets'

# Category table: the first line of the file is skipped and the next line is
# used as the header row. `sep=r'\s+'` is the documented replacement for the
# deprecated `delim_whitespace=True`.
category_df = pd.read_csv(
    os.path.join(data_folder, 'list_category_cloth.txt'),
    skiprows=[0],
    sep=r'\s+',
)

# 1-based category id -> category name, in file order.
category_map = dict(enumerate(category_df['category_name'], start=1))

# The attribute file is parsed by hand: the last whitespace-separated token on
# each line is the attribute type and everything before it is the (possibly
# multi-word) attribute name.
attributes = []
with open(os.path.join(data_folder, 'list_attr_cloth.txt'), 'r') as f:
    # Skip the two leading lines; the default keeps a too-short file from
    # raising StopIteration here.
    next(f, None)
    next(f, None)
    for line in f:
        parts = line.split()
        if not parts:
            continue  # ignore blank lines
        attributes.append([" ".join(parts[:-1]), parts[-1]])

attribute_df = pd.DataFrame(attributes, columns=['attribute_name', 'attribute_type'])

# 1-based attribute id -> attribute name, in file order.
attribute_map = dict(enumerate(attribute_df['attribute_name'], start=1))


In [None]:

# Image paths, one per line. `sep=r'\s+'` replaces the deprecated
# `delim_whitespace=True` and splits on the same whitespace runs.
train_path_df = pd.read_csv(
    os.path.join(data_folder, 'train.txt'),
    sep=r'\s+',
    header=None,
    names=['image_path']
)

# Category ids, one per line, read from a separate file.
train_cate_df = pd.read_csv(
    os.path.join(data_folder, 'train_cate.txt'),
    sep=r'\s+',
    header=None,
    names=['category_id']
)

# The two files are joined purely by row position.
# NOTE(review): assumes train.txt and train_cate.txt list images in the same order — confirm.
train_df = pd.concat([train_path_df, train_cate_df], axis=1)

print(train_df.head())

In [None]:
# Image paths, one per line (re-read here so this cell runs on its own).
# `sep=r'\s+'` replaces the deprecated `delim_whitespace=True`.
train_path_df = pd.read_csv(
    os.path.join(data_folder, 'train.txt'),
    sep=r'\s+',
    header=None,
    names=['image_path']
)

# Attribute flags. The first column is named 'image_path_copy' and is dropped
# below; the remaining columns are labelled 1..25.
# NOTE(review): this assumes every line starts with a path-like column followed
# by exactly 25 flags. If the file holds only flag columns, the first flag is
# discarded and the labels are shifted by one — confirm against train_attr.txt.
train_attr_df = pd.read_csv(
    os.path.join(data_folder, 'train_attr.txt'),
    sep=r'\s+',
    header=None,
    names=['image_path_copy'] + list(range(1, 26))
)

# Join by row position, then drop the placeholder first column.
train_attr_full_df = pd.concat([train_path_df, train_attr_df], axis=1)
train_attr_full_df = train_attr_full_df.drop(columns=['image_path_copy'])

print("\nSuccessfully merged training paths with attributes:")
print(train_attr_full_df.head())

print("\nShape of the final attribute table:", train_attr_full_df.shape)

In [None]:
from PIL import Image
from IPython.display import display
import os

# Sanity check: show one training image next to its decoded category and attributes.
image_path_to_test = train_df['image_path'].iloc[1]

print(f"\n--- Analysis for: {image_path_to_test} ---")

# Look up the category id for this path and translate it to a name.
category_row = train_df[train_df['image_path'] == image_path_to_test]
category_id = category_row['category_id'].iloc[0]
category_name = category_map[category_id]

# Every column after 'image_path' is an attribute flag; keep the ids set to 1.
attribute_row = train_attr_full_df[train_attr_full_df['image_path'] == image_path_to_test]
present_attributes_series = attribute_row.iloc[0, 1:]
present_attribute_ids = present_attributes_series[present_attributes_series == 1].index.tolist()
attribute_names = [attribute_map[attr_id] for attr_id in present_attribute_ids]

full_image_path = os.path.join(data_folder, image_path_to_test)
# The picture is bound to `sample_image`, not `image`: a later cell imports the
# Keras `image` module under that name, and re-running this cell afterwards
# would otherwise replace the module with a PIL object. The context manager
# also closes the file handle once the picture has been rendered.
with Image.open(full_image_path) as sample_image:
    display(sample_image)

print(f"\n✅ Category: {category_name}")
print("\n✅ Attributes:")
if attribute_names:
    for name in attribute_names:
        print(f"- {name}")
else:
    print("No attributes found.")


In [None]:
import tensorflow 

In [None]:


from tensorflow.keras.preprocessing import image
import numpy as np

def preprocess_image(img_path):
    """Load an image file and prepare it as a one-image batch for ResNet50.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The array produced by ResNet50's ``preprocess_input`` for the image
        resized to 224x224, with a leading batch axis of size 1.

    Raises:
        FileNotFoundError: If ``img_path`` does not exist (callers in this
            notebook catch this to skip missing images).
    """
    # Imported locally so the function does not depend on names that other
    # cells bind later (`preprocess_input`) or may rebind (`image`).
    from tensorflow.keras.applications.resnet50 import preprocess_input
    from tensorflow.keras.preprocessing.image import img_to_array, load_img

    # Load the image from disk, resized to 224x224 pixels.
    img = load_img(img_path, target_size=(224, 224))

    # Convert the image to a NumPy array.
    img_array = img_to_array(img)

    # Add a leading axis: the model consumes batches, not single images.
    img_array_expanded = np.expand_dims(img_array, axis=0)

    # Apply the ResNet50-specific input preprocessing.
    return preprocess_input(img_array_expanded)




In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model


# ResNet50 backbone with ImageNet weights and no classification head,
# fed with 224x224 RGB inputs.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Global average pooling turns the backbone's feature maps into one vector per image.
pooling_layer = GlobalAveragePooling2D()
x = pooling_layer(base_model.output)

# The extractor maps the backbone input straight to the pooled vector.
feature_extractor_model = Model(inputs=base_model.input, outputs=x)

print("\nModel created successfully!")

In [None]:

# Smoke test: run the extractor on the first training image only.
image_path_to_test = train_df['image_path'].iloc[0]
full_image_path = os.path.join(data_folder, image_path_to_test)

try:
    processed_image = preprocess_image(full_image_path)
    feature_vector = feature_extractor_model.predict(processed_image)
except FileNotFoundError:
    print(f"[ERROR] Could not find the image file at: {full_image_path}")
else:
    # Only reached when both loading and prediction succeeded.
    print(f"Successfully extracted features for: {image_path_to_test}")
    print(f"Shape of the feature vector: {feature_vector.shape}")
    print("\nThis vector is the numerical representation of your image!")

In [None]:
import numpy as np
import pickle

# Number of images preprocessed and sent to the model per `predict()` call.
# `predict()` is designed for batches; calling it once per image adds a large
# fixed overhead and prints one progress bar per image.
FEATURE_CHUNK_SIZE = 100

# Maps image path (as listed in train_df) -> feature array of shape (1, n_features).
features_dict = {}

image_paths = train_df['image_path'].tolist()
total_images = len(image_paths)
print(f"Starting feature extraction for {total_images} images...")

for chunk_start in range(0, total_images, FEATURE_CHUNK_SIZE):
    chunk_paths = image_paths[chunk_start:chunk_start + FEATURE_CHUNK_SIZE]

    # Load what can be loaded; missing files are reported and skipped.
    loaded_paths, loaded_arrays = [], []
    for image_path in chunk_paths:
        full_image_path = os.path.join(data_folder, image_path)
        try:
            loaded_arrays.append(preprocess_image(full_image_path))
        except FileNotFoundError:
            print(f"\n[WARNING] Could not find image file, skipping: {full_image_path}")
            continue
        loaded_paths.append(image_path)

    if loaded_arrays:
        # Each preprocessed image already has a batch axis, so concatenating
        # along axis 0 yields one batch for the whole chunk.
        chunk_features = feature_extractor_model.predict(
            np.concatenate(loaded_arrays, axis=0), verbose=0
        )
        for image_path, feature_vector in zip(loaded_paths, chunk_features):
            # Keep the leading axis of size 1 so stored vectors have the same
            # shape as those produced by single-image prediction.
            features_dict[image_path] = feature_vector[np.newaxis, :]

    # Progress is counted by position in the list, not by DataFrame index.
    print(f"  Processed {chunk_start + len(chunk_paths)} / {total_images} images")

print("\n--- Feature extraction complete! ---")

with open('extracted_features.pkl', 'wb') as f:
    pickle.dump(features_dict, f)

print(f"Successfully saved {len(features_dict)} feature vectors to 'extracted_features.pkl'")

In [None]:
# Give every category row a 1-based id matching its position in the table.
category_df['category_id'] = list(range(1, len(category_df) + 1))

# Attach the category columns to each training image through that id.
train_df_full = train_df.merge(category_df, on='category_id')

In [None]:
# Split image paths by category_type: 1 goes to the upper-body list, 2 to the lower-body list.
is_upper_body = train_df_full['category_type'] == 1
is_lower_body = train_df_full['category_type'] == 2

upper_body_paths = train_df_full.loc[is_upper_body, 'image_path'].tolist()
lower_body_paths = train_df_full.loc[is_lower_body, 'image_path'].tolist()
len(lower_body_paths)

In [None]:
# Number of image paths collected in upper_body_paths.
len(upper_body_paths)

In [None]:
import random

# Seed the global RNG so the sampled pairs (and any later use of `random` in
# this run) are reproducible after Restart & Run All.
PAIR_SEED = 42
random.seed(PAIR_SEED)

no_of_pairs_needed = 10000

# random.choice() raises an unhelpful IndexError on an empty list.
if not upper_body_paths or not lower_body_paths:
    raise ValueError(
        "Cannot build pairs: upper_body_paths and lower_body_paths must both be non-empty."
    )

# Positive examples (label 1): one upper-body image with one lower-body image.
positive_pairs = [
    [random.choice(upper_body_paths), random.choice(lower_body_paths), 1]
    for _ in range(no_of_pairs_needed)
]

# Negative examples (label 0): two images of the same body part, half
# upper/upper and half lower/lower.
negative_pairs = [
    [random.choice(upper_body_paths), random.choice(upper_body_paths), 0]
    for _ in range(no_of_pairs_needed // 2)
]
negative_pairs += [
    [random.choice(lower_body_paths), random.choice(lower_body_paths), 0]
    for _ in range(no_of_pairs_needed // 2)
]

In [None]:
# Merge both label groups into one list, then shuffle it in place so the
# labels are interleaved.
all_pairs = [*positive_pairs, *negative_pairs]
random.shuffle(all_pairs)

In [None]:
# Total number of pairs (positive plus negative) after combining.
len(all_pairs)

In [None]:
import pickle

# Reload the cached feature vectors written by the extraction cell.
# pickle executes code on load, so only open a file this notebook produced.
with open('extracted_features.pkl', 'rb') as features_file:
    features_dict = pickle.load(features_file)

In [None]:
# Number of image paths that have a cached feature vector.
len(features_dict)

In [None]:
# One row per pair: [features A, attributes A, features B, attributes B, label].
processed_data = []
skipped_pairs = 0

# Index by path so each pair's attribute row can be fetched with .loc.
train_attr_full_df_indexed = train_attr_full_df.set_index('image_path')

print("Assembling final DataFrame... This may take a moment.")

for path_a, path_b, label in all_pairs:
    try:
        features_a = features_dict[path_a].flatten()
        features_b = features_dict[path_b].flatten()

        attributes_a = train_attr_full_df_indexed.loc[path_a].values
        attributes_b = train_attr_full_df_indexed.loc[path_b].values
    except KeyError:
        # A path without cached features or an attribute row: skip the pair,
        # but keep count so the loss of data is visible.
        skipped_pairs += 1
        continue

    combined_row = np.concatenate([features_a, attributes_a, features_b, attributes_b, [label]])
    processed_data.append(combined_row)

if skipped_pairs:
    print(f"[WARNING] Skipped {skipped_pairs} of {len(all_pairs)} pairs with missing features or attributes.")


In [None]:
# Derive the column layout from the assembled rows themselves, not from loop
# variables left over by the previous cell (which are undefined or stale when
# no pair, or only the last pair, was skipped).
if not processed_data:
    raise ValueError("processed_data is empty: no pair had both features and attributes.")

# Each row is [features A, attributes A, features B, attributes B, label].
num_attributes = train_attr_full_df_indexed.shape[1]
num_features = (len(processed_data[0]) - 1) // 2 - num_attributes

columns_a = [f'feat_A_{i}' for i in range(num_features)] + [f'attr_A_{i}' for i in range(num_attributes)]
columns_b = [f'feat_B_{i}' for i in range(num_features)] + [f'attr_B_{i}' for i in range(num_attributes)]
final_columns = columns_a + columns_b + ['label']

final_training_df = pd.DataFrame(processed_data, columns=final_columns)

print("\n--- Final Training DataFrame ---")
print(f"Shape of the DataFrame: {final_training_df.shape}")
print(final_training_df.head())

In [None]:
# Inputs are every column except the target; the target is the 'label' column.
y = final_training_df['label']
X = final_training_df.drop(columns=['label'])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the pairs for validation; the fixed seed keeps the split stable.
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=42, test_size=0.2)

print("--- Data Split Complete ---")
for split_name, split_data in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_val", X_val),
    ("y_val", y_val),
):
    print(f"Shape of {split_name}:", split_data.shape)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import matplotlib.pyplot as plt

# --- Step 1: Define the Model Architecture ---

# The input width equals the number of columns in the training matrix.
input_dim = X_train.shape[1]

model = Sequential([
    # First hidden layer; `input_dim` declares the width of each input row.
    Dense(64, activation='relu', input_dim=input_dim),

    # Second hidden layer.
    Dense(64, activation='relu'),

    # Output layer: one sigmoid unit producing a score between 0 and 1.
    Dense(1, activation='sigmoid')
])

# --- Step 2: Compile the Model ---

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',       # binary target (label is 0 or 1)
    metrics=['accuracy']
)

print("--- Model Architecture ---")
model.summary()


# --- Step 3: Train the Model ---

print("\n--- Starting Model Training ---")
# `history` records the per-epoch metrics used for the plots below.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=64,
    verbose=1
)
print("\n--- Model Training Complete ---")


# --- Step 4: Visualize the Training History ---

print("\n--- Plotting Training History ---")
history_df = pd.DataFrame(history.history)

# One panel per metric, each with its training and validation curve.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
for ax, metric, metric_label in (
    (accuracy_ax, 'accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'Loss'),
):
    ax.plot(history_df[metric], label=f'Training {metric_label}')
    ax.plot(history_df[f'val_{metric}'], label=f'Validation {metric_label}')
    ax.set(title=f'Training and Validation {metric_label}', xlabel='Epoch', ylabel=metric_label)
    ax.legend()

fig.tight_layout()
plt.show()


# --- Step 5: Save the Trained Model ---
# The path is kept in one variable so the message always names the file
# that was actually written.
model_path = '2nd_outfit_compatibility_model.h5'
model.save(model_path)
print(f"\n--- Model Saved Successfully to '{model_path}' ---")

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
import matplotlib.pyplot as plt
import pandas as pd

# --- Step 1: Define the Model Architecture with Dropout ---

# The input width equals the number of columns in the training matrix.
input_dim = X_train.shape[1]

model = Sequential([
    # First hidden layer; `input_dim` declares the width of each input row.
    Dense(128, activation='relu', input_dim=input_dim),

    # Dropout with rate 0.4 after the first hidden layer (regularization).
    Dropout(0.4),

    # Second hidden layer.
    Dense(64, activation='relu'),

    # Dropout with rate 0.3 after the second hidden layer.
    Dropout(0.3),

    # Output layer: one sigmoid unit producing a score between 0 and 1.
    Dense(1, activation='sigmoid')
])

# --- Step 2: Compile the Model ---

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

print("--- New Model Architecture with Dropout ---")
model.summary()


# --- Step 3: Train the Model ---

print("\n--- Starting Model Training ---")
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=64,
    verbose=1
)
print("\n--- Model Training Complete ---")


# --- Step 4: Visualize the Training History ---

print("\n--- Plotting Training History ---")
history_df = pd.DataFrame(history.history)

# One panel per metric, each with its training and validation curve.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
for ax, metric, metric_label in (
    (accuracy_ax, 'accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'Loss'),
):
    ax.plot(history_df[metric], label=f'Training {metric_label}')
    ax.plot(history_df[f'val_{metric}'], label=f'Validation {metric_label}')
    ax.set(title=f'Training and Validation {metric_label}', xlabel='Epoch', ylabel=metric_label)
    ax.legend()

fig.tight_layout()
plt.show()


# --- Step 5: Save the Final, Tuned Model ---
# The path is kept in one variable so the message always names the file
# that was actually written.
model_path = '3rd_outfit_compatibility_model_tuned.h5'
model.save(model_path)
print(f"\n--- Tuned Model Saved Successfully to '{model_path}' ---")

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
import matplotlib.pyplot as plt
import pandas as pd

# --- Step 1: Define the Model Architecture ---

# The input width equals the number of columns in the training matrix.
input_dim = X_train.shape[1]

model = Sequential([
    # First hidden layer; `input_dim` declares the width of each input row.
    Dense(128, activation='relu', input_dim=input_dim),

    # Batch normalization after the first hidden layer.
    BatchNormalization(),

    # Dropout with rate 0.4 (regularization).
    Dropout(0.4),

    # Second hidden layer.
    Dense(64, activation='relu'),

    # Batch normalization after the second hidden layer.
    BatchNormalization(),

    # Dropout with rate 0.3.
    Dropout(0.3),

    # Output layer: one sigmoid unit producing a score between 0 and 1.
    Dense(1, activation='sigmoid')
])

# --- Step 2: Compile the Model ---

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

print("--- New Model Architecture with Batch Normalization & Dropout ---")
model.summary()


# --- Step 3: Train the Model ---

print("\n--- Starting Model Training ---")
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=30,
    batch_size=64,
    verbose=1
)
print("\n--- Model Training Complete ---")


# --- Step 4: Visualize the Training History ---

print("\n--- Plotting Training History ---")
history_df = pd.DataFrame(history.history)

# One panel per metric, each with its training and validation curve.
fig, (accuracy_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 5))
for ax, metric, metric_label in (
    (accuracy_ax, 'accuracy', 'Accuracy'),
    (loss_ax, 'loss', 'Loss'),
):
    ax.plot(history_df[metric], label=f'Training {metric_label}')
    ax.plot(history_df[f'val_{metric}'], label=f'Validation {metric_label}')
    ax.set(title=f'Training and Validation {metric_label}', xlabel='Epoch', ylabel=metric_label)
    ax.legend()

fig.tight_layout()
plt.show()


# --- Step 5: Save the Final, Tuned Model ---
# The path is kept in one variable so the message always names the file
# that was actually written.
model_path = '4th_outfit_compatibility_model_final.h5'
model.save(model_path)
print(f"\n--- Final Tuned Model Saved Successfully to '{model_path}' ---")

In [None]:
import sys
# List installed packages using the same Python interpreter that runs this kernel.
!{sys.executable} -m pip list

In [None]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# --- GPU verification ---
# Report whether TensorFlow sees a GPU and, if so, enable memory growth on
# every visible device.
print("--- Verifying GPU Setup ---")
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("❌ No GPU found. TensorFlow will run on the CPU.")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # The error is printed and the notebook carries on.
        print(e)
    else:
        print(f"✅ Success! GPU(s) found and configured: {gpus}")
print("---------------------------\n")


# --- 1. Prepare Data and Split ---
# Relies on `train_attr_full_df` and `data_folder` from earlier cells.
# Columns 1..25 are converted to numeric dtype, one column at a time.
numeric_attribute_columns = {
    attribute_id: pd.to_numeric(train_attr_full_df[attribute_id])
    for attribute_id in range(1, 26)
}
for attribute_id, numeric_values in numeric_attribute_columns.items():
    train_attr_full_df[attribute_id] = numeric_values

# 80/20 train/validation split with a fixed seed.
train_df_image, val_df_image = train_test_split(
    train_attr_full_df, random_state=42, test_size=0.2
)
print(f"Data split into {len(train_df_image)} training and {len(val_df_image)} validation samples.")


# --- 2. Create a Data Generator ---
def data_generator(df, batch_size=32):
    """Endlessly yield random (images, labels) batches drawn from ``df``.

    Args:
        df: DataFrame with an 'image_path' column; every other column is
            treated as a label and cast to float32.
        batch_size: Number of rows sampled per batch. Capped at ``len(df)``
            so a small frame does not make sampling fail.

    Yields:
        Tuple ``(images, labels)``: the ResNet50-preprocessed image batch and
        the matching label array. A batch is smaller than ``batch_size`` when
        some sampled image files are missing.

    Raises:
        ValueError: If ``df`` has no rows.
        FileNotFoundError: If none of the images sampled for a batch exist,
            which usually means ``data_folder`` is wrong.
    """
    if len(df) == 0:
        raise ValueError("data_generator received an empty DataFrame.")

    # Sampling is without replacement, so n cannot exceed the row count.
    sample_size = min(batch_size, len(df))

    while True:
        batch_df = df.sample(n=sample_size)
        batch_images, batch_labels = [], []
        for _, row in batch_df.iterrows():
            img_path = os.path.join(data_folder, row['image_path'])
            try:
                img = load_img(img_path, target_size=(224, 224))
            except FileNotFoundError:
                continue  # best effort: skip rows whose image is missing
            batch_images.append(img_to_array(img))
            # Select the labels by name, not position, so the column order
            # of `df` does not matter.
            batch_labels.append(row.drop('image_path').to_numpy().astype('float32'))

        if not batch_images:
            # An empty batch would otherwise fail later with an obscure
            # indexing error inside preprocessing.
            raise FileNotFoundError(
                f"None of the {sample_size} sampled images could be found under {data_folder!r}."
            )

        yield preprocess_input(np.array(batch_images)), np.array(batch_labels)


# --- 3. Build the Model using Transfer Learning ---
print("\n--- Building the transfer learning model... ---")

# ImageNet-pretrained ResNet50 without its classification head; its weights are frozen.
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
base_model.trainable = False

# Trainable head stacked on the backbone: pooling -> dense -> dropout.
head_layers = [
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.4),
]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)

# 25 independent sigmoid outputs, one per attribute column.
output_layer = Dense(25, activation='sigmoid')(x)
model = Model(inputs=base_model.input, outputs=output_layer)


# --- 4. Compile and Train the Model ---
# The head has 25 independent sigmoid outputs (multi-label). The generic
# 'accuracy' alias can resolve to categorical (argmax) accuracy for an output
# of this shape, so the per-label binary accuracy is requested explicitly.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

# Stop once val_loss has not improved for 5 epochs and restore the best weights.
early_stopper = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

print("\n--- Starting model training... (This will now run on the GPU) ---")
# Both generators are endless, so the step counts define the length of an epoch.
history = model.fit(
    data_generator(train_df_image, batch_size=64),
    steps_per_epoch=len(train_df_image) // 64,
    epochs=25,
    validation_data=data_generator(val_df_image, batch_size=64),
    validation_steps=len(val_df_image) // 64,
    callbacks=[early_stopper]
)
print("--- Model training complete. ---")


# --- 5. Save the Final Model ---
model.save('attribute_predictor_model.h5')
print("\n--- Attribute predictor model saved to 'attribute_predictor_model.h5' ---")

In [None]:
import tensorflow as tf
# Print the GPU devices TensorFlow can see (an empty list means none).
print(tf.config.list_physical_devices('GPU'))
