In [7]:
!pip install sentence_transformers einops

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)

KeyboardInterrupt



In [None]:
import os
import pandas as pd
import random
import numpy as np
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
# Read the caption table that describes the clothing items.
df = pd.read_csv("/kaggle/input/demo-csv-2/Demo_captions_2.csv")

# Directory that holds the item photographs.
image_dir = "/kaggle/input/demo-pics-2/Demo closet"

# Resolve every image file name to a full path inside image_dir.
df['Image File Path'] = [
    os.path.join(image_dir, file_name) for file_name in df['Image File Name']
]

# Instantiate the sentence-embedding model on the CPU.
# NOTE(review): trust_remote_code=True lets the model repository supply its own
# Python code - confirm the source is trusted.
model_name = "nomic-ai/nomic-embed-text-v1"
model = SentenceTransformer(model_name, device='cpu', trust_remote_code=True)


In [None]:
# Embed a single piece of text with the module-level model
def generate_embedding(text):
    """Return the embedding of ``text``.

    The text is wrapped in a one-element list for ``model.encode`` and the
    first (only) entry of the result is returned.
    """
    return model.encode([text])[0]

# Columns whose values are concatenated into the free-text item description
description_columns = ['Outfit Piece', 'Pattern', 'Color', 'Material', 'Season', 'Weather', 'Dress code']

# Build one space-separated description string per row. Every value is cast to
# str first, so a missing value ends up as the literal text "nan".
df['description'] = df[description_columns].apply(
    lambda row: ' '.join(row.values.astype(str)), axis=1
)

# Embed all descriptions with a single batched encode() call instead of one
# model call per row; each cell of 'embedding' then holds one vector.
description_vectors = model.encode(df['description'].tolist())
df['embedding'] = pd.Series(list(description_vectors), index=df.index)

# Set Outfit Context based on 'Dress code'
df['Outfit Context'] = df['Dress code']


In [None]:

# Global weights read by select_with_combined_bias() and calculate_outfit_similarity().
# Per item the combined score is:
#   embedding_weight * cosine_similarity + context_weight * context_similarity
embedding_weight = 0.8  # Multiplier for the cosine similarity between item embeddings
context_weight = 10   # Multiplier for the context similarity (1.0 on a match, 0.7 on a mismatch)
# Calculate context similarity based on 'Dress code'
def calculate_context_similarity(item_context, other_context, penalty_factor=0.7):
    """
    Score how well two outfit contexts agree.

    Args:
        item_context: Context label of the reference item.
        other_context: Context label of the item being compared.
        penalty_factor: Score returned when the two labels differ
            (defaults to 0.7, the value that used to be hard-coded).

    Returns:
        1.0 when the labels compare equal with ``==``, otherwise
        ``penalty_factor``. Two NaN labels therefore count as a mismatch,
        because NaN never compares equal to itself.
    """
    if item_context == other_context:
        return 1.0
    return penalty_factor

def check_for_inappropriate_pairing(top, bottom):
    """Return a score penalty for a formal top combined with a casual bottom.

    Each argument may be a ``pd.Series`` (one row) or any other object whose
    ``'Outfit Context'`` entry supports ``.iloc`` (e.g. a one-row DataFrame),
    in which case the first value is used.

    Returns:
        -2 when the top's context is ``'formal'`` and the bottom's is
        ``'casual'``; 0 for every other combination.
    """
    def _context_of(piece):
        # A Series yields the scalar directly; anything else is indexed positionally.
        value = piece['Outfit Context']
        if isinstance(piece, pd.Series):
            return value
        return value.iloc[0]

    top_context = _context_of(top)
    bottom_context = _context_of(bottom)

    is_mismatch = top_context == 'formal' and bottom_context == 'casual'
    return -2 if is_mismatch else 0

# Occasional score deduction that adds variety
def apply_random_penalty():
    """Return -0.5 when a fresh ``random.random()`` draw is below 0.1, else 0."""
    return -0.5 if random.random() < 0.1 else 0

In [None]:
# Select item with combined similarity bias (context + embedding similarity)
def select_with_combined_bias(df, reference_embedding, reference_context, category):
    """
    Randomly pick one item of ``category``, favouring items similar to a reference.

    Args:
        df: DataFrame with 'Master Category', 'embedding' and 'Outfit Context' columns.
        reference_embedding: Embedding vector the candidates are compared against.
        reference_context: Context label the candidates are compared against.
        category: Value of 'Master Category' to draw from.

    Returns:
        A one-row DataFrame holding the sampled item, or an empty DataFrame
        when ``df`` has no rows in ``category``.

    Each candidate's sampling weight is
    ``embedding_weight * cosine_similarity + context_weight * context_similarity``
    (both weights are module-level globals). Negative weights are clipped to
    zero so they can neither crash the sampler nor flip the preference order
    when normalised; if the weights are unusable (non-finite or all zero) the
    draw falls back to a uniform distribution.
    """
    category_df = df[df['Master Category'] == category]
    if category_df.empty:
        return pd.DataFrame()  # Return empty if no items in category

    category_embeddings = np.vstack(category_df['embedding'].values)
    embedding_similarities = cosine_similarity([reference_embedding], category_embeddings)[0]
    context_similarities = category_df['Outfit Context'].apply(
        lambda x: calculate_context_similarity(reference_context, x)
    ).values

    # Combine similarities using global weights
    combined_similarities = (embedding_weight * embedding_similarities) + (context_weight * context_similarities)

    # Cosine similarity can be negative, so the combined score can be too;
    # sampling weights must be non-negative.
    weights = np.clip(np.asarray(combined_similarities, dtype=float), 0.0, None)
    total = weights.sum()

    # Validate the total *before* dividing so a zero/NaN/inf sum never
    # produces a broken probability vector.
    if np.isfinite(total) and total > 0:
        probabilities = weights / total
    else:
        probabilities = np.full(len(weights), 1.0 / len(weights))

    return category_df.sample(1, weights=probabilities)

# Build a random outfit anchored on a randomly chosen top
def select_random_outfit(df):
    """
    Assemble one outfit: a random top plus one item from every other category.

    A top is sampled uniformly from the rows whose 'Master Category' is
    'tops'; its embedding and context then bias the pick made by
    ``select_with_combined_bias`` for each remaining category.

    Returns:
        A DataFrame with the top in the first row followed by the selected
        items (index reset), or an empty DataFrame when ``df`` has no tops.
    """
    tops_df = df[df['Master Category'] == 'tops']
    if tops_df.empty:
        print("No tops available.")
        return pd.DataFrame()

    top_item = tops_df.sample(1)
    reference_embedding = top_item['embedding'].values[0]
    reference_context = top_item['Outfit Context'].values[0]

    remaining_items_df = df[df['Master Category'] != 'tops']

    # One biased pick per non-top category, evaluated lazily in category order.
    candidates = (
        select_with_combined_bias(remaining_items_df, reference_embedding, reference_context, category)
        for category in remaining_items_df['Master Category'].unique()
    )

    pieces = [top_item]
    pieces.extend(piece for piece in candidates if not piece.empty)
    return pd.concat(pieces, ignore_index=True)

# Score an outfit against its top, using the global weights
def calculate_outfit_similarity(outfit):
    """
    Sum, over every non-top item, how well it matches the outfit's top.

    For each row whose 'Master Category' is not 'tops' the score grows by
    ``embedding_weight * cosine_similarity + context_weight * context_similarity``
    relative to the first top, then by the pairing penalty and by the random
    penalty. Because of the random penalty, repeated calls on the same outfit
    may return different scores.

    Returns:
        The accumulated score, or 0 when the outfit contains no top.
    """
    top_item = outfit[outfit['Master Category'] == 'tops']
    if top_item.empty:
        return 0

    anchor_embedding = top_item['embedding'].values[0]
    anchor_context = top_item['Outfit Context'].values[0]

    score = 0
    for _, piece in outfit.iterrows():
        if piece['Master Category'] == 'tops':
            continue  # the top is the reference, not a scored item

        cosine = cosine_similarity([anchor_embedding], [piece['embedding']])[0][0]
        context = calculate_context_similarity(anchor_context, piece['Outfit Context'])
        score += (embedding_weight * cosine) + (context_weight * context)

        # Penalties are added after the similarity term, one at a time
        score += check_for_inappropriate_pairing(top_item, piece)
        score += apply_random_penalty()

    return score

# Generate, rank, and select the top outfits with a minimum similarity threshold
def generate_and_rank_outfits(df, num_outfits=10, min_similarity_score=27, top_k=3, max_batches=100):
    """
    Generate random outfits in batches and return the best-scoring ones.

    Each batch draws ``num_outfits`` outfits with ``select_random_outfit`` and
    scores them with ``calculate_outfit_similarity``; outfits scoring below
    ``min_similarity_score`` are discarded. Generation stops when ``top_k``
    qualifying outfits have been collected, when ``max_batches`` batches have
    been run, or when a batch yields only empty outfits.

    Args:
        df: Item table passed through to ``select_random_outfit``.
        num_outfits: Number of outfits generated per batch.
        min_similarity_score: Minimum score an outfit needs to qualify.
        top_k: Number of outfits to collect and return.
        max_batches: Upper bound on the number of batches; ``None`` removes
            the bound.

    Returns:
        A list of at most ``top_k`` dicts with keys 'outfit' (DataFrame) and
        'similarity_score', sorted by score in descending order.

    Raises:
        RuntimeError: If no outfit reached ``min_similarity_score``.
    """
    if top_k <= 0:
        return []

    # Only qualifying outfits are ever stored, so no re-filtering is needed later.
    qualified_outfits = []
    batches_run = 0

    while len(qualified_outfits) < top_k and (max_batches is None or batches_run < max_batches):
        batches_run += 1
        produced_any = False

        # Generate a batch of outfits
        for i in range(num_outfits):
            print(f"Generating outfit {i+1}/{num_outfits}")
            outfit = select_random_outfit(df)
            if outfit.empty:
                continue
            produced_any = True

            similarity_score = calculate_outfit_similarity(outfit)

            # Only keep outfits that meet the minimum similarity score threshold
            if similarity_score >= min_similarity_score:
                qualified_outfits.append({
                    'outfit': outfit,
                    'similarity_score': similarity_score
                })

        # A batch made only of empty outfits means no outfit can be built from
        # this table, so retrying would loop without progress.
        if not produced_any:
            break

    if not qualified_outfits:
        raise RuntimeError(
            f"No outfit reached min_similarity_score={min_similarity_score} "
            f"after {batches_run} batch(es) of {num_outfits} outfit(s)."
        )

    # Sort outfits by similarity score in descending order and keep the best top_k
    ranked_outfits = sorted(qualified_outfits, key=lambda x: x['similarity_score'], reverse=True)
    return ranked_outfits[:top_k]

# Display and print images for each ranked outfit
def display_outfit_images(outfits, tile_size=200):
    """
    Show every outfit as one horizontal strip of its item images.

    Args:
        outfits: Iterable of dicts with keys 'outfit' (DataFrame containing
            'Master Category' and 'Image File Path') and 'similarity_score'.
        tile_size: Edge length in pixels of the square each item image is
            fitted to (defaults to 200).

    For each outfit the first item of the categories 'tops', 'bottoms',
    'footwear' and 'accessories' (in that order) is loaded; items of other
    categories are not shown. Images that cannot be opened are reported with
    a printed message and skipped. Nothing is plotted for an outfit with no
    loadable image.
    """
    categories_order = ['tops', 'bottoms', 'footwear', 'accessories']
    tile = (tile_size, tile_size)

    for idx, outfit_info in enumerate(outfits, 1):
        print(f"Displaying images for Rank {idx} - Similarity Score: {outfit_info['similarity_score']}")

        outfit = outfit_info['outfit']
        images = []

        for category in categories_order:
            item = outfit[outfit['Master Category'] == category]
            if item.empty:
                continue

            image_path = item['Image File Path'].values[0]
            try:
                # The context manager closes the source file; ImageOps.fit
                # returns a separate image, which is what gets kept.
                with Image.open(image_path) as img:
                    images.append(ImageOps.fit(img, tile))  # Resize for uniform display
            except Exception as e:
                # Best effort: a broken image must not abort the whole display
                print(f"Could not open image {image_path}: {e}")

        if images:
            combined_image = Image.new('RGB', (tile_size * len(images), tile_size))
            for i, img in enumerate(images):
                combined_image.paste(img, (i * tile_size, 0))

            plt.figure(figsize=(10, 5))
            plt.imshow(combined_image)
            plt.axis('off')
            plt.title(f"Outfit for Rank {idx}")
            plt.show()

In [None]:
# Repeatedly generate a shortlist, show it, and pick one outfit from it
summary_columns = ['Master Category', 'Outfit Piece', 'Pattern', 'Color', 'Material', 'Season', 'Weather', 'Dress code']

for m in range(1, 101):
    print("Iteration #", m)

    # Build a fresh ranked shortlist using the global weights
    top_outfits = generate_and_rank_outfits(df, num_outfits=10)

    # Show the shortlisted outfits as image strips
    display_outfit_images(top_outfits)

    # Pick the final outfit uniformly at random from the shortlist
    chosen = random.choice(top_outfits)
    final_outfit = chosen['outfit']
    print("Final selected outfit:")
    print(final_outfit[summary_columns])
