In [2]:
!pip install sentence_transformers einops


Collecting sentence_transformers
  Downloading sentence_transformers-3.2.1-py3-none-any.whl.metadata (10 kB)
Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Downloading sentence_transformers-3.2.1-py3-none-any.whl (255 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m255.8/255.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops, sentence_transformers
Successfully installed einops-0.8.0 sentence_transformers-3.2.1


In [None]:
import os
import pandas as pd
import random
import numpy as np
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

# Closet metadata: read the caption CSV into a DataFrame.
df = pd.read_csv("/kaggle/input/demo-csv-2/Demo_captions_2.csv")

# Folder under which the image files named in the CSV are looked up.
image_dir = "/kaggle/input/demo-pics-2/Demo closet"

# Turn each bare file name into a full path below image_dir.
df['Image File Path'] = df['Image File Name'].map(
    lambda file_name: os.path.join(image_dir, file_name)
)

# Sentence-embedding model used for the item descriptions (run on CPU).
# NOTE(review): trust_remote_code=True runs Python code downloaded with the
# model; consider pinning a specific revision so that code cannot change
# between runs.
model_name = "nomic-ai/nomic-embed-text-v1"
model = SentenceTransformer(model_name, trust_remote_code=True, device='cpu')

# Function to generate embedding for a text
def generate_embedding(text, task_prefix="clustering: "):
    """Return the embedding vector of a single text.

    The nomic-embed model card asks for every input to start with a task
    instruction prefix (``search_document: ``, ``search_query: ``,
    ``clustering: `` or ``classification: ``). The prefix is added here so
    callers can keep passing the plain description.

    Args:
        text: The text to embed.
        task_prefix: Task instruction prepended to ``text`` before encoding.
            Pass ``""`` to encode ``text`` exactly as given.

    Returns:
        The first (and only) row returned by ``model.encode`` for the
        one-element batch.
    """
    prefixed_text = f"{task_prefix}{text}"
    # encode() takes a batch; wrap the single text and unwrap the single result.
    return model.encode([prefixed_text])[0]

# Columns whose values are joined into the text that gets embedded.
description_columns = ['Outfit Piece', 'Pattern', 'Color', 'Material', 'Season', 'Weather', 'Dress code']

# Build one space-separated description per item. Missing cells are skipped so
# the literal text "nan" never ends up in the description or the embedding.
df['description'] = [
    ' '.join(str(value) for value in row if pd.notna(value))
    for row in df[description_columns].itertuples(index=False, name=None)
]

# Generate embeddings for each clothing item
df['embedding'] = df['description'].map(generate_embedding)

# Stack the per-item vectors into one (items x dimensions) matrix
embedding_matrix = np.vstack(df['embedding'].to_list())

# Pairwise cosine similarity between all items
similarity_matrix = cosine_similarity(embedding_matrix)

# Use 'Dress code' as the context for matching similar styles
df['Outfit Context'] = df['Dress code']


  from tqdm.autonotebook import tqdm, trange


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/128 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/70.9k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.03k [00:00<?, ?B/s]

configuration_hf_nomic_bert.py:   0%|          | 0.00/1.96k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- configuration_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling_hf_nomic_bert.py:   0%|          | 0.00/85.7k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/nomic-ai/nomic-bert-2048:
- modeling_hf_nomic_bert.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


pytorch_model.bin:   0%|          | 0.00/547M [00:00<?, ?B/s]

In [None]:
# Context similarity is all-or-nothing: identical contexts score 1.0, anything else 0.0
def calculate_context_similarity(item_context, other_context):
    """Return 1.0 when the two contexts compare equal, otherwise 0.0."""
    if item_context == other_context:
        return 1.0
    return 0.0

# Function to select an item using combined similarity bias (context + embedding similarity)
def select_with_combined_bias(df, reference_embedding, reference_context, category,
                              embedding_weight=0.3, context_weight=3, random_state=None):
    """Sample one item of ``category``, biased towards the reference item.

    Each candidate is scored as
    ``embedding_weight * cosine_similarity + context_weight * context_match``
    and one row is drawn with probability proportional to that score.

    Args:
        df: DataFrame with 'Master Category', 'embedding' and 'Outfit Context'
            columns.
        reference_embedding: Embedding vector of the reference item.
        reference_context: Context value of the reference item.
        category: 'Master Category' value to draw from.
        embedding_weight: Weight of the cosine similarity term.
        context_weight: Weight of the context match term.
        random_state: Forwarded to ``DataFrame.sample`` for reproducible draws.

    Returns:
        A one-row DataFrame with the selected item, or an empty DataFrame when
        ``df`` has no rows in ``category``.
    """
    category_df = df[df['Master Category'] == category]

    if category_df.empty:
        return pd.DataFrame()  # Return empty if no items in category

    # Cosine similarity between the reference item and every candidate
    category_embeddings = np.vstack(category_df['embedding'].to_list())
    embedding_similarities = cosine_similarity([reference_embedding], category_embeddings)[0]

    # 1.0 / 0.0 context match between the reference item and every candidate
    context_similarities = np.array(
        [calculate_context_similarity(reference_context, context)
         for context in category_df['Outfit Context']],
        dtype=float,
    )

    combined_similarities = (embedding_weight * embedding_similarities) + (context_weight * context_similarities)

    # Cosine similarity can be negative, and sample() rejects negative weights;
    # normalising a negative total would also flip the preference. Treat a
    # negative score as "never pick unless nothing else is available".
    weights = np.clip(combined_similarities, 0.0, None)
    total = weights.sum()

    if not np.isfinite(total) or total <= 0:
        # No usable bias at all: fall back to a uniform draw.
        return category_df.sample(1, random_state=random_state)

    return category_df.sample(1, weights=weights / total, random_state=random_state)

# Build one outfit: a random top plus one similarity-biased item from every other category
def select_random_outfit(df):
    """Assemble a single outfit anchored on a randomly chosen top.

    A top is drawn uniformly at random; its embedding and 'Outfit Context'
    then serve as the reference when picking one item from each remaining
    'Master Category' through ``select_with_combined_bias``.

    Returns:
        A DataFrame with the top in the first row followed by the picks for
        the other categories (index reset), or an empty DataFrame when ``df``
        contains no tops.
    """
    tops_df = df[df['Master Category'] == 'tops']

    # Without a top there is nothing to anchor the outfit on.
    if tops_df.empty:
        print("No tops available.")
        return pd.DataFrame()

    anchor = tops_df.sample(1)
    anchor_embedding = anchor['embedding'].values[0]
    anchor_context = anchor['Outfit Context'].values[0]

    # Everything that is not a top is a candidate for the remaining slots.
    non_tops_df = df[df['Master Category'] != 'tops']

    # One biased pick per remaining category, in order of first appearance.
    picks = (
        select_with_combined_bias(non_tops_df, anchor_embedding, anchor_context, category)
        for category in non_tops_df['Master Category'].unique()
    )

    # Keep the top first and drop categories that produced no pick.
    outfit_parts = [anchor, *(pick for pick in picks if not pick.empty)]

    return pd.concat(outfit_parts, ignore_index=True)

In [None]:
# Function to generate outfits and save them to a CSV file
def generate_and_save_outfits(df, iterations=100, save_file="/kaggle/working/generated_outfits.csv"):
    """Generate ``iterations`` outfits and write them to ``save_file`` as CSV.

    Every selected item becomes one CSV row tagged with the 1-based iteration
    it belongs to. Iterations for which ``select_random_outfit`` returns an
    empty frame contribute no rows.

    Args:
        df: Closet DataFrame passed through to ``select_random_outfit``.
        iterations: Number of outfits to attempt.
        save_file: Destination CSV path; its parent directory is created if
            it does not exist.
    """
    # Output column name -> column name in the selected-items frame.
    column_map = {
        'Master Category': 'Master Category',
        'Outfit Piece': 'Outfit Piece',
        'Pattern': 'Pattern',
        'Color': 'Color',
        'Material': 'Material',
        'Season': 'Season',
        'Weather': 'Weather',
        'Dress Code': 'Dress code',
        'Image File Path': 'Image File Path',
        'Description': 'description',
    }
    output_columns = ['Iteration', *column_map]

    results = []
    generated = 0

    for i in range(iterations):
        print(f"Generating outfit {i+1}/{iterations}")
        selected_outfit_items = select_random_outfit(df)

        if selected_outfit_items.empty:
            continue

        generated += 1
        for _, item in selected_outfit_items.iterrows():
            record = {'Iteration': i + 1}
            record.update({out_name: item[src_name] for out_name, src_name in column_map.items()})
            results.append(record)

    # Passing the column list keeps the header even when nothing was generated,
    # so the file can always be read back with pd.read_csv.
    results_df = pd.DataFrame(results, columns=output_columns)

    save_dir = os.path.dirname(save_file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    results_df.to_csv(save_file, index=False)
    # Report the number of outfits actually produced, not the number attempted.
    print(f"{generated} outfits have been generated and saved to '{save_file}'.")

# Function to load CSV and create combined outfit images, save them in a folder, and print them
def create_combined_outfit_image(csv_file, save_folder="/kaggle/working/outfit_images",
                                 tile_size=200,
                                 categories_order=('tops', 'bottoms', 'footwear', 'accessories')):
    """Render one side-by-side image per outfit listed in ``csv_file``.

    Rows are grouped by 'Iteration'. For each group the first item of every
    category in ``categories_order`` is loaded, fitted to a square tile and
    pasted left to right. The strip is saved as
    ``outfit_combined_<iteration>.png`` in ``save_folder`` and shown inline.
    Outfits for which not every category yields a loadable image are skipped.

    Args:
        csv_file: CSV with 'Iteration', 'Master Category' and
            'Image File Path' columns.
        save_folder: Output directory, created if missing.
        tile_size: Edge length in pixels of each item tile.
        categories_order: Categories to show, in left-to-right order.
    """
    os.makedirs(save_folder, exist_ok=True)

    try:
        outfits_df = pd.read_csv(csv_file)
    except pd.errors.EmptyDataError:
        # A file without even a header means no outfit was ever written.
        print(f"No outfits found in '{csv_file}'.")
        return

    categories_order = list(categories_order)

    for iteration, group in outfits_df.groupby('Iteration'):
        images = []

        for category in categories_order:
            item = group[group['Master Category'] == category]
            if item.empty:
                continue

            image_path = item['Image File Path'].values[0]
            try:
                # The context manager closes the source file; the fitted tile
                # is a separate image and stays usable afterwards.
                with Image.open(image_path) as img:
                    images.append(ImageOps.fit(img, (tile_size, tile_size)))
            except Exception as e:
                print(f"Could not open image {image_path}: {e}")

        # Only render complete outfits.
        if len(images) != len(categories_order):
            print(f"Skipping iteration {iteration}: Missing categories.")
            continue

        # Combine images horizontally
        combined_image = Image.new('RGB', (tile_size * len(images), tile_size))
        for i, img in enumerate(images):
            combined_image.paste(img, (i * tile_size, 0))

        combined_image_path = os.path.join(save_folder, f"outfit_combined_{iteration}.png")
        combined_image.save(combined_image_path)

        # Display the image inline using matplotlib
        fig, ax = plt.subplots(figsize=(10, 5))
        ax.imshow(combined_image)
        ax.axis('off')  # Hide axes
        ax.set_title(f"Outfit for iteration {iteration}")
        plt.show()
        # Release the figure explicitly so many iterations do not pile up open figures.
        plt.close(fig)

        print(f"Outfit for iteration {iteration} saved as '{combined_image_path}'")


In [None]:
# The CSV is the hand-off between the two steps, so name its path once.
outfits_csv = "/kaggle/working/generated_outfits.csv"

# Step 1: generate 100 outfits and write them to the CSV
generate_and_save_outfits(df, iterations=100, save_file=outfits_csv)

# Step 2: read the CSV back, then save and display one combined image per outfit
create_combined_outfit_image(outfits_csv, save_folder="/kaggle/working/outfit_images")