In [None]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from collections import defaultdict

# Input CSVs: user feedback/preferences and the outfit characteristics catalogue
USER_PREFERENCES_CSV = 'feedbackdatatype1.csv'
OUTFITS_CSV = 'final_outfit_characteristics.csv'

# Read both tables into DataFrames
user_preferences_df = pd.read_csv(USER_PREFERENCES_CSV)
outfits_df = pd.read_csv(OUTFITS_CSV)

In [None]:
# Display the user-preference rows as they were read from the CSV.
user_preferences_df

In [None]:
# Display the outfit rows as they were read from the CSV.
outfits_df

In [None]:
# List the outfit column names as a one-column table titled "Variables".
pd.DataFrame(outfits_df.columns.values, columns = ["Variables"])

In [None]:
# Same column-name listing for the user-preference frame.
pd.DataFrame(user_preferences_df.columns.values, columns = ["Variables"])

In [None]:
# Drop the trailing "\n" from the user-preference headers that are expected
# to carry one. All renames are applied in a single call; headers that are
# not present are left untouched by DataFrame.rename.
newline_suffixed_columns = (
    'Age',
    'Weather',
    'Preferred_Styles',
    'Clothing_items',
    'Patterns',
    'Fabrics',
    'Top_Fit',
    'Bottom_Fit',
)
user_preferences_df = user_preferences_df.rename(
    columns={f"{name}\n": name for name in newline_suffixed_columns}
)

In [None]:
# Display the user-preference frame again, now that the rename cell has run.
user_preferences_df

In [None]:
# List the user-preference column names again to check the renamed headers.
pd.DataFrame(user_preferences_df.columns.values, columns = ["Variables"])

In [None]:
# Value frequencies, one column per pair of cells: each user-preference cell
# is followed by a cell showing the same column of the outfits frame, so the
# two distributions can be compared side by side.
user_preferences_df["ID"].value_counts()

In [None]:
outfits_df["ID"].value_counts()

In [None]:
user_preferences_df["Gender"].value_counts()

In [None]:
outfits_df["Gender"].value_counts()

In [None]:
user_preferences_df["Age"].value_counts()

In [None]:
outfits_df["Age"].value_counts()

In [None]:
user_preferences_df["Work_Attire"].value_counts()

In [None]:
outfits_df["Work_Attire"].value_counts()

In [None]:
user_preferences_df["Weather"].value_counts()

In [None]:
outfits_df["Weather"].value_counts()

In [None]:
user_preferences_df["Casual"].value_counts()

In [None]:
outfits_df["Casual"].value_counts()

In [None]:
user_preferences_df["Preferred_Styles"].value_counts()

In [None]:
outfits_df["Preferred_Styles"].value_counts()

In [None]:
user_preferences_df["Clothing_items"].value_counts()

In [None]:
outfits_df["Clothing_items"].value_counts()

In [None]:
user_preferences_df["Bottom_Type"].value_counts()

In [None]:
outfits_df["Bottom_Type"].value_counts()

In [None]:
user_preferences_df["Top_Type"].value_counts()

In [None]:
outfits_df["Top_Type"].value_counts()

In [None]:
user_preferences_df["Shoe_Type"].value_counts()

In [None]:
outfits_df["Shoe_Type"].value_counts()

In [None]:
user_preferences_df["Colors"].value_counts()

In [None]:
outfits_df["Colors"].value_counts()

In [None]:
user_preferences_df["Patterns"].value_counts()

In [None]:
outfits_df["Patterns"].value_counts()

In [None]:
user_preferences_df["Fabrics"].value_counts()

In [None]:
outfits_df["Fabrics"].value_counts()

In [None]:
user_preferences_df["Top_Fit"].value_counts()

In [None]:
outfits_df["Top_Fit"].value_counts()

In [None]:
user_preferences_df["Bottom_Fit"].value_counts()

In [None]:
outfits_df["Bottom_Fit"].value_counts()

In [None]:
# Both frames must expose the same columns in the same order: the matching
# step looks up every outfit column on the user row as well.
if list(user_preferences_df.columns) != list(outfits_df.columns):
    raise ValueError("The columns of user preferences and outfits dataframes do not match.")

# Cast every present value to str so comparisons are uniform, but keep missing
# cells as NaN. A plain astype(str) turns them into the literal "nan", which
# passes hasMatch's string guard and makes two missing values count as a
# matching characteristic.
user_preferences_df = user_preferences_df.astype(str).where(user_preferences_df.notna())
outfits_df = outfits_df.astype(str).where(outfits_df.notna())

# Minimum number of matching characteristics required to keep an outfit.
# The matching loop compares every column except "ID", so the count is out of
# (number of columns - 1).
threshold = 11

In [None]:
def hasMatch(userValue, outfitValue):
    """Tell whether two ";"-separated values share at least one attribute.

    Args:
        userValue: The user's value for one characteristic.
        outfitValue: The outfit's value for the same characteristic.

    Returns:
        False when either argument is not a string (for example a missing
        value); otherwise True if the attribute sets produced by
        getSplitAttributes have at least one element in common.
    """
    # Anything that is not a string cannot be split into attributes.
    if not (isinstance(userValue, str) and isinstance(outfitValue, str)):
        return False

    # Break both values into attribute sets without rebinding the parameters.
    userTokens = getSplitAttributes(userValue)
    outfitTokens = getSplitAttributes(outfitValue)

    # A match is any overlap between the two sets.
    return not userTokens.isdisjoint(outfitTokens)

def getSplitAttributes(values):
    """Split a ";"-separated string into a set of trimmed, non-empty attributes.

    Args:
        values: String such as "Formal; Casual;" holding one or more
            attributes separated by ";".

    Returns:
        Set of attribute names with surrounding whitespace removed. Empty
        pieces are dropped, so a trailing ";" (or an empty string) does not
        add "" to the set. Without this, any two values ending in ";" would
        share the empty attribute and always be reported as overlapping.
    """
    trimmed = (piece.strip() for piece in values.split(";"))
    return {piece for piece in trimmed if piece}

# Match every user against every outfit of the same gender
user_outfit_matches = []

# userIndex -> list of IDs of the outfits matched to that user
matchedOutfits = defaultdict(list)

# Columns compared between a user and an outfit: every outfit column but ID
outfitAttributes = set(outfits_df.columns.values)
outfitAttributes.remove("ID")

for userIndex, user in user_preferences_df.iterrows():
    # Outfits are only candidates when their gender equals the user's
    userGender = user['Gender']

    for outfitIndex, outfitData in outfits_df.iterrows():
        if outfitData['Gender'] == userGender:
            # Number of attributes on which the outfit and the user overlap
            matchedAttributeCt = sum(
                1
                for attribute in outfitAttributes
                if hasMatch(outfitData[attribute], user[attribute])
            )

            # Keep the outfit once the count reaches the threshold
            if matchedAttributeCt >= threshold:
                matchedOutfits[userIndex].append(outfitData["ID"])

    # Report the per-user total; the lookup also registers users that have
    # no match, so they still appear in the summary below
    matchedOutfitCt = len(matchedOutfits[userIndex])
    print(f"User {userIndex + 1} matched with {matchedOutfitCt} outfits")

# List the matched outfit IDs of every user, in user order
for userIndex in sorted(matchedOutfits):
    print(f"User {userIndex + 1} outfit matches: {matchedOutfits[userIndex]}")

In [None]:
from tabulate import tabulate

# One row per user: 1-based user number, match count, comma-separated outfit IDs
table_data = [
    [userIndex + 1, len(outfitIds), ", ".join(outfitIds)]
    for userIndex, outfitIds in sorted(matchedOutfits.items())
]

# Render the rows as a text table
headers = ["User Index", "Number of Matches", "Matched Outfits"]
print(tabulate(table_data, headers=headers, tablefmt="pretty"))

In [None]:
# One row per user: a "User N" label and the match count as text
table_data = [
    [f"User {userIndex + 1}", f"{len(outfitIds)} matched outfits"]
    for userIndex, outfitIds in sorted(matchedOutfits.items())
]

# Render the rows as a text table
headers = ["User", "Number of Matched Outfits"]
print(tabulate(table_data, headers=headers, tablefmt="pretty"))

In [None]:
# One row per user: "User N" label, match-count text, comma-separated outfit IDs
table_data = [
    [
        f"User {userIndex + 1}",
        f"{len(outfitIds)} matched outfits",
        ", ".join(outfitIds),
    ]
    for userIndex, outfitIds in sorted(matchedOutfits.items())
]

# Render the rows as a text table
headers = ["User", "Number of Matched Outfits", "Matched Outfits"]
print(tabulate(table_data, headers=headers, tablefmt="pretty"))

In [None]:
# Expected row format: each record should look like the example row below and match the following header.
# Header: ID,Gender,Age,Work_Attire,Weather,Casual,Preferred_Styles,Clothing_items,Bottom_Type,Top_Type,Shoe_Type,Colors,Patterns,Fabrics,Top_Fit,Bottom_Fit

# row1: 2,Man,Adult,Semi-Formal;,Cold;,Often,Formal; Casual;,T-shirts; Shirts; Jeans; Pants;,Trousers;,T-shirts;,Boots;,Neutral; Warm;,Plain;,Cotton; Denim; Silk; Faux Fur; Leather; Wool; Faux Leather;,Loose;,Loose;