In [6]:
import pandas as pd
import numpy as np
import glob
from sklearn.metrics.pairwise import cosine_similarity
import ast
import re

In [7]:
def extract_2d_array(tensor_str):
    """Parse the textual form of a tensor into a NumPy array with 768 columns.

    The function strips the literal substrings ``tensor(`` and
    ``grad_fn=<TanhBackward0>)``, removes every space and newline, drops the
    single trailing character that remains, and parses what is left as a
    Python literal.

    Parameters
    ----------
    tensor_str : str
        Text such as ``"tensor([[...]], grad_fn=<TanhBackward0>)"``.

    Returns
    -------
    numpy.ndarray
        The parsed numbers reshaped to ``(-1, 768)``.

    Raises
    ------
    ValueError, SyntaxError
        If the remaining text is not a plain Python literal, or if the number
        of parsed values is not a multiple of 768.
    """
    tensor_str = (
        tensor_str.replace('tensor(', '')
        .replace('grad_fn=<TanhBackward0>)', '')
        .replace(' ', '')
        .replace('\n', '')
    )
    # Drop the one leftover trailing character. For input shaped like the
    # example above this is the comma that preceded ``grad_fn``; without a
    # ``grad_fn`` suffix it would be the closing parenthesis.
    tensor_str = tensor_str[:-1]

    # literal_eval only accepts literals (numbers, lists, tuples, ...), so text
    # coming from a CSV file can never execute code the way eval() would.
    numbers = ast.literal_eval(tensor_str)

    return np.array(numbers).reshape((-1, 768))

def extract_2d_array_list(data_str):
    """Parse a bracketed, whitespace-separated number listing into an array.

    The text is converted into a valid Python list literal (separators become
    commas), wrapped in one extra pair of brackets, parsed with
    ``ast.literal_eval`` and reshaped to 768 columns.

    Whitespace directly inside brackets (e.g. ``"[[ 0.5 -0.3]]"``) and around
    existing commas is removed first, so padded or comma-separated input does
    not produce empty list elements.

    Parameters
    ----------
    data_str : str
        Text such as ``"[[-0.75 -0.47\\n  0.71 ...]]"``.

    Returns
    -------
    numpy.ndarray
        The parsed numbers reshaped to ``(-1, 768)``.

    Raises
    ------
    ValueError, SyntaxError
        If the text cannot be parsed as a literal, or if the number of parsed
        values is not a multiple of 768.
    """
    cleaned = data_str.strip()

    # Remove padding next to brackets so "[ 0.5" does not turn into "[,0.5".
    cleaned = re.sub(r'\[\s+', '[', cleaned)
    cleaned = re.sub(r'\s+\]', ']', cleaned)

    # Tighten separators that are already commas so they are not doubled below.
    cleaned = re.sub(r'\s*,\s*', ',', cleaned)

    # Every remaining whitespace run (spaces or newlines) separates two values.
    cleaned = re.sub(r'\s+', ',', cleaned)

    # Ensure there are no stray commas at either end before wrapping.
    cleaned = '[' + cleaned.strip(',') + ']'

    # Evaluate the string safely (literals only).
    numbers = ast.literal_eval(cleaned)

    return np.array(numbers).reshape((-1, 768))


In [8]:
def mean_of_2d_arrays(arrays):
    """Average a collection of arrays row-wise and return a 768-column result.

    All arrays are stacked vertically, the mean is taken down the rows
    (axis 0), and the result is reshaped to ``(-1, 768)``.
    """
    all_rows = np.vstack(arrays)
    column_means = all_rows.mean(axis=0)
    return column_means.reshape(-1, 768)

def findMeanEmbeddings(df):
    """Return one row per ``sub_category`` holding its mean embedding.

    The ``embeddings`` column is grouped by ``sub_category`` and each group is
    reduced with ``mean_of_2d_arrays``. The grouping key is turned back into a
    regular column, giving a DataFrame with ``sub_category`` and
    ``embeddings`` columns.
    """
    per_category = df.groupby('sub_category')['embeddings']
    mean_series = per_category.apply(mean_of_2d_arrays)

    # Series.reset_index() yields a DataFrame with the group key as a column.
    return mean_series.reset_index()

In [9]:
def constructAverageDataframe(path, output_path="average_embeddings.csv"):
    """Build the per-category mean-embedding table and write it to CSV.

    Every CSV file matching ``path`` is loaded, leftover ``Unnamed: *`` index
    columns are dropped, the frames are concatenated, rows are de-duplicated
    on ``name`` (first occurrence kept), the ``embeddings`` text is parsed with
    ``extract_2d_array``, and the per-``sub_category`` means computed by
    ``findMeanEmbeddings`` are written to ``output_path``.

    Parameters
    ----------
    path : str
        Glob pattern selecting the input CSV files.
    output_path : str, optional
        Destination CSV file. Defaults to ``"average_embeddings.csv"``.

    Raises
    ------
    ValueError
        If no file matches ``path``.
    """
    files = glob.glob(path)
    if not files:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No CSV files matched the pattern {path!r}")

    frames = [
        pd.read_csv(file).drop(
            columns=["Unnamed: 0", "Unnamed: 0.1", 'Unnamed: 0.2'],
            errors="ignore",
        )
        for file in files
    ]

    df = pd.concat(frames, ignore_index=True)

    # Take an explicit copy so the column assignment below works on an
    # independent frame and does not trigger SettingWithCopyWarning.
    df_unique = df.drop_duplicates(subset='name', keep='first').copy()

    df_unique['embeddings'] = df_unique['embeddings'].apply(extract_2d_array)
    print(df_unique.shape)

    result_df = findMeanEmbeddings(df_unique)
    result_df.to_csv(output_path, index=False)

In [10]:
# path = "./final_csvs/*/*.csv"
# constructAverageDataframe(path)

In [11]:
def rankAvgDfBySimilarity(df, user_embedding, top_n=5):
    """Rank rows by cosine similarity to ``user_embedding`` and keep the best.

    Each entry of the ``embeddings`` column is flattened, the flattened
    vectors are stacked into one matrix, and the cosine similarity of every
    row to ``user_embedding`` is computed. The input frame is left untouched;
    the returned frame carries the flattened ``embeddings`` and a new
    ``similarities`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``embeddings`` column of NumPy arrays.
    user_embedding : array-like
        Passed as the second argument to ``cosine_similarity``; the caller in
        this notebook supplies an array of shape ``(1, 768)``.
    top_n : int, optional
        Number of highest-scoring rows to return. Defaults to 5.

    Returns
    -------
    pandas.DataFrame
        The ``top_n`` rows with the highest similarity, in descending order.
    """
    flat_embeddings = df['embeddings'].apply(lambda x: x.flatten())
    embeddings_matrix = np.vstack(flat_embeddings.values)
    print(embeddings_matrix.shape)

    similarities = cosine_similarity(embeddings_matrix, user_embedding).flatten()

    # assign() builds a new frame, so the caller's DataFrame is not mutated.
    ranked = df.assign(embeddings=flat_embeddings, similarities=similarities)
    return ranked.sort_values(by='similarities', ascending=False).head(top_n)

def getRandomEmbeddingVector(length):
    """Return a random row vector of shape ``(1, length)``.

    Values are drawn with ``np.random.rand`` (range [0, 1)) and then scaled
    and shifted into the range [-1, 1).
    """
    unit_samples = np.random.rand(length)
    return (2 * unit_samples - 1).reshape(1, -1)

# Load the stored per-category mean embeddings and turn the text in the
# `embeddings` column back into NumPy arrays.
average_embeddings = pd.read_csv("average_embeddings.csv")
average_embeddings['embeddings'] = average_embeddings['embeddings'].apply(extract_2d_array_list)

# Rank the categories against a random 768-dimensional query vector; `df`
# holds the top-ranked rows and is displayed as the cell output.
random_query = getRandomEmbeddingVector(768)
df = rankAvgDfBySimilarity(average_embeddings, random_query)
df

(24, 768)


Unnamed: 0,sub_category,embeddings,similarities
17,Shirts,"[-0.752649218, -0.472511213, -0.807391897, 0.7...",-0.02028
12,Innerwear,"[-0.73928861, -0.47255215, -0.80987534, 0.7168...",-0.023251
15,Lingerie & Nightwear,"[-0.75945913, -0.52629979, -0.87041634, 0.7896...",-0.024545
21,Sunglasses,"[-0.79422574, -0.53368691, -0.81616387, 0.7570...",-0.024793
19,Sports Shoes,"[-0.794324868, -0.535850741, -0.850778818, 0.7...",-0.025974


In [13]:
# Sub-categories of the ranked rows held in `df`.
categories_list = df['sub_category']
print(categories_list)
hidden_df = pd.read_csv("dataset_truncated.csv")

# Collects one small sampled frame per category; concatenated below.
final_df = []

for category in categories_list:
    print(category)
    df_filtered = hidden_df[hidden_df['sub_category'] == category]
    # sample() raises ValueError when n exceeds the number of rows, so cap the
    # request at what is available (categories with < 2 rows yield fewer rows).
    df_sampled = df_filtered.sample(n=min(2, len(df_filtered)))
    final_df.append(df_sampled)

recommendations = pd.concat(final_df, ignore_index=True)

print(recommendations)

17                  Shirts
12               Innerwear
15    Lingerie & Nightwear
21              Sunglasses
19            Sports Shoes
Name: sub_category, dtype: object
Shirts
Innerwear
Lingerie & Nightwear
Sunglasses
Sports Shoes
                                                name     main_category  \
0  Destiny Fashion Men's Regular Fit Shirt (Black...    men's clothing   
1        Super weston Men's Regular Fit Formal Shirt    men's clothing   
2                                    GenX Men Trunks    men's clothing   
3  Bstar Vest Slim n Lift Tummy Tucker Body Shape...    men's clothing   
4  Amante Women Non Padded Non Wired Full Coverag...  women's clothing   
5  Jaanshi Women's Cotton Honeymoon Babydoll Ling...  women's clothing   
6  Le Specs AIR HEART 2102333 womens OATMEAL eyewear       accessories   
7  EYLRIM Polarized Heart Shape Sunglasses for Wo...       accessories   
8        Puma Unisex-Adult Electron 2.0 Walking Shoe       men's shoes   
9  Action Athleo ATG-498 Men'