# My Encoding  
In this notebook, I implement my approach to creating outfit encodings based on outfit price and categories.

In [None]:
# Mount Google Drive so the project folder is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
# Project root on Drive; the parquet reads and writes in this notebook are built from it.
path='/content/drive/MyDrive/RecSys_206894495'

Mounted at /content/drive


In [None]:
!pip install pyarrow



In [None]:
#import
#warning
import warnings
warnings.filterwarnings('ignore')


#general
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

In [None]:
# Read the outfits, outfit-tags and orders tables from the project's data folder.
outfits_file = path+'/archive/data/outfits.parquet'
outfit_tags_file = path+'/archive/data/outfit_tags.parquet'
orders_file = path+'/archive/data/orders.parquet'

outfits = pd.read_parquet(outfits_file, engine='pyarrow')
outfit_tags = pd.read_parquet(outfit_tags_file, engine='pyarrow')
orders = pd.read_parquet(orders_file, engine='pyarrow')

In [None]:
# Min-max scale pricePerMonth; the scaled column is added to `outfits` itself.
price = outfits['pricePerMonth']
lowest_price = price.min()
highest_price = price.max()
outfits['normalized_pricePerMonth'] = (price - lowest_price) / (highest_price - lowest_price)

# Start the encoding table with the outfit id and its scaled price.
final_df = outfits[['id', 'normalized_pricePerMonth']]

In [None]:
# Keep only the tag rows whose category is 'Seasons'.
is_season_tag = outfit_tags['category'].eq('Seasons')
seasons = outfit_tags.loc[is_season_tag]

In [None]:
# Show the distinct tag values that occur in the 'Seasons' category.
seasons['tag'].unique()

array(['Spring', 'Summer', 'Multi Season', 'Winter', 'Fall'], dtype=object)

In [None]:
# Show the column names of the filtered 'Seasons' tag table.
seasons.columns

Index(['id', 'tag', 'category'], dtype='object')

In [None]:
# Build one row per outfit that has a 'Seasons' tag, with one boolean column per season.
seasons_list = ['Spring', 'Summer', 'Winter', 'Fall']

# id x tag membership table computed in a single vectorized pass; a row-by-row
# loop with a boolean mask per tag row re-scans the whole frame each time (quadratic).
# Only the four seasons and 'Multi Season' are kept; the tag listing printed above
# shows no other values in this category.
season_hits = (
    pd.crosstab(seasons['id'], seasons['tag'])
    .gt(0)
    .reindex(
        index=seasons['id'].unique(),
        columns=seasons_list + ['Multi Season'],
        fill_value=False,
    )
)

# 'Multi Season' gets no column of its own: it switches on all four seasons.
is_multi_season = season_hits['Multi Season']
new_seasons = pd.DataFrame({'id': season_hits.index.to_numpy()})
for season in seasons_list:
    new_seasons[season] = (season_hits[season] | is_multi_season).to_numpy()

# Left merge: outfits without any 'Seasons' tag get NaN in the season columns.
final_df=pd.merge(final_df,new_seasons,on='id',how='left')

In [None]:
# Select the tag rows whose category is 'Brand'.
brands = outfit_tags[outfit_tags['category'] == 'Brand']

# Distinct brand tags in order of first appearance; each becomes one indicator column.
brand_list = brands['tag'].unique()

# One row per outfit id, True where the outfit carries that brand tag.
# crosstab builds the whole table in one vectorized pass instead of scanning the
# frame once per tag row.
new_brands = (
    pd.crosstab(brands['id'], brands['tag'])
    .gt(0)
    .reindex(index=brands['id'].unique(), columns=brand_list, fill_value=False)
    .rename_axis(index='id', columns=None)
    .reset_index()
)

# NOTE(review): a tag name that already exists as a column in final_df is renamed
# with pandas' _x/_y suffixes by this merge - confirm tags do not collide across categories.
final_df=pd.merge(final_df,new_brands,on='id',how='left')

In [None]:
# Select the tag rows whose category is 'Category'.
categories = outfit_tags[outfit_tags['category'] == 'Category']

# Distinct category tags in order of first appearance; each becomes one indicator column.
category_tags = categories['tag'].unique()

# One row per outfit id, True where the outfit carries that tag.
# crosstab builds the whole table in one vectorized pass instead of scanning the
# frame once per tag row.
new_df = (
    pd.crosstab(categories['id'], categories['tag'])
    .gt(0)
    .reindex(index=categories['id'].unique(), columns=category_tags, fill_value=False)
    .rename_axis(index='id', columns=None)
    .reset_index()
)

# Left merge: outfits without a 'Category' tag get NaN in these columns.
final_df=pd.merge(final_df,new_df,on='id',how='left')

In [None]:
# Select the tag rows whose category is 'Color'.
filterd_df = outfit_tags[outfit_tags['category'] == 'Color']

# Distinct color tags in order of first appearance; each becomes one indicator column.
color_tags = filterd_df['tag'].unique()

# One row per outfit id, True where the outfit carries that color tag.
# crosstab builds the whole table in one vectorized pass instead of scanning the
# frame once per tag row.
new_df = (
    pd.crosstab(filterd_df['id'], filterd_df['tag'])
    .gt(0)
    .reindex(index=filterd_df['id'].unique(), columns=color_tags, fill_value=False)
    .rename_axis(index='id', columns=None)
    .reset_index()
)

# NOTE(review): a tag name that already exists as a column in final_df is renamed
# with pandas' _x/_y suffixes by this merge - confirm tags do not collide across categories.
final_df=pd.merge(final_df,new_df,on='id',how='left')

In [None]:
# Select the tag rows whose category is 'Fit'.
filterd_df = outfit_tags[outfit_tags['category'] == 'Fit']

# Distinct fit tags in order of first appearance; each becomes one indicator column.
fit_tags = filterd_df['tag'].unique()

# One row per outfit id, True where the outfit carries that fit tag.
# crosstab builds the whole table in one vectorized pass instead of scanning the
# frame once per tag row.
new_df = (
    pd.crosstab(filterd_df['id'], filterd_df['tag'])
    .gt(0)
    .reindex(index=filterd_df['id'].unique(), columns=fit_tags, fill_value=False)
    .rename_axis(index='id', columns=None)
    .reset_index()
)

# Left merge: outfits without a 'Fit' tag get NaN in these columns.
final_df=pd.merge(final_df,new_df,on='id',how='left')

In [None]:
# Select the tag rows whose category is 'Gender'.
filterd_df = outfit_tags[outfit_tags['category'] == 'Gender']

# Distinct gender tags in order of first appearance; each becomes one indicator column.
gender_tags = filterd_df['tag'].unique()

# One row per outfit id, True where the outfit carries that gender tag.
# crosstab builds the whole table in one vectorized pass instead of scanning the
# frame once per tag row.
new_df = (
    pd.crosstab(filterd_df['id'], filterd_df['tag'])
    .gt(0)
    .reindex(index=filterd_df['id'].unique(), columns=gender_tags, fill_value=False)
    .rename_axis(index='id', columns=None)
    .reset_index()
)

# Left merge: outfits without a 'Gender' tag get NaN in these columns.
final_df=pd.merge(final_df,new_df,on='id',how='left')

In [None]:
# Select the tag rows whose category is 'Material'.
filterd_df = outfit_tags[outfit_tags['category'] == 'Material']

# Distinct material tags in order of first appearance; each becomes one indicator column.
material_tags = filterd_df['tag'].unique()

# One row per outfit id, True where the outfit carries that material tag.
# crosstab builds the whole table in one vectorized pass instead of scanning the
# frame once per tag row.
new_df = (
    pd.crosstab(filterd_df['id'], filterd_df['tag'])
    .gt(0)
    .reindex(index=filterd_df['id'].unique(), columns=material_tags, fill_value=False)
    .rename_axis(index='id', columns=None)
    .reset_index()
)

# Left merge: outfits without a 'Material' tag get NaN in these columns.
final_df=pd.merge(final_df,new_df,on='id',how='left')

In [None]:
# Select the tag rows whose category is 'Occasion'.
filterd_df = outfit_tags[outfit_tags['category'] == 'Occasion']

# Distinct occasion tags in order of first appearance; each becomes one indicator column.
occasion_tags = filterd_df['tag'].unique()

# One row per outfit id, True where the outfit carries that occasion tag.
# crosstab builds the whole table in one vectorized pass instead of scanning the
# frame once per tag row.
new_df = (
    pd.crosstab(filterd_df['id'], filterd_df['tag'])
    .gt(0)
    .reindex(index=filterd_df['id'].unique(), columns=occasion_tags, fill_value=False)
    .rename_axis(index='id', columns=None)
    .reset_index()
)

# Left merge: outfits without an 'Occasion' tag get NaN in these columns.
final_df=pd.merge(final_df,new_df,on='id',how='left')

Size

In [None]:
# Select the tag rows whose category is 'Size'.
filtered_df = outfit_tags[outfit_tags['category'] == 'Size']

# Map numeric and letter sizes onto one ordinal scale (1 = XXS ... 10 = 5XL).
size_mapping = {
    '32': 1,   # XXS
    '34': 2,   # XS
    '36': 3,   # S
    '37': 4,   # M
    '38': 4,   # M
    '39': 5,   # L
    '40': 5,   # L
    '41': 5,   # L
    '42': 6,   # XL
    '44': 7,   # XXL
    '46': 8,   # 3XL
    '48': 9,   # 4XL
    '50': 10,  # 5XL
    'XXS': 1,
    'XS': 2,
    'S': 3,
    'M': 4,
    'L': 5,
    'XL': 6,
    'XXL': 7,
    '3XL': 8,
    '4XL': 9,
    '5XL': 10,
    'Onesize': 4   # same value as M
}

# Strip surrounding whitespace from every string cell so tags can match the mapping keys.
# Series.map per column is the elementwise equivalent of DataFrame.applymap,
# which is deprecated since pandas 2.1.
new_df = filtered_df.apply(
    lambda column: column.map(lambda x: x.strip() if isinstance(x, str) else x)
)

# Convert the sizes; a tag that is missing from the mapping is kept unchanged.
new_df = new_df.assign(
    tag=new_df['tag'].map(lambda x: size_mapping.get(x, x))
).rename(columns={'tag': 'Size'})

# NOTE(review): if an outfit has more than one 'Size' row, this left merge repeats
# that outfit's row in final_df once per size - confirm one size tag per outfit.
final_df=pd.merge(final_df,new_df[['id','Size']],on='id',how='left')

https://wunderlabel.com/en-gb/blog/p/one-size-more-than-a-clothing-size-uk/

Length

In [None]:
# Keep only the tag rows whose category is 'Length'.
is_length_tag = outfit_tags['category'].eq('Length')
filtered_df = outfit_tags.loc[is_length_tag]

In [None]:
# Map the length tags onto a 0..1 scale.
length_mapping = {
    'Mini':0,
    'Midi':0.5,
    'Maxi':1
}

# Strip surrounding whitespace from every string cell so tags can match the mapping keys.
# Series.map per column is the elementwise equivalent of DataFrame.applymap,
# which is deprecated since pandas 2.1.
new_df = filtered_df.apply(
    lambda column: column.map(lambda x: x.strip() if isinstance(x, str) else x)
)

# Convert the lengths; a tag that is missing from the mapping is kept unchanged.
new_df = new_df.assign(
    tag=new_df['tag'].map(lambda x: length_mapping.get(x, x))
).rename(columns={'tag': 'Length'})

# NOTE(review): outfits without a 'Length' tag get NaN here; the notebook later fills
# NaN with 0, which is also the code for 'Mini' - confirm that is intended.
final_df=pd.merge(final_df,new_df[['id','Length']],on='id',how='left')

Count the number of times each customer bought an outfit

pricePerMonth

In [None]:
# Turn every bool cell into 0/1 and leave all other values untouched.
# Series.map per column is the elementwise equivalent of DataFrame.applymap,
# which is deprecated since pandas 2.1.
final_df = final_df.apply(
    lambda column: column.map(lambda x: int(x) if isinstance(x, bool) else x)
)

In [None]:
# Replace every remaining missing value (e.g. from unmatched left merges) with 0.
final_df = final_df.fillna(0)

In [None]:
# Show (rows, columns) of the assembled encoding table.
final_df.shape

(16037, 626)

In [None]:
# Store the outfit ids with pandas' 'string' dtype.
final_df = final_df.astype({'id': 'string'})

In [None]:
# Attach each ordered outfit's group to the orders table.
# Guarded so that re-running this cell does not merge 'group' a second time, which
# would produce 'group_x'/'group_y' columns and break the groupby on 'group' below.
if 'group' not in orders.columns:
    orders = orders.merge(
        outfits[['id', 'group']], left_on='outfit.id', right_on='id', how='left'
    ).drop('id', axis=1)

# Number of order rows per (outfit, customer) pair and per (group, customer) pair.
count_pairs_outfit = orders.groupby(['outfit.id', 'customer.id']).size().reset_index(name='count')
# count_pairs_group is computed here but not written out by this cell.
count_pairs_group = orders.groupby(['group', 'customer.id']).size().reset_index(name='count')

# Join the outfit encoding onto the per-outfit counts and save the result.
count_pairs_outfit_encoded = count_pairs_outfit.merge(
    final_df, left_on='outfit.id', right_on='id', how='left'
).drop('id', axis=1)
count_pairs_outfit_encoded.to_parquet(path+'/models/my_encoding_count_pairs_outfit.parquet', engine='pyarrow')

In [None]:
# Write the per-outfit encoding table to the models folder under the project path.
encoding_file = path+'/models/my_encoding.parquet'
final_df.to_parquet(encoding_file, engine='pyarrow')