## Image preprocessing
This notebook builds the dataframe containing all the data needed for training.

It assumes that the datasets created previously are already available.


In [11]:
# Libraries
import pandas as pd
import json
import cv2
import numpy as np

import warnings
warnings.filterwarnings('ignore')

In [30]:
# Functions
def get_routes_images(df_json):
    """
    Build one row per outfit holding the image path of each of its items.

    Parameters
    ----------
    df_json : list of dict
        Outfit records. Each record must provide 'set_id', 'likes' and
        'items'; every item must provide 'categoryid' and 'image'.

    Returns
    -------
    pandas.DataFrame
        Columns 'Outfit' (the set id) and 'likes', followed by one column per
        category id (converted to a string, in order of first appearance)
        holding the image path of the outfit's item in that category.
        Categories an outfit does not use are left missing (NaN). When an
        outfit has several items in the same category, the last one wins.
        An empty input yields an empty frame with just 'Outfit' and 'likes'.
    """
    records = []
    for outfit in df_json:
        # Reserve the 'likes' slot up front so it stays the second column;
        # its value is written last, after the items.
        record = {'Outfit': outfit['set_id'], 'likes': None}
        for item in outfit['items']:
            record[str(item['categoryid'])] = item['image']
        record['likes'] = outfit['likes']
        records.append(record)

    if not records:
        # No outfits: still expose the two fixed columns.
        return pd.DataFrame(columns=['Outfit', 'likes'])

    # Build the frame in a single call instead of enlarging it cell by cell
    # with .loc, which re-allocates on every new row and column.
    # dtype=object keeps ids, like counts and paths stored as given.
    return pd.DataFrame(records, dtype=object)


### Main Flow

In [31]:
# Loading the dataset
# The files are opened explicitly as UTF-8 so that decoding does not depend
# on the platform's default text encoding.
train_path = '../Datasets/womens_fashion_defined.json'
with open(train_path, encoding='utf-8') as file:
    womens_fashion_datasetLimited = json.load(file)

train_path = '../Datasets/womens_fashion_wholeDataSet.json'
with open(train_path, encoding='utf-8') as file:
    womens_fashion_wholeDataset = json.load(file)

**Dataframe with the outfit information**

In [32]:
# Build the outfit table for the limited dataset and preview its first rows.
outfits_df_limited = get_routes_images(df_json=womens_fashion_datasetLimited)
display(outfits_df_limited.head())

Unnamed: 0,Outfit,likes,19,25,28,261,21,237,49,17,...,264,236,257,244,4517,255,256,309,239,15
0,199683429,14,../Images/199683429/1.jpg,../Images/199683429/2.jpg,../Images/199683429/3.jpg,../Images/199683429/4.jpg,,,,,...,,,,,,,,,,
1,122754559,8,,../Images/122754559/2.jpg,,,../Images/122754559/1.jpg,../Images/122754559/3.jpg,../Images/122754559/4.jpg,,...,,,,,,,,,,
2,208049788,452,,,,,../Images/208049788/1.jpg,,../Images/208049788/4.jpg,../Images/208049788/2.jpg,...,,,,,,,,,,
3,171294961,25,,,,../Images/171294961/4.jpg,,../Images/171294961/3.jpg,,,...,,,,,,,,,,
4,215569834,14,,,,,,,,,...,,,,,,,,,,


In [33]:
# Build the outfit table for the whole dataset and preview its first rows.
outfits_df_whole = get_routes_images(df_json=womens_fashion_wholeDataset)
display(outfits_df_whole.head())

Unnamed: 0,Outfit,likes,21,237,49,106,11,236,9,261,...,320,4481,152,4493,132,113,317,159,4488,4431
0,120161271,9,../Images/120161271/1.jpg,../Images/120161271/2.jpg,../Images/120161271/3.jpg,../Images/120161271/8.jpg,,,,,...,,,,,,,,,,
1,206969379,3,,,,,../Images/206969379/1.jpg,../Images/206969379/2.jpg,../Images/206969379/3.jpg,../Images/206969379/4.jpg,...,,,,,,,,,,
2,216220312,233,,,,../Images/216220312/5.jpg,,,,,...,,,,,,,,,,
3,213824660,492,,../Images/213824660/3.jpg,,,../Images/213824660/1.jpg,,,../Images/213824660/4.jpg,...,,,,,,,,,,
4,118117317,3979,,,,,,,,,...,,,,,,,,,,


In [34]:
# Report how many outfits (rows) each table contains.
print("Number of outfits for all women:", outfits_df_whole.shape[0])
print("Number of limited outfits", outfits_df_limited.shape[0])

Number of outfits for all women: 7661
Number of limited outfits 142


### Processing the images with Cv2

In [35]:
def prepare_images(image_path, size=(64, 64)):
    """
    Load an image, resize it and scale its pixel values by 1/255.

    Parameters
    ----------
    image_path : str
        Path of the image file to read.
    size : tuple of int, optional
        Target size handed to cv2.resize, which expects (width, height).
        Defaults to (64, 64).

    Returns
    -------
    numpy.ndarray
        The resized image divided by 255.

    Raises
    ------
    OSError
        If cv2.imread could not read the file.
    """
    nparr = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if nparr is None:
        # cv2.imread signals failure by returning None rather than raising;
        # without this check the error only appears later inside cv2.resize
        # and does not mention which path was at fault.
        raise OSError(f"Could not read image: {image_path!r}")
    im = cv2.resize(nparr, size)
    return im / 255

In [40]:
# Every column after 'Outfit' and 'likes' is a clothing category.
list_clothes = outfits_df_limited.columns[2:]
# np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
outfits_df = outfits_df_limited.replace('nan', np.nan)
# Run every referenced image through prepare_images once; the results are
# discarded, so this pass only surfaces paths that cannot be processed.
for clothing in list_clothes:
    for image_path in outfits_df[clothing].dropna():
        prepare_images(image_path)

### Is there enough data?
I will eliminate the outfits that contain an item from any category (column) with three or fewer images, and then drop those category columns.

In [42]:
# Prune rare categories. Removing outfits lowers the image count of every
# other category those outfits used, so the check is run a second time to
# catch categories that dropped to the threshold after the first pass.
# NOTE(review): the number of passes is hard-coded to 2; looping until no
# category is flagged would be more robust, but may change the result.
for _ in range(2):
    list_clothes = outfits_df.columns[2:]
    categorias_eliminar = []
    print(list_clothes)
    for clothing in list_clothes:
        # Only the number of images in the category matters here, so count
        # the non-missing paths instead of decoding every image again.
        n_images = int(outfits_df[clothing].count())
        if n_images <= 3:
            categorias_eliminar.append(clothing)
            print(clothing)
            print("Size of the outfit:", n_images)

    # Keep only the outfits that have no item in any flagged category
    # (with nothing flagged, every row is kept) ...
    outfits_df = outfits_df[outfits_df[categorias_eliminar].isnull().all(axis=1)]
    # ... then remove the flagged columns, which are now entirely empty.
    outfits_df = outfits_df.drop(columns=categorias_eliminar)

    outfits_df = outfits_df.reset_index(drop=True)
outfits_df

Index(['19', '25', '28', '261', '21', '237', '49', '17', '27', '104', '18',
       '4', '5', '2', '4495', '41', '29', '4496', '11', '23', '245', '52',
       '24', '7', '3', '241', '9', '8', '26', '253', '240', '6', '263', '262',
       '264', '236', '257', '244', '4517', '255', '256', '309', '239', '15'],
      dtype='object')
23
Size of the outfit: 2
245
Size of the outfit: 2
52
Size of the outfit: 2
3
Size of the outfit: 2
253
Size of the outfit: 2
6
Size of the outfit: 1
262
Size of the outfit: 2
264
Size of the outfit: 1
257
Size of the outfit: 2
244
Size of the outfit: 1
4517
Size of the outfit: 1
255
Size of the outfit: 2
256
Size of the outfit: 1
309
Size of the outfit: 1
239
Size of the outfit: 1
15
Size of the outfit: 1
Index(['19', '25', '28', '261', '21', '237', '49', '17', '27', '104', '18',
       '4', '5', '2', '4495', '41', '29', '4496', '11', '24', '7', '241', '9',
       '8', '26', '240', '263', '236'],
      dtype='object')
5
Size of the outfit: 3


Unnamed: 0,Outfit,likes,19,25,28,261,21,237,49,17,...,11,24,7,241,9,8,26,240,263,236
0,199683429,14,../Images/199683429/1.jpg,../Images/199683429/2.jpg,../Images/199683429/3.jpg,../Images/199683429/4.jpg,,,,,...,,,,,,,,,,
1,122754559,8,,../Images/122754559/2.jpg,,,../Images/122754559/1.jpg,../Images/122754559/3.jpg,../Images/122754559/4.jpg,,...,,,,,,,,,,
2,208049788,452,,,,,../Images/208049788/1.jpg,,../Images/208049788/4.jpg,../Images/208049788/2.jpg,...,,,,,,,,,,
3,171294961,25,,,,../Images/171294961/4.jpg,,../Images/171294961/3.jpg,,,...,,,,,,,,,,
4,203338168,16,,../Images/203338168/2.jpg,,,../Images/203338168/1.jpg,../Images/203338168/3.jpg,../Images/203338168/4.jpg,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,151982595,6,,../Images/151982595/2.jpg,,../Images/151982595/7.jpg,../Images/151982595/1.jpg,,,,...,,,,../Images/151982595/5.jpg,,../Images/151982595/4.jpg,,,,../Images/151982595/3.jpg
116,164701178,21,,,,,../Images/164701178/3.jpg,../Images/164701178/7.jpg,,,...,../Images/164701178/5.jpg,,,../Images/164701178/8.jpg,,,,,,
117,122342892,25,,../Images/122342892/2.jpg,,,,,../Images/122342892/4.jpg,,...,../Images/122342892/1.jpg,,,,,,,,,
118,112415841,372,,,,,,,../Images/112415841/4.jpg,,...,../Images/112415841/1.jpg,../Images/112415841/2.jpg,,,,,,,,


In [43]:
# Save the outfit table as CSV, leaving out the row index.
outfits_df.to_csv(path_or_buf='../Datasets/outfits_training.csv', index=False)