In [6]:
import json
import pandas as pd
import shutil
import os

In [2]:
def move_image(source_path, destination_folder):
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)
    
    image_name = os.path.basename(source_path)
    
    destination_path = os.path.join(destination_folder, image_name)
    
    try:
        shutil.move(source_path, destination_path)
    except FileNotFoundError:
        print(source_path)

In [4]:
def extract_lat_labels_from_json(json_path):
    with open(json_path, "r") as file:
        data = json.load(file)
    
    labels = {}
    for item in data:
        for img_id in item['question']:
            category, img_num = img_id.split('_')
            labels[img_num] = category
        for img_id in item['answers']:
            category, img_num = img_id.split('_')
            labels[img_num] = category

    label_to_int = {'Neckline': 0, 'Bracelet': 0, 'Earing': 0, 'Hat': 0, 'Bags':1, 'Skirt':2, 'Dress':2, 'Pants':3, 
                   'Shoes':4, 'Sunglasses': 5, 'Top':6, 'Outwear':6, 'Watches':7}
    

    int_labels = {img_num: label_to_int[label] for img_num, label in labels.items()}
    
    return int_labels

In [7]:
lat_label_path = 'C:/Users/FranciscoX/Desktop/fashion-dataset/LAT/label/LAT.json'
lat_path = 'C:/Users/FranciscoX/Desktop/fashion-dataset/LAT/image'
lat_labels = extract_labels_from_json(lat_label_path)

In [32]:
df1 = pd.DataFrame(list(lat_labels.items()), columns=['id', 'label'])

In [49]:
def get_cat(num):
    if num == 1:
        return 0
    elif num in [2,3,6]:
        return 1
    elif num in [0,5,7]:
        return 2
    else:
        return 3

In [54]:
df1['category'] =  df1['label'].apply(get_cat)
df1 = df1.reindex(columns=['id', 'category', 'label'])

In [81]:
def extract_aat_labels_from_json(json_path):
    with open(json_path, "r") as file:
        data = json.load(file)
    
    labels = {}
    for item in data:
        for img_id in item['question']:
            category, img_num = img_id.split('_')
            labels[img_num] = category
        for img_id in item['answers']:
            category, img_num = img_id.split('_')
            labels[img_num] = category

    for keys in labels:
        labels[keys] = labels[keys].split('/')
        if labels[keys][0] in ['Bags', 'Shoes']:
            labels[keys] = labels[keys][0]
        elif labels[keys][1] in ['Outwear', 'Tops']:
            labels[keys] = 'Tops'
        elif labels[keys][1] in ['Trousers', 'Jumpsuits']:
            labels[keys] = 'Pants'
        elif labels[keys][1] in ['Skirts', 'Dresses']:
            labels[keys] = 'Dress'
        elif labels[keys][1] == 'Watches':
            labels[keys] = 'Watches'
        elif labels[keys][1] == 'Eyewears':
            labels[keys] = 'Sunglasses'
        else:
            labels[keys] = labels[keys][0]
    
    label_to_int = {'Accessories': 0, 'Bags': 1, 'Dress': 2, 'Pants': 3, 
                    'Shoes': 4, 'Sunglasses': 5, 'Tops': 6, 'Watches': 7}
    int_labels = {img_num: label_to_int[label] for img_num, label in labels.items()}
    
    return int_labels

In [82]:
aat_label_path = 'C:/Users/FranciscoX/Desktop/fashion-dataset/AAT/label/AAT.json'
aat_path = 'C:/Users/FranciscoX/Desktop/fashion-dataset/AAT/image'
aat_labels = extract_aat_labels_from_json(aat_label_path)

In [84]:
df2 = pd.DataFrame(list(aat_labels.items()), columns=['id', 'label'])
df2['category'] =  df2['label'].apply(get_cat)
df2 = df2.reindex(columns=['id', 'category', 'label'])

In [86]:
total_tag = pd.concat([df1, df2], ignore_index= False)

In [87]:
total_tag

Unnamed: 0,id,category,label
0,P00462138,1,3
1,P00447042,3,4
2,P00462123,1,6
3,P00425745,0,1
4,P00440101,1,6
...,...,...,...
978,100A01,2,0
979,100A03,2,0
980,100A10,2,0
981,100A02,2,0


In [89]:
total_tag.to_csv('a100_tag.csv', index = False)

In [90]:
for i in range(len(df1)):
    image_path = 'C:/Users/FranciscoX/Desktop/fashion-dataset/LAT/image/' + str(df1.iloc[i, 0]) + '.jpg'
    move_image(image_path, 'C:/Users/FranciscoX/Desktop/fashion-dataset/a100/labelled')

In [92]:
for i in range(len(df2)):
    image_path = 'C:/Users/FranciscoX/Desktop/fashion-dataset/AAT/image/' + str(df2.iloc[i, 0]) + '.jpg'
    move_image(image_path, 'C:/Users/FranciscoX/Desktop/fashion-dataset/a100/labelled')

In [93]:
total_tag['category'].value_counts()

category
1    639
2    421
3    314
0    289
Name: count, dtype: int64

In [94]:
total_tag['label'].value_counts()

label
0    371
4    314
6    296
1    289
2    215
3    128
7     43
5      7
Name: count, dtype: int64