In [None]:
import pandas as pd
import os

# Amount of models with 1 and 2 images

In [3]:
dirpath = '../datathon-fme-mango/archive/'

# Load the CSV file into a DataFrame
product_data = pd.read_csv(dirpath+'product_data.csv')

# Get the unique values from the 'cod_modelo_color' column
unique_values = product_data['cod_modelo_color'].unique()

# Path to the folder containing the images
image_folder = dirpath+'images/images/'

# Get a list of all files in the folder
image_files = os.listdir(image_folder)

# Initialize a list to store results
data = []

# Loop through each unique value and count matching files
for value in unique_values:
    count = sum(1 for file in image_files if value in file)
    data.append({'cod_modelo_color': value, 'file_count': count})

# Create a new DataFrame with the results
result_df = pd.DataFrame(data)

Unnamed: 0,file_count
count,33443.0
mean,1.838471
std,0.368023
min,1.0
25%,2.0
50%,2.0
75%,2.0
max,2.0


In [4]:
result_df['file_count'].value_counts()

file_count
2    28041
1     5402
Name: count, dtype: int64

# Amount of unique values per attribute

In [32]:
dirpath = '../datathon-fme-mango/archive/'

# Load the dataframe
attribute_data = pd.read_csv(dirpath+'attribute_data.csv')

# Group by 'attribute_name' and count the unique values of 'des_value'
result = (
    attribute_data.groupby('attribute_name')['des_value']
    .nunique()
    .reset_index(name='unique_des_value_count')
)
result2 = (
    attribute_data.groupby('attribute_name')['cod_value']
    .nunique()
    .reset_index(name='unique_des_value_count')
)

# Display the results
print(result)
print(result2)

        attribute_name  unique_des_value_count
0     cane_height_type                       6
1    closure_placement                       6
2      heel_shape_type                      11
3       knit_structure                       6
4          length_type                      12
5      neck_lapel_type                      33
6      silhouette_type                      33
7   sleeve_length_type                       6
8          toecap_type                       4
9           waist_type                       4
10     woven_structure                       4
        attribute_name  unique_des_value_count
0     cane_height_type                       3
1    closure_placement                       6
2      heel_shape_type                      11
3       knit_structure                       5
4          length_type                      12
5      neck_lapel_type                      33
6      silhouette_type                      33
7   sleeve_length_type                       6
8          to

# Which are those unique values

In [29]:
dirpath = '../datathon-fme-mango/archive/'
# Load the dataframe
attribute_data = pd.read_csv(dirpath+'attribute_data.csv')

# Group by 'attribute_name' and list unique values of 'des_value'
result = (
    attribute_data.groupby('attribute_name')['des_value']
    .apply(lambda x: list(x.unique()))
    .reset_index(name='unique_des_values')
)

# Display the results
print(result)

        attribute_name                                  unique_des_values
0     cane_height_type  [Cuña abotinada, Alta, Bloque, Cuña, Baja, Media]
1    closure_placement  [Cierre Delantero, Sin cierre, Cuello, Lateral...
2      heel_shape_type  [Kitten, Plano, Bloque, Embudo, Rectangular, P...
3       knit_structure  [Punto fino, Punto medio, Punto Grueso, Punto ...
4          length_type  [Crop, Medio, Largo, Standard, Corto, Midi, Mi...
5      neck_lapel_type  [Redondo, Pico, Regular, Caja, Chimenea, Perki...
6      silhouette_type  [Slim, Oversize, Recto, Regular, Evase, Slouch...
7   sleeve_length_type  [Larga, Corta, Sin Manga, Tirante Ancho, Tres ...
8          toecap_type            [Redonda, Abierta, Con punta, Cuadrada]
9           waist_type  [Ajustable/Goma, High Waist, Regular Waist, Lo...
10     woven_structure                  [Ligero, Medio, Pesado, Elástico]


In [30]:
# Optionally, save the results to a CSV file
result.to_csv('unique_des_values_by_attribute.csv', index=False)

# Check attributes for each category

In [25]:
import pandas as pd
dirpath = '../datathon-fme-mango/archive/'

# Load the CSV files
product_data = pd.read_csv(dirpath+'product_data.csv')
attribute_data = pd.read_csv(dirpath+'attribute_data.csv')

# Merge the DataFrames on the cod_modelo_color column
merged_data = pd.merge(product_data, attribute_data, on='cod_modelo_color', how='inner')

division_method = 'des_product_type'

# Group by product category and list unique attribute names
attributes_per_category = merged_data.groupby(division_method)['attribute_name'].unique()

# Check if all pieces of cloth in each category have the same attributes
category_consistency = {}
for category, group in merged_data.groupby(division_method):
    # Get a set of attributes for each cod_modelo_color
    attributes_per_model = group.groupby('cod_modelo_color')['attribute_name'].apply(lambda x: tuple(sorted(x.unique())))
    
    print("#########################")
    print(category)    
    print("#########################")
    print(attributes_per_model.unique())
    # Check if all sets are equal
    category_consistency[category] = len(attributes_per_model.unique()) == 1

# Print results
print("Attributes per Product Category:")
print(attributes_per_category)

print("\nCategory Consistency:")
for category, consistent in category_consistency.items():
    status = "consistent" if consistent else "not consistent"
    print(f"{category}: {status}")


#########################
Ankle Boots
#########################
[('cane_height_type',) ('heel_shape_type',)
 ('cane_height_type', 'heel_shape_type', 'toecap_type')
 ('heel_shape_type', 'toecap_type')
 ('cane_height_type', 'heel_shape_type')
 ('cane_height_type', 'toecap_type')]
#########################
Beach Shoes
#########################
[('heel_shape_type', 'toecap_type')]
#########################
Bermudas
#########################
[('silhouette_type',) ('waist_type',)
 ('silhouette_type', 'waist_type', 'woven_structure')
 ('knit_structure', 'silhouette_type', 'waist_type', 'woven_structure')
 ('silhouette_type', 'waist_type')
 ('knit_structure', 'silhouette_type', 'waist_type')
 ('waist_type', 'woven_structure') ('silhouette_type', 'woven_structure')]
#########################
Blazer
#########################
[('neck_lapel_type', 'silhouette_type') ('neck_lapel_type',)
 ('neck_lapel_type', 'silhouette_type', 'woven_structure')
 ('closure_placement', 'silhouette_type')
 ('length_t