In [230]:
import pandas as pd
import numpy as np
# Load the raw product listing; the path is relative to the notebook's working directory.
adidas = pd.read_csv(filepath_or_buffer='adidas.csv')

In [231]:

# Keep a handle on the full set of column labels.
adidas_by_columns = adidas.columns

# Columns left out of the per-column level analysis below.
excluded_columns = ['selling_price', 'average_rating', 'reviews_count']

# Every remaining column label, in the frame's original order.
columns_categories = adidas.drop(columns=excluded_columns).columns
columns_categories

Index(['url', 'name', 'sku', 'original_price', 'currency', 'availability',
       'color', 'category', 'source', 'source_website', 'breadcrumbs',
       'description', 'brand', 'images', 'country', 'language', 'crawled_at'],
      dtype='object')

In [232]:
# Sort each candidate column into one of three buckets by how many distinct
# values it holds:
#   many_levels_columns -> distinct values / rows >= umbral (nearly one value per row)
#   one_level_column    -> exactly one distinct value (constant column)
#   valid_columns       -> everything else
valid_columns = []
many_levels_columns = []
one_level_column = []

umbral = .98  # threshold on the ratio of distinct values to rows
size_adidas = adidas.shape[0]

for column in columns_categories:
    # drop_duplicates keeps one entry per distinct value (a missing value counts as one).
    n_levels = len(adidas[column].drop_duplicates())

    # The ratio test comes first, so it takes precedence over the constant test.
    if n_levels / size_adidas >= umbral:
        bucket = many_levels_columns
    elif n_levels == 1:
        bucket = one_level_column
    else:
        bucket = valid_columns
    bucket.append(column)
    

In [233]:
# Show the three buckets side by side: usable, near-unique, constant.
(valid_columns, many_levels_columns, one_level_column)

(['name',
  'original_price',
  'availability',
  'color',
  'category',
  'breadcrumbs',
  'description'],
 ['url', 'sku', 'images', 'crawled_at'],
 ['currency', 'source', 'source_website', 'brand', 'country', 'language'])

In [234]:
# One row per color with the number of non-null `sku` values in that color.
# Named aggregation yields flat column labels directly (`color`, `sku`),
# so no MultiIndex level has to be dropped afterwards.
levels = adidas.groupby("color", as_index=False).agg(sku=("sku", "count"))
levels

Unnamed: 0,color,sku
0,Beige,6
1,Black,187
2,Blue,104
3,Brown,1
4,Burgundy,9
5,Gold,3
6,Green,59
7,Grey,81
8,Multi,4
9,Multicolor,20


In [235]:
# Share of each color: its sku count divided by the total number of rows in
# `adidas`. The stored value is a fraction (0-1), not a percentage.
total_rows = len(adidas)
levels["porcentage"] = levels["sku"].div(total_rows)

# Display only: most common colors first. `levels` itself is left unsorted.
levels.sort_values(by="porcentage", ascending=False)

Unnamed: 0,color,sku,porcentage
16,White,222,0.262722
1,Black,187,0.221302
2,Blue,104,0.123077
7,Grey,81,0.095858
11,Pink,62,0.073373
6,Green,59,0.069822
12,Purple,31,0.036686
13,Red,25,0.029586
9,Multicolor,20,0.023669
17,Yellow,17,0.020118


In [236]:
# Collapse infrequent colors into a single "Others" label: a color keeps its own
# name in `color_t` only when its share (`porcentage`) is above the threshold.
#
# This replaces a row-wise `apply` whose lambda returned either the whole row or
# the share (never a label) and whose result was discarded, which left `color_t`
# equal to "Others" for every color no matter what the threshold was.
#
# NOTE(review): `porcentage` is a fraction and the largest share shown above is
# about 0.26, so with a cutoff of 0.9 every color still lands in "Others". The
# original value is kept as written; it may have been meant as 0.09 -- confirm.
COLOR_SHARE_THRESHOLD = 0.9

# Series.where keeps the color name where the condition holds and substitutes
# "Others" everywhere else.
levels["color_t"] = levels["color"].where(
    levels["porcentage"] > COLOR_SHARE_THRESHOLD, "Others"
)
levels

Unnamed: 0,color,sku,porcentage,color_t
0,Beige,6,0.007101,Others
1,Black,187,0.221302,Others
2,Blue,104,0.123077,Others
3,Brown,1,0.001183,Others
4,Burgundy,9,0.010651,Others
5,Gold,3,0.00355,Others
6,Green,59,0.069822,Others
7,Grey,81,0.095858,Others
8,Multi,4,0.004734,Others
9,Multicolor,20,0.023669,Others


In [237]:
# Attach the per-color statistics to every product row, matching on `color`.
# Both frames carry a `sku` column, so pandas disambiguates them with its
# default suffixes: `sku_x` is the product SKU, `sku_y` is the per-color count.
# The inner join keeps only products whose color appears in `levels`.
# NOTE(review): this rebinds `adidas`, so re-running the cell merges the
# already-merged frame again -- restart and run all before relying on it.
adidas = pd.merge(adidas, levels, how="inner", on="color")
adidas

Unnamed: 0,url,name,sku_x,selling_price,original_price,currency,availability,color,category,source,...,brand,images,country,language,average_rating,reviews_count,crawled_at,sku_y,porcentage,color_t
0,https://www.adidas.com/us/beach-shorts/FJ5089....,Beach Shorts,FJ5089,40,,USD,InStock,Black,Clothing,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.5,35,2021-10-23 17:50:17.331255,187,0.221302,Others
1,https://www.adidas.com/us/five-ten-hiangle-pro...,Five Ten Hiangle Pro Competition Climbing Shoes,FV4744,160,,USD,InStock,Black,Shoes,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,3.7,7,2021-10-23 17:50:17.615054,187,0.221302,Others
2,https://www.adidas.com/us/adicross-hybrid-shor...,Adicross Hybrid Shorts,GM5505,80,,USD,InStock,Black,Clothing,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.5,17,2021-10-23 17:50:17.860269,187,0.221302,Others
3,https://www.adidas.com/us/tiro-21-windbreaker/...,Tiro 21 Windbreaker,GP4975,60,,USD,InStock,Black,Clothing,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.5,16,2021-10-23 17:50:17.942359,187,0.221302,Others
4,https://www.adidas.com/us/classic-3-stripes-sw...,Classic 3-Stripes Swimsuit,FS3923,40,,USD,InStock,Black,Clothing,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.5,35,2021-10-23 17:50:18.136849,187,0.221302,Others
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
840,https://www.adidas.com/us/aggies-long-sleeve-q...,Aggies Long Sleeve Quarter-Zip Knit Sweatshirt,H48154,60,$75,USD,InStock,Multi,Clothing,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,5.0,1,2021-10-23 17:52:26.097508,4,0.004734,Others
841,https://www.adidas.com/us/zx-1k-boost-shoes/FX...,ZX 1K Boost Shoes,FX6865,70,$100,USD,InStock,Turquoise,Shoes,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.2,77,2021-10-23 17:51:25.535763,2,0.002367,Others
842,https://www.adidas.com/us/ultraboost-dna-1.0-s...,Ultraboost DNA 1.0 Shoes,H05263,144,$180,USD,InStock,Turquoise,Shoes,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.8,584,2021-10-23 17:51:37.749001,2,0.002367,Others
843,https://www.adidas.com/us/gamemode-firm-ground...,Gamemode Firm Ground Cleats,GY7535,80,$100,USD,InStock,Silver,Shoes,adidas United States,...,adidas,"https://assets.adidas.com/images/w_600,f_auto,...",USA,en,4.7,24,2021-10-23 17:51:35.761152,1,0.001183,Others


In [238]:
# Persist the color summary table. The row index is still written (index is left
# at its default); index_label=False only omits the index-name field from the header.
levels.to_csv(path_or_buf="color_categories.csv", index_label=False)