In [1]:
import requests
from bs4 import BeautifulSoup
import string
from datetime import datetime
import pandas as pd
import json



In [2]:
pd.set_option('display.max_rows', None)

In [3]:
# Woman's page
woman = "https://ecoalf.com/en/women-100"

In [4]:
def get_category(url):
    output_list = list()
    page = BeautifulSoup(requests.get(url).content,'lxml')
    # Find all the div classes associated with a clothes category
    category_class = page.find_all('div',class_="elementor-iqit-banner")
    
    # Create an empty category dictionary to store category name and link
    category_dict = {}
    for item in category_class:
        try:
            # Retrieve category name
            c = item.find('div').get_text()
        except:
            c = None
        # Retrieve category link
        l = item.find('a').get('href')
        if "de" in l:
            l = l.replace("de", "en")

        # Store the name-link pair in category dictionary
        category_dict[str(c)]=l
    
    # Loop through each category in the dictionary
    for category, link in category_dict.items():
        
        # Run get_product function on each category page to retrieve individual product
        lst = get_product(category,link)
        
        # Append to output list
        output_list.extend(lst)
        
    # Create dataframe
    df = pd.DataFrame(output_list, columns =['display_name','product_material','color','size','price','product_url','image_link_color','brand_name','description','scrapped_date','low_level','gender','secondhand'])
    return df


In [5]:
def get_product(category, category_link):
    page = BeautifulSoup(requests.get(category_link).content,'lxml')
    # Find all the div classes associated with a product
    product_class = page.find_all('div',class_="thumbnail-container")
    
    # Create an empty list to store products
    products = []
    
    # Create an empty category dictionary to store product name and link
    product_dic = {}
    for item in product_class:
        try:
            # Retrieve product name
            p = item.find("img").get("alt")
        except:
            p = None
        # Retrieve product link
        l = item.find('a').get('href')
        if "women" in l or "woman" in l:
            # Store the name-link pair in category dictionary
            product_dic[str(p)]=l
    
    # Loop through each product in the dictionary
    for product, link in product_dic.items():
        
        # Store brand_name
        brand_name = "ecoalf"
        # Store scrapped_date
        scrapped_date = datetime.today().strftime('%Y-%m-%d')
        # Store gender
        gender = "Woman"
        # Store secondhand
        secondhand = "No"
        
        # Run get_details function on each product page to retrieve details on each individual product
        product_material, color, image_link_color, size, price, description = get_details(link)
        
        # Append the new product to the products list
        products.append([product,product_material,color,size,price,link,image_link_color,brand_name,description,scrapped_date,category,gender,secondhand])
    
    return products
   

In [6]:
def get_details(product_link):
    page = BeautifulSoup(requests.get(product_link).content,'lxml')
    
    # Retrieve product materials
    details = page.find('div', class_="product-description-short rte-content").find_all('li')
    # Create an empty product_material dictionary
    product_material = {}
    for item in details:
        text = item.get_text()
        if "Main fabric" in text:
            materials = text.strip().split("Main fabric: ")[1].split(" / ")
            for item in materials:
                pct = item.split('%')[0]
                pct = f'{pct}%'
                mtr = item.split('%')[1]
                product_material[str(mtr)]=pct
    product_material = json.dumps(product_material)
    
    # Create an empty color list
    color = []
    # Create an empty image_link_color dictionary
    image_link_color = {}
    # Retrieve available colors if multiple colors are offered
    try:
        color_class = page.find('div', class_="block-content d-flex justify-content-center justify-content-md-start mb-2 mt-3").find_all('a')
        for item in color_class:
            link = item.get('href')
            image = BeautifulSoup(requests.get(link).content,'lxml').find('div', class_="product-lmage-large swiper-slide").find('a').get('href')
            c = item.get('title').strip()
            # Store available colors in color list
            color.append(c)
            # Store color-link pairs in image_link_color dictionary
            image_link_color[str(c)]=image
            
    # Retrieve single color if only single color is offered
    except:
        c = page.find('div',class_="nomcolor text-capitalize mb-4 text-center text-md-left").get_text()
        color.append(c)
        try:
            image = page.find('div', class_="product-lmage-large swiper-slide").find('a').get('href')
            image_link_color[str(c)]=image
        except:
            image_link_color[str(c)]=None
    color = json.dumps(color)
    image_link_color = json.dumps(image_link_color)
    
        
    # Retrieve available sizes
    size_class = page.find_all('li',class_="input-container float-left")
    # Create an empty size list
    size = []
    for item in size_class:
        s = item.find('span').get_text()
        size.append(s)
    size = json.dumps(size)
        
    # Retrieve price
    price = page.find('span', class_="product-price").get_text()
    price = price.split('$')[1]
    price = f'USD {price}'
    
    # Retrieve description
    description = page.find('div',class_="product-description text-left").find('div',class_="rte-content").get_text().strip()
        
    return product_material, color, image_link_color, size, price, description

In [7]:
df_woman = get_category(woman)

In [8]:
df_woman

Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,Mandu Jacket Woman,"{"" Polyester"": ""49%"", "" S.CAF\u00c9\u00ae Recy...","[""offwhite"", ""oldmustard"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 575.00,https://ecoalf.com/en/women/2120302-mandu-jack...,"{""offwhite"": ""https://ecoalf.com/26366-thickbo...",ecoalf,A 3-in-1 jacket with an outer shell that is wa...,2021-11-03,Jackets and coats,Woman,No
1,Lenox Long Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""darkbronze"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 429.90,https://ecoalf.com/en/women/2120291-lenoxalf-l...,"{""armygreen"": ""https://ecoalf.com/26343-thickb...",ecoalf,The Lenox coat is made with recycled polyester...,2021-11-03,Jackets and coats,Woman,No
2,Asp Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""deepnavy"", ""oldmustard"",...","[""XS"", ""S"", ""M"", ""L""]",USD 275.00,https://ecoalf.com/en/women/2120227-asp-jacket...,"{""armygreen"": ""https://ecoalf.com/27548-thickb...",ecoalf,The Asp jacket is made from recycled polyester...,2021-11-03,Jackets and coats,Woman,No
3,Marangu Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""whitesand"", ""deepforest"", ""armygreen"", ""deep...","[""XS"", ""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 375.00,https://ecoalf.com/en/women/2120309-marangu-ja...,"{""whitesand"": ""https://ecoalf.com/26390-thickb...",ecoalf,The three-quarter MARANGU coat is made from re...,2021-11-03,Jackets and coats,Woman,No
4,Amu New Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""darkbronze"", ""asphalt""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 375.00,https://ecoalf.com/en/women/2120202-amu-new-ja...,"{""armygreen"": ""https://ecoalf.com/27536-thickb...",ecoalf,"The Amu jacket is a must-have this winter, com...",2021-11-03,Jackets and coats,Woman,No
5,Gedre Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""darkpurple"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 315.00,https://ecoalf.com/en/women/2120261-gedre-jack...,"{""armygreen"": ""https://ecoalf.com/26301-thickb...",ecoalf,Look after the Planet and yourself while weari...,2021-11-03,Jackets and coats,Woman,No
6,Glacier Jacket Woman,"{"" Recycled polyester (UTO)"": ""60%"", "" Recycle...","[""antartica"", ""armygreen"", ""deepnavy"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 685.00,https://ecoalf.com/en/women/2120266-glacier-ja...,"{""antartica"": ""https://ecoalf.com/26313-thickb...",ecoalf,With the Glacier jacket rainy days will be fun...,2021-11-03,Jackets and coats,Woman,No
7,Marangu Vest Woman,"{""\u00a0Recycled polyester"": ""100%""}","[""whitesand"", ""armygreen"", ""black""]","[""S"", ""L""]",USD 289.90,https://ecoalf.com/en/women/2120314-marangu-ve...,"{""whitesand"": ""https://ecoalf.com/24920-thickb...",ecoalf,"Marangu padded vest, made from recycled polyes...",2021-11-03,Jackets and coats,Woman,No
8,Mulhacen Woman Jacket,"{"" Recycled polyester (UTO)"": ""60%"", "" Recycle...","[""armygreen"", ""ash"", ""deepnavy"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 505.00,https://ecoalf.com/en/women/2120321-mulhacen-w...,"{""armygreen"": ""https://ecoalf.com/26443-thickb...",ecoalf,The Mulhacen jacket will allow you to enjoy yo...,2021-11-03,Jackets and coats,Woman,No
9,Noir Reversible Oversize...,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""darkbronze"", ""rosewood""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 399.90,https://ecoalf.com/en/women/2120325-noir-rever...,"{""armygreen"": ""https://ecoalf.com/25145-thickb...",ecoalf,The Noir jacket is made from recycled polyeste...,2021-11-03,Jackets and coats,Woman,No


In [9]:
# Man's page
man = "https://ecoalf.com/en/men-200"

In [10]:
def get_product(category, category_link):
    page = BeautifulSoup(requests.get(category_link).content,'lxml')
    # Find all the div classes associated with a product
    product_class = page.find_all('div',class_="thumbnail-container")
    
    # Create an empty list to store products
    products = []
    
    # Create an empty category dictionary to store product name and link
    product_dic = {}
    for item in product_class:
        try:
            # Retrieve product name
            p = item.find("img").get("alt")
        except:
            p = None
        # Retrieve product link
        l = item.find('a').get('href')
        if "men" in l or "man" in l:
            # Store the name-link pair in category dictionary
            product_dic[str(p)]=l
    
    # Loop through each product in the dictionary
    for product, link in product_dic.items():
        
        # Store brand_name
        brand_name = "ecoalf"
        # Store scrapped_date
        scrapped_date = datetime.today().strftime('%Y-%m-%d')
        # Store gender
        gender = "Man"
        # Store secondhand
        secondhand = "No"
        
        # Run get_details function on each product page to retrieve details on each individual product
        product_material, color, image_link_color, size, price, description = get_details(link)
        
        # Append the new product to the products list
        products.append([product,product_material,color,size,price,link,image_link_color,brand_name,description,scrapped_date,category,gender,secondhand])
    
    return products
   

In [11]:
df_man = get_category(man)

In [None]:
df_man

Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,Asp Jacket Man,"{"" Recycled polyester"": ""100%""}","[""offwhite"", ""oldgold"", ""darkkhaki"", ""armygree...","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 275.00,https://ecoalf.com/en/men/2120221-asp-jacket-man,"{""offwhite"": ""https://ecoalf.com/26708-thickbo...",ecoalf,The Asp jacket is made from recycled polyester...,2021-10-18,jackets ANDcoats,Man,No
1,Armada Jacket Man,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""khaki"", ""midnightnavy"", ""black""]","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 519.90,https://ecoalf.com/en/men/2120203-armada-jacke...,"{""khaki"": ""https://ecoalf.com/26689-thickbox_d...",ecoalf,A basic garment that’s an absolute must-have t...,2021-10-18,jackets ANDcoats,Man,No
2,Beret Jacket Man,"{"" Recycled polyester"": ""100%""}","[""darkkhaki"", ""olive"", ""deepnavy"", ""steelblue""...","[""S"", ""M"", ""L"", ""XL""]",USD 259.90,https://ecoalf.com/en/men/2120240-beret-jacket...,"{""darkkhaki"": ""https://ecoalf.com/26784-thickb...",ecoalf,Beret is a thin but warm padded jacket. It com...,2021-10-18,jackets ANDcoats,Man,No
3,Baza Jacket Man,"{"" Recycled polyester"": ""100%""}","[""olive"", ""deepnavy"", ""darkbronze"", ""black""]","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 325.00,https://ecoalf.com/en/men/2120233-baza-jacket-man,"{""olive"": ""https://ecoalf.com/26767-thickbox_d...",ecoalf,"The Baza jacket is warm, with an elegant cut, ...",2021-10-18,jackets ANDcoats,Man,No
4,Cardiff Vest Man,"{"" Recycled polyester"": ""100%""}","[""offwhite"", ""darkkhaki"", ""olive"", ""deepnavy"",...","[""XS"", ""M"", ""L"", ""XL"", ""XXL""]",USD 215.00,https://ecoalf.com/en/men/2120254-cardiff-vest...,"{""offwhite"": ""https://ecoalf.com/26880-thickbo...",ecoalf,The Cardiff vest is made 100% from recycled po...,2021-10-18,jackets ANDcoats,Man,No
5,Ampuero Jacket Man,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""khaki"", ""midnightnavy"", ""black""]","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 575.00,https://ecoalf.com/en/men/2120196-ampuero-jack...,"{""khaki"": ""https://ecoalf.com/26665-thickbox_d...",ecoalf,"Lightweight and very comfortable, as it is mad...",2021-10-18,jackets ANDcoats,Man,No
6,Cartes Jacket Man,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""armygreen"", ""midnightnavy"", ""red"", ""caviar""]","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 289.90,https://ecoalf.com/en/men/2120257-cartes-jacke...,"{""armygreen"": ""https://ecoalf.com/26961-thickb...",ecoalf,This sporty-style jacket will be your perfect ...,2021-10-18,jackets ANDcoats,Man,No
7,Charco Raincoat Man,"{"" Polyester"": ""56%"", "" Recycled polyester"": ""...","[""graphite""]","[""S"", ""M"", ""L""]",USD 245.00,https://ecoalf.com/en/men/2120260-charco-jacke...,"{""graphite"": ""https://ecoalf.com/26995-thickbo...",ecoalf,The Charco jacket is made from polyester and r...,2021-10-18,jackets ANDcoats,Man,No
8,Hampshire Long Jacket Man,"{"" Polyester"": ""56%"", "" Recycled polyester"": ""...","[""tobacco"", ""deepnavy"", ""graphite""]","[""S"", ""M"", ""L"", ""XXL""]",USD 399.90,https://ecoalf.com/en/men/2120271-hampshire-lo...,"{""tobacco"": ""https://ecoalf.com/27466-thickbox...",ecoalf,"The Hampshire trench coat is a timeless piece,...",2021-10-18,jackets ANDcoats,Man,No
9,Livorno Jacket Man,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""koreangreen"", ""deepnavy"", ""black""]","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 535.00,https://ecoalf.com/en/men/2120294-livorno-jack...,"{""koreangreen"": ""https://ecoalf.com/27136-thic...",ecoalf,"Classic in style, the Livorno jacket will keep...",2021-10-18,jackets ANDcoats,Man,No


In [None]:
# Kids' page

In [None]:
kids = "https://ecoalf.com/en/kids-300"

In [None]:
def get_product(category, category_link):
    page = BeautifulSoup(requests.get(category_link).content,'lxml')
    # Find all the div classes associated with a product
    product_class = page.find_all('div',class_="thumbnail-container")
    
    # Create an empty list to store products
    products = []
    
    # Create an empty category dictionary to store product name and link
    product_dic = {}
    for item in product_class:
        try:
            # Retrieve product name
            p = item.find("img").get("alt")
        except:
            p = None
        # Retrieve product link
        l = item.find('a').get('href')
        if "kids" in l:
            # Store the name-link pair in category dictionary
            product_dic[str(p)]=l
    
    # Loop through each product in the dictionary
    for product, link in product_dic.items():
        
        # Store brand_name
        brand_name = "ecoalf"
        # Store scrapped_date
        scrapped_date = datetime.today().strftime('%Y-%m-%d')
        # Store gender
        gender = "Kids"
        # Store secondhand
        secondhand = "No"
        
        # Run get_details function on each product page to retrieve details on each individual product
        product_material, color, image_link_color, size, price, description = get_details(link)
        
        # Append the new product to the products list
        products.append([product,product_material,color,size,price,link,image_link_color,brand_name,description,scrapped_date,category,gender,secondhand])
    
    return products
   

In [None]:
df_kids = get_category(kids)

In [None]:
df_kids

Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,Asp Jacket Kids,"{"" Recycled polyester"": ""100%""}","[""lightmauve"", ""olive"", ""deepnavy"", ""shinyyell...","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 159.90,https://ecoalf.com/en/kids/2120210-asp-jacket-...,"{""lightmauve"": ""https://ecoalf.com/25396-thick...",ecoalf,The Asp jacket is made from recycled polyester...,2021-10-18,JACKETS &COATS,Kids,No
1,Marangu Coat Kids,"{"" Recycled polyester"": ""100%""}","[""lightmauve"", ""deepnavy"", ""asphalt""]","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 185.00,https://ecoalf.com/en/kids/2120303-marangu-coa...,"{""lightmauve"": ""https://ecoalf.com/25461-thick...",ecoalf,The three-quarter MARANGU coat is made from re...,2021-10-18,JACKETS &COATS,Kids,No
2,Gum Jacket Kids,"{"" Recycled polyester"": ""100%""}","[""olive"", ""deepnavy""]","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 185.00,https://ecoalf.com/en/kids/2120268-gum-jacket-...,"{""olive"": ""https://ecoalf.com/25420-thickbox_d...",ecoalf,The Gum jacket protects you from the cold. Its...,2021-10-18,JACKETS &COATS,Kids,No
3,Kiko Vest Kids,"{"" Recycled polyester"": ""100%""}","[""lightmauve"", ""olive"", ""deepnavy"", ""shinyyell...","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 115.00,https://ecoalf.com/en/kids/2120285-kiko-vest-kids,"{""lightmauve"": ""https://ecoalf.com/25429-thick...",ecoalf,The padded Kiko vest is perfect for extra warm...,2021-10-18,JACKETS &COATS,Kids,No
4,Kim Jacket Kids,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""khaki"", ""deepnavy""]","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 199.90,https://ecoalf.com/en/kids/2120287-kim-jacket-...,"{""khaki"": ""https://ecoalf.com/25449-thickbox_d...",ecoalf,The Kim jacket combines a sporty design with d...,2021-10-18,JACKETS &COATS,Kids,No
5,Barca Sweatshirt Kids,"{"" Recycled cotton"": ""50%"", "" Organic cotton"":...","[""lightpink"", ""caviar""]","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 95.00,https://ecoalf.com/en/kids/2120434-barca-sweat...,"{""lightpink"": ""https://ecoalf.com/25486-thickb...",ecoalf,The hooded sporty-style Barca sweatshirt has a...,2021-10-18,sweatshirts,Kids,No
6,Celo Sweatshirt Kids,"{"" Recycled cotton"": ""50%"", "" Organic cotton"":...","[""washedblue"", ""navy"", ""red""]","[""6"", ""8"", ""10"", ""12"", ""14""]",USD 85.00,https://ecoalf.com/en/kids/2120452-celo-sweats...,"{""washedblue"": ""https://ecoalf.com/25510-thick...",ecoalf,"The Celo classic sweatshirt, with the Ecoalf l...",2021-10-18,sweatshirts,Kids,No
7,Bardera Sweatshirt Kids,"{"" Recycled cotton"": ""50%"", "" Organic cotton"":...","[""offwhite"", ""navy""]","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 85.00,https://ecoalf.com/en/kids/2120436-bardera-swe...,"{""offwhite"": ""https://ecoalf.com/25500-thickbo...",ecoalf,"The Bardera basic sweatshirt, with the Ecoalf ...",2021-10-18,sweatshirts,Kids,No
8,Estuche Sweatshirt Kids,"{"" Recycled cotton"": ""30%"", "" Organic cotton"":...","[""lightgreymelange""]","[""8"", ""10"", ""12"", ""14"", ""16""]",USD 95.00,https://ecoalf.com/en/kids/2120459-estuche-swe...,"{""lightgreymelange"": ""https://ecoalf.com/25524...",ecoalf,A super-comfortable hooded sweatshirt that the...,2021-10-18,sweatshirts,Kids,No
9,Great B Sweatshirt Kids,"{"" Organic cotton"": ""60%"", "" Recycled cotton"":...","[""navy"", ""red"", ""lightgreymelange""]","[""6"", ""8"", ""10"", ""12"", ""14"", ""16""]",USD 85.00,https://ecoalf.com/en/kids/2120468-great-b-swe...,"{""navy"": ""https://ecoalf.com/25531-thickbox_de...",ecoalf,"The Great B sweatshirt, with text “Because the...",2021-10-18,sweatshirts,Kids,No


In [None]:
# Woman shoes

In [None]:
def get_shoes(url):
    lst = get_product_shoes("shoes",url)
        
    # Create dataframe
    df = pd.DataFrame(lst, columns =['display_name','product_material','color','size','price','product_url','image_link_color','brand_name','description','scrapped_date','low_level','gender','secondhand'])
    return df

In [None]:
def get_product_shoes(category, category_link):
    page = BeautifulSoup(requests.get(category_link).content,'lxml')
    # Find all the div classes associated with a product
    product_class = page.find_all('div',class_="thumbnail-container")
    
    # Create an empty list to store products
    products = []
    
    # Create an empty category dictionary to store product name and link
    product_dic = {}
    for item in product_class:
        try:
            # Retrieve product name
            p = item.find("img").get("alt")
        except:
            p = None
        # Retrieve product link
        l = item.find('a').get('href')
        if "woman" in l or "man" in l:
            # Store the name-link pair in category dictionary
            product_dic[str(p)]=l
    
    # Loop through each product in the dictionary
    for product, link in product_dic.items():
        
        # Store brand_name
        brand_name = "ecoalf"
        # Store scrapped_date
        scrapped_date = datetime.today().strftime('%Y-%m-%d')
        # Store gender
        gender = "Woman"
        # Store secondhand
        secondhand = "No"
        
        # Run get_details function on each product page to retrieve details on each individual product
        product_material, color, image_link_color, size, price, description = get_details(link)
        
        # Append the new product to the products list
        products.append([product,product_material,color,size,price,link,image_link_color,brand_name,description,scrapped_date,category,gender,secondhand])
    
    return products
   

In [None]:
woman_shoes = get_shoes("https://ecoalf.com/en/sneakers-180")

In [None]:
woman_shoes

Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,Prince Sneakers Woman,"{"" Recycled nylon"": ""100%""}","[""offwhite"", ""fucsia"", ""burnedorange"", ""grape""...","[""36""]",USD 115.00,https://ecoalf.com/en/women/2120633-eliot-grap...,"{""offwhite"": ""https://ecoalf.com/23508-thickbo...",ecoalf,The comfort and durability of recycled nylon m...,2021-10-18,shoes,Woman,No
1,Eliot Grape Sneakers Woman,"{"" Recycled polyester /31.7"": ""46.7%"", "" PU"": ...","[""offwhite"", ""black""]","[""36"", ""37"", ""38"", ""39"", ""40"", ""41""]",USD 175.00,https://ecoalf.com/en/women/2120596-oregon-sne...,"{""offwhite"": ""https://ecoalf.com/23445-thickbo...",ecoalf,The Eliot sneakers have been made with fabric ...,2021-10-18,shoes,Woman,No
2,Riera Sneakers Woman,"{"" Recycled nylon"": ""100%""}","[""white"", ""camel"", ""midnightnavy""]","[""36"", ""37"", ""38"", ""39"", ""40"", ""41""]",USD 115.00,https://ecoalf.com/en/sneakers/2120638-mulhace...,"{""white"": ""https://ecoalf.com/23634-thickbox_d...",ecoalf,The Riera sneakers are made mainly from recycl...,2021-10-18,shoes,Woman,No
3,Oregon Sneakers Woman,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""darksand"", ""silver"", ""khaki"", ""black""]","[""36"", ""37"", ""38"", ""39"", ""40"", ""41""]",USD 115.00,https://ecoalf.com/en/women/2120616-sandford-s...,"{""darksand"": ""https://ecoalf.com/23475-thickbo...",ecoalf,The Oregon sneakers are designed for walking i...,2021-10-18,shoes,Woman,No
4,Sandford Sneakers Woman,"{"" Recycled nylon"": ""100%""}","[""offwhite"", ""molegrey"", ""black""]","[""36"", ""37"", ""38"", ""39"", ""40"", ""41""]",USD 115.00,https://ecoalf.com/en/sneakers/2120651-prince-...,"{""offwhite"": ""https://ecoalf.com/23642-thickbo...",ecoalf,The Sandford sneakers are perfect for a casual...,2021-10-18,shoes,Woman,No
5,Sandford Basic Sneakers Woman,"{"" Recycled nylon"": ""100%""}","[""offwhite""]","[""36"", ""37"", ""38"", ""39"", ""40"", ""41""]",USD 115.00,https://ecoalf.com/en/sneakers/2120642-prince-...,"{""offwhite"": ""https://ecoalf.com/23544-thickbo...",ecoalf,The Sandford sneakers are perfect for a casual...,2021-10-18,shoes,Woman,No
6,Patri Sneakers Woman,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae""...","[""black""]","[""37"", ""38"", ""39"", ""40"", ""41""]",USD 185.00,https://ecoalf.com/en/women/2120620-snow-sneak...,"{""black"": ""https://ecoalf.com/23626-thickbox_d...",ecoalf,The combination of recycled fabrics and other ...,2021-10-18,shoes,Woman,No
7,Tenis Sneakers Woman,"{"" Recycled nylon"": ""100%""}","[""offwhite""]","[""36"", ""37"", ""38"", ""39"", ""40"", ""41""]",USD 115.00,https://ecoalf.com/en/sneakers/2120657-riera-s...,"{""offwhite"": ""https://ecoalf.com/23565-thickbo...",ecoalf,"The Tenis sneakers, in a classic but renewed s...",2021-10-18,shoes,Woman,No
8,Mulhacen Mid Boot Woman,"{"" Recycled polyester"": ""50%"", "" Cotton"": ""50%""}","[""linen"", ""brown"", ""black""]","[""41""]",USD 145.00,https://ecoalf.com/en/women/2120602-jonic-snea...,"{""linen"": ""https://ecoalf.com/23451-thickbox_d...",ecoalf,The Mulhacen boots are made from recycled poly...,2021-10-18,shoes,Woman,No
9,Snow Boots Woman,"{"" Polyester"": ""56%"", "" Recycled polyester"": ""...","[""darksand"", ""dustygreen"", ""black""]","[""36"", ""37"", ""38"", ""39"", ""40"", ""41""]",USD 175.00,https://ecoalf.com/en/sneakers/2120653-riera-s...,"{""darksand"": ""https://ecoalf.com/23645-thickbo...",ecoalf,"The Snow boots are comfortable, warm and susta...",2021-10-18,shoes,Woman,No


In [None]:
# Man shoes

In [None]:
def get_product_shoes(category, category_link):
    page = BeautifulSoup(requests.get(category_link).content,'lxml')
    # Find all the div classes associated with a product
    product_class = page.find_all('div',class_="thumbnail-container")
    
    # Create an empty list to store products
    products = []
    
    # Create an empty category dictionary to store product name and link
    product_dic = {}
    for item in product_class:
        try:
            # Retrieve product name
            p = item.find("img").get("alt")
        except:
            p = None
        # Retrieve product link
        l = item.find('a').get('href')
        if "woman" in l or "man" in l:
            # Store the name-link pair in category dictionary
            product_dic[str(p)]=l
    
    # Loop through each product in the dictionary
    for product, link in product_dic.items():
        
        # Store brand_name
        brand_name = "ecoalf"
        # Store scrapped_date
        scrapped_date = datetime.today().strftime('%Y-%m-%d')
        # Store gender
        gender = "Man"
        # Store secondhand
        secondhand = "No"
        
        # Run get_details function on each product page to retrieve details on each individual product
        product_material, color, image_link_color, size, price, description = get_details(link)
        
        # Append the new product to the products list
        products.append([product,product_material,color,size,price,link,image_link_color,brand_name,description,scrapped_date,category,gender,secondhand])
    
    return products
   

In [None]:
man_shoes = get_shoes("https://ecoalf.com/en/sneakers-270")

In [None]:
man_shoes

Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,Prince Sneakers Man,"{"" Recycled nylon"": ""100%""}","[""offwhite"", ""koreangreen"", ""deepnavy"", ""cavia...","[""42""]",USD 115.00,https://ecoalf.com/en/men/2120621-snow-sneaker...,"{""offwhite"": ""https://ecoalf.com/23628-thickbo...",ecoalf,The comfort and durability of the recycled nyl...,2021-10-18,shoes,Man,No
1,Eliot Grape Sneakers Man,"{"" Recycled polyester /31.7"": ""46.7%"", "" PU"": ...","[""offwhite"", ""black""]","[""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 175.00,https://ecoalf.com/en/men/2120593-oregon-sneak...,"{""offwhite"": ""https://ecoalf.com/23608-thickbo...",ecoalf,The Eliot sneakers have been made with fabric ...,2021-10-18,shoes,Man,No
2,Caspian Sneakers Man,"{"" Recycled nylon"": ""100%""}","[""steelblue"", ""caviar"", ""black""]","[""40"", ""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 115.00,https://ecoalf.com/en/men/2120590-oregon-sneak...,"{""steelblue"": ""https://ecoalf.com/23436-thickb...",ecoalf,The comfort of the recycled Nylon fabric and i...,2021-10-18,shoes,Man,No
3,Sandford Basic Sneakers Man,"{"" Recycled nylon"": ""100%""}","[""offwhite"", ""black""]","[""42"", ""43"", ""44"", ""45"", ""46""]",USD 115.00,https://ecoalf.com/en/sneakers/2120641-prince-...,"{""offwhite"": ""https://ecoalf.com/23538-thickbo...",ecoalf,The Sandford sneakers are perfect for a casual...,2021-10-18,shoes,Man,No
4,Yale Sneakers Man,"{"" Recycled nylon"": ""100%""}","[""koreangreen"", ""deepnavy"", ""black""]","[""40"", ""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 115.00,https://ecoalf.com/en/sneakers/2120660-yale-sn...,"{""koreangreen"": ""https://ecoalf.com/23569-thic...",ecoalf,The vintage-style Yale sneakers are the best e...,2021-10-18,shoes,Man,No
5,Oregon Sneakers Man,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""khaki"", ""midnightnavy"", ""caviar"", ""black"", ""...","[""40"", ""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 115.00,https://ecoalf.com/en/men/2120610-sandford-bas...,"{""khaki"": ""https://ecoalf.com/23460-thickbox_d...",ecoalf,The Oregon sneakers are designed for walking i...,2021-10-18,shoes,Man,No
6,Riera Sneakers Man,"{"" Recycled nylon"": ""100%""}","[""offwhite"", ""deepnavy"", ""black""]","[""40"", ""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 115.00,https://ecoalf.com/en/sneakers/2120636-mulhace...,"{""offwhite"": ""https://ecoalf.com/23526-thickbo...",ecoalf,The Riera sneakers are made mainly from recycl...,2021-10-18,shoes,Man,No
7,Sandford Sneakers Man,"{"" Recycled nylon"": ""100%""}","[""offwhite"", ""deepnavy"", ""red"", ""black""]","[""40"", ""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 115.00,https://ecoalf.com/en/sneakers/2120647-prince-...,"{""offwhite"": ""https://ecoalf.com/23591-thickbo...",ecoalf,The Sandford sneakers are perfect for a casual...,2021-10-18,shoes,Man,No
8,Tenis Sneakers Man,"{"" Recycled nylon"": ""100%""}","[""khaki"", ""black""]","[""40"", ""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 115.00,https://ecoalf.com/en/sneakers/2120656-riera-s...,"{""khaki"": ""https://ecoalf.com/23559-thickbox_d...",ecoalf,The lightweight Tenis sneakers are made from r...,2021-10-18,shoes,Man,No
9,Beaufort Sneakers Man,"{"" Recycled polyester"": ""59%"", "" Sorona\u00ae....","[""black""]","[""40"", ""41"", ""42"", ""43"", ""44"", ""45"", ""46""]",USD 129.90,https://ecoalf.com/en/men/2120586-oregon-mid-b...,"{""black"": ""https://ecoalf.com/23434-thickbox_d...",ecoalf,The Beaufort sneakers are made mainly from rec...,2021-10-18,shoes,Man,No


In [None]:
# Kids shoes

In [None]:
def get_product_shoes(category, category_link):
    page = BeautifulSoup(requests.get(category_link).content,'lxml')
    # Find all the div classes associated with a product
    product_class = page.find_all('div',class_="thumbnail-container")
    
    # Create an empty list to store products
    products = []
    
    # Create an empty category dictionary to store product name and link
    product_dic = {}
    for item in product_class:
        try:
            # Retrieve product name
            p = item.find("img").get("alt")
        except:
            p = None
        # Retrieve product link
        l = item.find('a').get('href')
        if "man" in l or "woman" in l:
            # Store the name-link pair in category dictionary
            product_dic[str(p)]=l
    
    # Loop through each product in the dictionary
    for product, link in product_dic.items():
        
        # Store brand_name
        brand_name = "ecoalf"
        # Store scrapped_date
        scrapped_date = datetime.today().strftime('%Y-%m-%d')
        # Store gender
        gender = "Kids"
        # Store secondhand
        secondhand = "No"
        
        # Run get_details function on each product page to retrieve details on each individual product
        product_material, color, image_link_color, size, price, description = get_details(link)
        
        # Append the new product to the products list
        products.append([product,product_material,color,size,price,link,image_link_color,brand_name,description,scrapped_date,category,gender,secondhand])
    
    return products
   

In [None]:
kids_shoes = get_shoes("https://ecoalf.com/en/sneakers-505")

In [None]:
kids_shoes

Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,Sandford Sneakers Kids,"{"" Recycled nylon"": ""100%""}","[""offwhite"", ""pink""]","[""32"", ""33"", ""34"", ""35"", ""36"", ""37""]",USD 99.90,https://ecoalf.com/en/sneakers/2120644-prince-...,"{""offwhite"": ""https://ecoalf.com/23679-thickbo...",ecoalf,The Sandford sports shoes are perfect for a ca...,2021-10-18,shoes,Kids,No
1,Yale Mid Boot Sneakers Kids,"{"" Recycled nylon"": ""100%""}","[""darkkhaki"", ""deepnavy""]","[""32"", ""33"", ""34"", ""35"", ""36"", ""37""]",USD 129.90,https://ecoalf.com/en/sneakers/2120662-yale-mi...,"{""darkkhaki"": ""https://ecoalf.com/23673-thickb...",ecoalf,The vintage-style Yale sneakers are the best e...,2021-10-18,shoes,Kids,No


In [None]:
# Combine three dfs to get a final df

In [None]:
ecoalf_table = pd.concat([df_woman,df_man,df_kids,woman_shoes,man_shoes,kids_shoes],ignore_index = True)

In [None]:
ecoalf_table

Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,Lenox Long Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""darkbronze"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 429.90,https://ecoalf.com/en/women/2120291-lenoxalf-l...,"{""armygreen"": ""https://ecoalf.com/26343-thickb...",ecoalf,The Lenox coat is made with recycled polyester...,2021-10-18,Jackets and coats,Woman,No
1,Asp Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""deepnavy"", ""oldmustard"",...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 275.00,https://ecoalf.com/en/women/2120227-asp-jacket...,"{""armygreen"": ""https://ecoalf.com/27548-thickb...",ecoalf,The Asp jacket is made from recycled polyester...,2021-10-18,Jackets and coats,Woman,No
2,Marangu Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""whitesand"", ""deepforest"", ""armygreen"", ""deep...","[""XS"", ""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 375.00,https://ecoalf.com/en/women/2120309-marangu-ja...,"{""whitesand"": ""https://ecoalf.com/26390-thickb...",ecoalf,The three-quarter MARANGU coat is made from re...,2021-10-18,Jackets and coats,Woman,No
3,Amu New Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""darkbronze"", ""asphalt""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 375.00,https://ecoalf.com/en/women/2120202-amu-new-ja...,"{""armygreen"": ""https://ecoalf.com/27536-thickb...",ecoalf,"The Amu jacket is a must-have this winter, com...",2021-10-18,Jackets and coats,Woman,No
4,Gedre Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""darkpurple"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 315.00,https://ecoalf.com/en/women/2120261-gedre-jack...,"{""armygreen"": ""https://ecoalf.com/26301-thickb...",ecoalf,Look after the Planet and yourself while weari...,2021-10-18,Jackets and coats,Woman,No
5,Glacier Jacket Woman,"{"" Recycled polyester (UTO)"": ""60%"", "" Recycle...","[""antartica"", ""armygreen"", ""deepnavy"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 685.00,https://ecoalf.com/en/women/2120266-glacier-ja...,"{""antartica"": ""https://ecoalf.com/26313-thickb...",ecoalf,With the Glacier jacket rainy days will be fun...,2021-10-18,Jackets and coats,Woman,No
6,Mandu Jacket Woman,"{"" Polyester"": ""49%"", "" S.CAF\u00c9\u00ae Recy...","[""offwhite"", ""oldmustard"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 575.00,https://ecoalf.com/en/women/2120300-mandu-jack...,"{""offwhite"": ""https://ecoalf.com/26366-thickbo...",ecoalf,A 3-in-1 jacket with an outer shell that is wa...,2021-10-18,Jackets and coats,Woman,No
7,Marangu Vest Woman,"{""\u00a0Recycled polyester"": ""100%""}","[""whitesand"", ""armygreen"", ""black""]","[""S"", ""M"", ""L""]",USD 289.90,https://ecoalf.com/en/women/2120314-marangu-ve...,"{""whitesand"": ""https://ecoalf.com/24920-thickb...",ecoalf,"Marangu padded vest, made from recycled polyes...",2021-10-18,Jackets and coats,Woman,No
8,Mulhacen Woman Jacket,"{"" Recycled polyester (UTO)"": ""60%"", "" Recycle...","[""armygreen"", ""ash"", ""deepnavy"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 505.00,https://ecoalf.com/en/women/2120321-mulhacen-w...,"{""armygreen"": ""https://ecoalf.com/26443-thickb...",ecoalf,The Mulhacen jacket will allow you to enjoy yo...,2021-10-18,Jackets and coats,Woman,No
9,Noir Reversible Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""darkbronze"", ""rosewood""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 399.90,https://ecoalf.com/en/women/2120325-noir-rever...,"{""armygreen"": ""https://ecoalf.com/25145-thickb...",ecoalf,The Noir jacket is made from recycled polyeste...,2021-10-18,Jackets and coats,Woman,No


In [None]:
# Save df to csv
ecoalf_table.to_csv('ecoalf_table.csv')

In [None]:
# Cleaning and combining data from all brands

In [None]:
pip install psycopg2-binary

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.1-cp37-cp37m-macosx_10_14_x86_64.macosx_10_9_intel.macosx_10_9_x86_64.macosx_10_10_intel.macosx_10_10_x86_64.whl (2.1 MB)
[K     |████████████████████████████████| 2.1 MB 7.9 MB/s eta 0:00:01
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.1
Note: you may need to restart the kernel to use updated packages.


In [None]:
import psycopg2
import pandas as pd
import numpy as np
import re
import datetime as dt

In [None]:
df = pd.read_csv('ecoalf_table.csv')

In [None]:
GranCat = pd.read_csv("LowLevelCatagoryRegex.csv", header=None, skiprows=[0])

def process_cat(df):
    # get all the values in category regex
    subCats = GranCat.iloc[:, 1:]
    Cats = []
    for index, row in subCats.iterrows():
        for i in GranCat.columns[1:]:
            if pd.isnull(row[i]): continue
            else: Cats.append(row[i])
               
    # assign potential category for each clothing row based on title, else other
    clothesCat = []
    for index, row in df.iterrows():
        rowCat = float("NaN")
        for cat in Cats:
            if re.search(cat, row['display_name'].lower()):
                rowCat = cat
                print(rowCat)
                break
        if pd.isnull(rowCat):
            clothesCat.append('other')
        else:
            clothesCat.append(rowCat)

    return clothesCat

# set parent category to each product
def get_cat(x):
    if x == 'other': return x
    else: return [GranCat[0][i] for i in GranCat.index if x in GranCat.iloc[i].to_list()][0]

# get low_level column given a df
def get_lowlevel(df):
    df['low_level_new'] = process_cat(df)
    df['low_level_new'] = df['low_level_new'].apply(lambda x: get_cat(x))
    return df

In [None]:
def get_id(df, date='20211025'):
    brand = df['brand_name'][0]
    clothing_ids = []
    for i in range(len(df.index)):
        clothing_id = date + brand + str(i)
        clothing_ids.append(clothing_id)
    return clothing_ids

In [None]:
df = pd.read_csv('ecoalf_table.csv')

In [None]:
df = get_lowlevel(df)

jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
vest
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
vest$
tops?
dresse?s?$
cardigan
jackets?
sweatshirts?
hoodies?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
shirts?
shirts?
shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
pants?
pants?
pants?
pants?
sk[io]rts?
pants?
pants?
dresse?s?$
dresse?s?
dresse?s?
dresse?s?$
dresse?s?
dresse?s?
sk[io]rts?
sk[io]rts?
sk[io]rts?
sk[io]rts?
dresse?s?
[jl]eggings?
vest$
cardigan
jackets?
sk[io]rts?
sk[io]rts?
jackets?
sk[io]rts?
jackets?
pants?
sneakers?
shirts?
jackets?
dresse?s?
sk[io]rts?
shirts?
jackets?
pants?
bags?
sweatshirts?
t-?shirts?
t-?shirts?
t-?shirts?
jackets?
jackets?
jackets?
jackets?
cardi
jackets?
jackets?
coats?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
vest
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?
jackets?


In [None]:
df[['display_name','low_level_new']]

Unnamed: 0,display_name,low_level_new
0,Lenox Long Woman,other
1,Asp Jacket Woman,Jackets
2,Marangu Jacket Woman,Jackets
3,Amu New Jacket Woman,Jackets
4,Gedre Jacket Woman,Jackets
...,...,...
257,Yale Mid Boot Sneakers Man,Sneakers
258,Jonic Slip On Man,other
259,Jonic Sneakers Man,Sneakers
260,Sandford Sneakers Kids,Sneakers


In [None]:
df[df['low_level_new']=='other'][['display_name','low_level_new']]

Unnamed: 0,display_name,low_level_new
0,Lenox Long Woman,other
23,Pino Jersey Woman,other
24,Pino Round Woman Jersey,other
26,Val Sleeve Woman,other
27,Vielha Turtle Seamless Woman,other
28,Vielha Round Seamless Woman,other
30,Cea Oversize Woman,other
31,Inma Jersey Woman,other
34,Llanes Because Woman...,other
36,Aldan Woman Sweat,other


In [None]:
df=df.drop(['low_level'], axis=1)

In [None]:
df = df.rename(columns={"low_level_new": "low_level"})

In [None]:
df['clothing_id'] = get_id(df)

In [None]:
df = df.set_index('clothing_id')

In [None]:
df=df.drop(['Unnamed: 0'], axis=1)

In [None]:
df_ecoalf = df

In [None]:
df_ecoalf.to_csv('ecoalf_table.csv')

In [None]:
# Cleaning low_level for OrganicBasic

In [None]:
df = pd.read_csv('OB+NJ_Combined_clean.csv')

In [None]:
df = df.set_index('clothing_id')

In [None]:
df = df.rename(columns={"materials": "product_material"})

In [None]:
df

Unnamed: 0_level_0,display_name,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,overallscore,secondhand,product_material
clothing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
20211017organicbasics1,TENCEL� Lite Boxers 2-Pack,"[""Black"", ""Navy"", ""Grey Melange"", ""White"", ""Da...","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",$70,https://us.organicbasics.com/products/mens-ten...,{'Navy': 'http://cdn.shopify.com/s/files/1/008...,Orgainc Basics,Classic boxer briefs made with an eco-friendly...,10/18/21,Underwear,Men,,No,"['95% lyocell', '5% elastane']"
20211017organicbasics2,Organic Cotton Boxers 2-pack,"[""Black"", ""Navy"", ""Grey Melange"", ""Cobalt"", ""W...","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",$55,https://us.organicbasics.com/products/mens-org...,{'Black': 'http://cdn.shopify.com/s/files/1/00...,Orgainc Basics,Boxer briefs made with long-form organic cotto...,10/18/21,Underwear,Men,,No,"['95% gots', '5% elastane']"
20211017organicbasics3,Organic Cotton Tee,"[""TerraToned Pine"", ""TerraToned Wood"", ""TerraT...","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",$70,https://us.organicbasics.com/products/mens-org...,{'TerraToned Slate': 'http://cdn.shopify.com/s...,Orgainc Basics,"Our crewneck tee made with mid weight, long-fo...",10/18/21,T-Shirts,Men,,No,['100% gots']
20211017organicbasics4,Organic Cotton Mid-Weight Sweat,"[""Navy"", ""Grey Melange"", ""TerraToned Wood"", ""T...","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",$100,https://us.organicbasics.com/products/mens-org...,{'TerraToned Slate': 'http://cdn.shopify.com/s...,Orgainc Basics,"Mid-weight, crewneck sweatshirt made with pure...",10/18/21,other,Men,,No,['100% gots']
20211017organicbasics5,Organic Cotton Briefs 2-pack,"[""Black"", ""Oak"", ""Light Blue"", ""Navy"", ""Cobalt...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",$50,https://us.organicbasics.com/products/womens-o...,{'Black': 'http://cdn.shopify.com/s/files/1/00...,Orgainc Basics,Our classic organic cotton bikini style briefs...,10/18/21,Underwear,Women,,No,"['95% gots', '5% elastane']"
20211017organicbasics6,Organic Cotton Triangle Bra,"[""Black"", ""Oak"", ""Light Blue"", ""Navy"", ""Cobalt...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",$55,https://us.organicbasics.com/products/womens-o...,{'Rose Nude': 'http://cdn.shopify.com/s/files/...,Orgainc Basics,"Triangle bra made in organic cotton,Adjustable...",10/18/21,Bra,Women,,No,"['95% gots', '5% elastane']"
20211017organicbasics7,Organic Cotton Stretch Tee,"[""Black"", ""Navy"", ""Grey"", ""White""]","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",$39,https://us.organicbasics.com/products/mens-org...,{'Grey': 'http://cdn.shopify.com/s/files/1/008...,Orgainc Basics,"Our basic crewneck tee made with mid weight, l...",10/18/21,T-Shirts,Men,,No,"['95% gots', '5% elastane']"
20211017organicbasics8,Organic Cotton Turtleneck,"[""Black"", ""Navy"", ""Grey"", ""White""]","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",$45,https://us.organicbasics.com/products/mens-org...,{'Navy': 'http://cdn.shopify.com/s/files/1/008...,Orgainc Basics,Our high neck Turtleneck is made with organic ...,10/18/21,Sweaters,Men,,No,"['95% gots', '5% elastane']"
20211017organicbasics9,SilverTech� Active Tee,"[""Black"", ""Sea Blue"", ""Forest Green"", ""Stone G...","[""S-M"", ""M-L"", ""L-XL""]",$90,https://us.organicbasics.com/products/mens-sil...,{'Forest Green': 'http://cdn.shopify.com/s/fil...,Orgainc Basics,"High performance, odorless t-shirt made with r...",10/18/21,T-Shirts,Men,,No,['100% recycled nylon']
20211017organicbasics10,TENCEL� Lite Long Johns,"[""Black"", ""Grey Melange"", ""Navy"", ""White"", ""Da...","[""S"", ""M"", ""L"", ""XL"", ""XXL""]",$75,https://us.organicbasics.com/products/mens-ten...,{'Grey Melange': 'http://cdn.shopify.com/s/fil...,Orgainc Basics,"Our Lite Long Johns are made for lounging, sle...",10/18/21,other,Men,,No,"['95% lyocell', '5% elastane']"


In [None]:
df=df.drop(['materials1'], axis=1)

In [None]:
df['scrapped_date']='10/18/21'

In [None]:
df_ob_nj = df

In [None]:
df_kto = pd.read_csv('KnowtheOrigin_table.csv')

In [None]:
df_kto = df_kto.set_index('clothing_id')

In [None]:
df_kto = df_kto.rename(columns={"image_color_link": "image_link_color"})

In [None]:
df_kto

Unnamed: 0_level_0,display_name,product_url,price,size,description,brand_name,secondhand,scrapped_date,low_level,gender,color,image_link_color,product_material
clothing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
20211021KnowtheOrigin0,Topanga Recycled PET Bra Plum,https://knowtheorigin.com/products/copy-of-top...,$48.99,"[""XXS"", ""XS"", ""S"", ""M"", ""L"", ""XL"", ""XXL"", ""XXX...","The high-neck cross-back sports bra, produced ...",KnowtheOrigin,No,2021-10-21,Bra,Women,Plum,{'Plum': 'https://cdn.shopify.com/s/files/1/23...,"['79% recycled pet', '21% spandex']"
20211021KnowtheOrigin1,Flora Organic Cotton Check Dress Navy,https://knowtheorigin.com/products/flora-check...,$130.99,"[""XS"", ""S"", ""M"", ""L"", ""XL""]",Loose fit navy turtleneck dress in check print...,KnowtheOrigin,No,2021-10-21,Midi Dresses,Women,Check,{'Check': 'https://cdn.shopify.com/s/files/1/2...,['100% organic cotton']
20211021KnowtheOrigin2,Modelo 89' Recycled Vegan Trainer Off White,https://knowtheorigin.com/products/copy-of-mod...,$171.99,"[""36"", ""37"", ""38"", ""39"", ""40"", ""41"", ""42"", ""43...",The Model 89 is the signature style from 80s i...,KnowtheOrigin,No,2021-10-21,Sneakers,Women,White,{'White': 'https://cdn.shopify.com/s/files/1/2...,"['35% rpet', '11% polyester', '54% highsolid']"
20211021KnowtheOrigin3,Modelo 89' Recycled Vegan Trainer Beige & White,https://knowtheorigin.com/products/modelo-89-r...,$171.99,"[""36"", ""37"", ""38"", ""39"", ""40"", ""41"", ""42"", ""43...",The Model 89 is the signature style from 80s i...,KnowtheOrigin,No,2021-10-21,Sneakers,Women,Beige,{'Beige': 'https://cdn.shopify.com/s/files/1/2...,"['35% rpet', '11% polyester', '54% highsolid']"
20211021KnowtheOrigin4,Handy 100% Recycled PET Backpack Rust,https://knowtheorigin.com/products/handy-100-r...,$61.99,[],The stylishly simple Handy Backpack is one of ...,KnowtheOrigin,No,2021-10-21,Backpack,Women,Rust,{'Rust': 'https://cdn.shopify.com/s/files/1/23...,['100% organic cotton']
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20211021KnowtheOrigin250,Space Socks Denim Blue,https://knowtheorigin.com/products/space-socks...,"$6.99,$9.99","[""7-11""]",A sustainable space print sock. Produced in a ...,KnowtheOrigin,No,2021-10-21,Tights/Socks,Men,Sock,{'Sock': 'https://cdn.shopify.com/s/files/1/23...,"['54% viscose', '22% organic cotton', '22% rec..."
20211021KnowtheOrigin251,Organic Cotton Short Sleeved T-Shirt Washed Grey,https://knowtheorigin.com/products/grey-washed...,$38.99,"[""S"", ""M"", ""L"", ""XL""]",Model is wearing Size M and is 182cm.Made in: ...,KnowtheOrigin,No,2021-10-21,T-Shirts,Men,Grey,{'Grey': 'https://cdn.shopify.com/s/files/1/23...,['100% organic cotton']
20211021KnowtheOrigin252,Recycled Denim Tote Bag,https://knowtheorigin.com/products/wawwa-recyc...,$48.99,[],A logo tote in recycled denim. This useful tot...,KnowtheOrigin,No,2021-10-21,Handbag,Men,,{'': 'https://cdn.shopify.com/s/files/1/2364/4...,['100% recycled materials']
20211021KnowtheOrigin253,Hajo Mini Lotus Backpack Nude,https://knowtheorigin.com/products/hajo-mini-n...,$103.99,[],The Vegan Hajo Unisex Backpack is a minimalist...,KnowtheOrigin,No,2021-10-21,Backpack,Men,Nude,{'Nude': 'https://cdn.shopify.com/s/files/1/23...,['100% organic cotton']


In [None]:
df = pd.read_csv('ABCH_fordays.csv')

In [None]:
df

Unnamed: 0.1,Unnamed: 0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,low_level,gender,secondhand
0,0,T-shirtDress,{'Cotton': '100% '},['Black'],"['XXS', 'XS', 'S', 'M', 'L', 'XL', 'XXL', 'C']",USD 90,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,"T-shirt dresses are a tricky thing to master, ...",27:26.6,dresses,women,No
1,1,SleevelessSkivvyDress,{'Cotton': '100% '},['Black'],"['XXS', 'XS', 'S', 'M', 'L', 'XL', 'XXL', 'C']",USD 145,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,The oh-so-elegant A.14 sleeveless skivvy dress...,27:26.9,dresses,women,No
2,2,LongSleeveSkivvyDress,{'Cotton': '100% '},['Black'],"['XXS', 'XS', 'S', 'M', 'L', 'XL', 'XXL', 'C']",USD 150,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,The oh-so-elegant A.14 long sleeve skivvy dres...,27:27.2,dresses,women,No
3,3,ShortLinenDress,{'Linen': '100%'},['Black'],"['XS', 'S', 'M', 'L', 'XL', 'C']",USD 295,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,"A modern summer dress, breezy and light with p...",27:27.5,dresses,women,No
4,4,TencelLoungeDress,{'Linen': '100%'},['Black'],"['XS', 'S', 'M', 'L', 'XL', 'C']",USD 355,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,"<span>Our answer to the silk dress, the A.23 i...",27:27.8,dresses,women,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280,103,LogoOOOJogger,{'Cotton': '100%'},"['LG3501W-BLACK-l', 'LG3501W-SAGE-l', 'WSizeCh...","['\n SAGE\n ', 'XS', 'S', ...",USD 54.00,https://fordays.com/collections/women-sweats/p...,{'LG3501W-BLACK-l': '//cdn.shopify.com/s/files...,fordays,Easy-fit that has shape but still feels uber c...,29:10.6,women-sweats,women,No
281,104,CoolDaysJogger,{'Cotton': '100%'},"['150014W-DARKGREEN-l', '150014W-BLACK-l', '15...","['\n ECRU\n ', '\n ...",USD 54.00,https://fordays.com/collections/women-sweats/p...,{'150014W-DARKGREEN-l': '//cdn.shopify.com/s/f...,fordays,"The real #1 WFH staple? A comfy, camera-ready ...",29:11.1,women-sweats,women,No
282,105,40'sHalfZip,{'Cotton': '100%'},"['S03201W-BLACK-l', 'S03201W-CREAM-l', 'WSizeC...","['\n BLACK\n ', 'XS', 'S',...",USD 52.00,https://fordays.com/collections/women-sweats/p...,{'S03201W-BLACK-l': '//cdn.shopify.com/s/files...,fordays,"Vintage inspired with a wide rib waistband, sl...",29:11.6,women-sweats,women,No
283,106,Re-FreshFleeceJogger,{'Cotton': '100%'},"['350004U_CINNAMON_', '350004U_NAVY_', '350004...","['\n NAVY\n ', '\n ...",USD 66.00,https://fordays.com/collections/women-sweats/p...,{'350004U_CINNAMON_': '//cdn.shopify.com/s/fil...,fordays,Endless possibilities in this classic Jogger. ...,29:12.2,women-sweats,women,No


In [None]:
df = get_lowlevel(df)

dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
dresse?s?$
coats?
jackets?
sweaters?
sweaters?
sweaters?
sk[io]rts?
sk[io]rts?
sk[io]rts?
sk[io]rts?
skivvy
dresse?s?$
dresse?s?$
shorts
sleeveless
skivvy
dresse?s?$
dresse?s?$
sleeveless
shorts
shorts
sleeveless
dresse?s?$
dresse?s?$
skivvy
dresse?s?$
skivvy
shorts
dresse?s?$
sleeveless
skivvy
shorts
dresse?s?$
sleeveless
dresse?s?$
skivvy
shorts
sleeveless
dresse?s?$
dresse?s?$
dresse?s?$
sweaters?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
sweaters?
sweaters?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
sweaters?
sweaters?
t-?shirts?
t-?shirts?
dresse?s?$
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
sweaters?
dresse?s?$
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shir

In [None]:
df[['display_name','low_level_new']]

Unnamed: 0,display_name,low_level_new
0,T-shirtDress,Midi Dresses
1,SleevelessSkivvyDress,Midi Dresses
2,LongSleeveSkivvyDress,Midi Dresses
3,ShortLinenDress,Midi Dresses
4,TencelLoungeDress,Midi Dresses
...,...,...
280,LogoOOOJogger,Joggers
281,CoolDaysJogger,Joggers
282,40'sHalfZip,other
283,Re-FreshFleeceJogger,Joggers


In [None]:
len(df[df['low_level_new']=='other'][['display_name','low_level_new']])

40

In [None]:
df = df.drop(['low_level'], axis=1)

In [None]:
df = df.rename(columns={"low_level_new": "low_level"})

In [None]:
df['clothing_id'] = get_id(df)

In [None]:
df = df.set_index('clothing_id')

In [None]:
df

Unnamed: 0_level_0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,gender,secondhand,low_level
clothing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
20211025A.BCH0,T-shirtDress,{'Cotton': '100% '},['Black'],"['XXS', 'XS', 'S', 'M', 'L', 'XL', 'XXL', 'C']",USD 90,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,"T-shirt dresses are a tricky thing to master, ...",27:26.6,women,No,Midi Dresses
20211025A.BCH1,SleevelessSkivvyDress,{'Cotton': '100% '},['Black'],"['XXS', 'XS', 'S', 'M', 'L', 'XL', 'XXL', 'C']",USD 145,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,The oh-so-elegant A.14 sleeveless skivvy dress...,27:26.9,women,No,Midi Dresses
20211025A.BCH2,LongSleeveSkivvyDress,{'Cotton': '100% '},['Black'],"['XXS', 'XS', 'S', 'M', 'L', 'XL', 'XXL', 'C']",USD 150,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,The oh-so-elegant A.14 long sleeve skivvy dres...,27:27.2,women,No,Midi Dresses
20211025A.BCH3,ShortLinenDress,{'Linen': '100%'},['Black'],"['XS', 'S', 'M', 'L', 'XL', 'C']",USD 295,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,"A modern summer dress, breezy and light with p...",27:27.5,women,No,Midi Dresses
20211025A.BCH4,TencelLoungeDress,{'Linen': '100%'},['Black'],"['XS', 'S', 'M', 'L', 'XL', 'C']",USD 355,https://abch.world/collections/womens/products...,{'Black': '//cdn.shopify.com/s/files/1/1637/85...,A.BCH,"<span>Our answer to the silk dress, the A.23 i...",27:27.8,women,No,Midi Dresses
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20211025A.BCH280,LogoOOOJogger,{'Cotton': '100%'},"['LG3501W-BLACK-l', 'LG3501W-SAGE-l', 'WSizeCh...","['\n SAGE\n ', 'XS', 'S', ...",USD 54.00,https://fordays.com/collections/women-sweats/p...,{'LG3501W-BLACK-l': '//cdn.shopify.com/s/files...,fordays,Easy-fit that has shape but still feels uber c...,29:10.6,women,No,Joggers
20211025A.BCH281,CoolDaysJogger,{'Cotton': '100%'},"['150014W-DARKGREEN-l', '150014W-BLACK-l', '15...","['\n ECRU\n ', '\n ...",USD 54.00,https://fordays.com/collections/women-sweats/p...,{'150014W-DARKGREEN-l': '//cdn.shopify.com/s/f...,fordays,"The real #1 WFH staple? A comfy, camera-ready ...",29:11.1,women,No,Joggers
20211025A.BCH282,40'sHalfZip,{'Cotton': '100%'},"['S03201W-BLACK-l', 'S03201W-CREAM-l', 'WSizeC...","['\n BLACK\n ', 'XS', 'S',...",USD 52.00,https://fordays.com/collections/women-sweats/p...,{'S03201W-BLACK-l': '//cdn.shopify.com/s/files...,fordays,"Vintage inspired with a wide rib waistband, sl...",29:11.6,women,No,other
20211025A.BCH283,Re-FreshFleeceJogger,{'Cotton': '100%'},"['350004U_CINNAMON_', '350004U_NAVY_', '350004...","['\n NAVY\n ', '\n ...",USD 66.00,https://fordays.com/collections/women-sweats/p...,{'350004U_CINNAMON_': '//cdn.shopify.com/s/fil...,fordays,Endless possibilities in this classic Jogger. ...,29:12.2,women,No,Joggers


In [None]:
df=df.drop(['Unnamed: 0'], axis=1)

In [None]:
df_abch_fd = df

In [None]:
df = pd.read_csv('pangaia_table 2.csv')

In [None]:
df = get_lowlevel(df)

sk[io]rts?
sk[io]rts?
sk[io]rts?
tank
tank
tank
hoodies?
hoodies?
hoodies?
hoodies?
pants?
pants?
pants?
pants?
shorts
shorts
shorts
shorts
shorts
shorts
shorts
shorts
shorts
shorts
shorts
shorts
shorts
t-?shirts?
pants?
t-?shirts?
pants?
hoodies?
hoodies?
shorts
hoodies?
hoodies?
sweatshirts?
hoodies?
sweatshirts?
sweatshirts?
shorts
shorts
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
t-?shirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
sweatshirts?
t-?shirts?
t-?shirts?
t-?shirts?
hoodies?
shorts
shorts
shorts
shorts
shorts
shorts
sweatshirts?
shorts
caps?
cuff
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
hats?
hats?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
t-?shirts?
cuff
cuff
beanies?
cuff
beanies?
cuff
cuff
cuff
bombers?
sweatshirts?
sweatshirts?
bombers?
hoodies?
bombers?
pants?
pant

In [None]:
df[df['low_level_new']=='other'][['display_name','low_level_new']]

Unnamed: 0,display_name,low_level_new
0,5 Tree Credits—the seedling,other
1,10 Tree Credits—the woodland,other
2,15 Tree Credits—the forest,other
30,Pangaia E-Gift Card—,other
31,50 Tree Credits—mother earth,other
201,Recycled Cashmere Set—pale grey melange,other
202,Recycled Cashmere Set—orchid purple,other
203,Recycled Cashmere Set—oatmeal,other
204,Recycled Cashmere Set—camel,other
205,Recycled Cashmere Set—burgundy,other


In [None]:
df = df.drop(['low_level'], axis=1)

In [None]:
df = df.rename(columns={"low_level_new": "low_level"})

In [None]:
df['clothing_id'] = get_id(df)

In [None]:
df = df.set_index('clothing_id')

In [None]:
df_pan = df

In [None]:
df = pd.read_csv('eileenfisher_table.csv')

In [None]:
df['clothing_id'] = get_id(df)

In [None]:
df = df.set_index('clothing_id')

In [None]:
df_ef = df

In [None]:
df_pan = df_pan.drop(['clothing_id'], axis=1)

In [None]:
df_pan

Unnamed: 0_level_0,display_name,product_material,color,size,price,product_url,brand_name,description,scrapped_date,gender,secondhand,low_level
clothing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
20211025PANGAIA0,5 Tree Credits—the seedling,,,,5.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,"THE SEEDLING DONATION = 5 TREES PLANTED, PR...",2021-10-22,,No,other
20211025PANGAIA1,10 Tree Credits—the woodland,,,,10.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,\n \n \n THE WOODLAND DONATION = 10 TREES P...,2021-10-22,,No,other
20211025PANGAIA2,15 Tree Credits—the forest,,,,15.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,"THE FOREST DONATION = 15 TREES PLANTED, PRO...",2021-10-22,,No,other
20211025PANGAIA3,Women's Summer Towelling Wrap Skirt—orchid purple,,,,56.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,Consider this your beach best friend. \n The ...,2021-10-22,Women,No,Skirts
20211025PANGAIA4,Women's Summer Towelling Wrap Skirt—flamingo pink,,,,56.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,Consider this your beach best friend. \n The ...,2021-10-22,Women,No,Skirts
20211025PANGAIA5,Women's Summer Towelling Wrap Skirt—black,,,,56.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,Consider this your beach best friend. \n The ...,2021-10-22,Women,No,Skirts
20211025PANGAIA6,Women's Summer Towelling Tank Top—flamingo pink,,,,35.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,Consider this your beach best friend. \n The ...,2021-10-22,Women,No,Tank Tops
20211025PANGAIA7,Women's Summer Towelling Tank Top—cobalt blue,,,,35.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,Consider this your beach best friend. \n The ...,2021-10-22,Women,No,Tank Tops
20211025PANGAIA8,Women's Summer Towelling Tank Top—black,,,,35.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,Consider this your beach best friend. \n The ...,2021-10-22,Women,No,Tank Tops
20211025PANGAIA9,Women's Summer Towelling Slim Fit Hoodie—stone,,,,98.0,https://cdn.shopify.com/s/files/1/0035/1309/01...,PANGAIA,Consider this part of your beach tracksuit. \...,2021-10-22,Women,No,Hoodies


In [None]:
combined_table = pd.concat([df_ecoalf,df_ob_nj,df_kto,df_abch_fd,df_pan,df_ef])

In [None]:
combined_table

Unnamed: 0_level_0,display_name,product_material,color,size,price,product_url,image_link_color,brand_name,description,scrapped_date,gender,secondhand,low_level,overallscore
clothing_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
20211025ecoalf0,Lenox Long Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""darkbronze"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 429.90,https://ecoalf.com/en/women/2120291-lenoxalf-l...,"{""armygreen"": ""https://ecoalf.com/26343-thickb...",ecoalf,The Lenox coat is made with recycled polyester...,2021-10-18,Woman,No,other,
20211025ecoalf1,Asp Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""deepnavy"", ""oldmustard"",...","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 275.00,https://ecoalf.com/en/women/2120227-asp-jacket...,"{""armygreen"": ""https://ecoalf.com/27548-thickb...",ecoalf,The Asp jacket is made from recycled polyester...,2021-10-18,Woman,No,Jackets,
20211025ecoalf2,Marangu Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""whitesand"", ""deepforest"", ""armygreen"", ""deep...","[""XS"", ""S"", ""M"", ""L"", ""XL"", ""XXL""]",USD 375.00,https://ecoalf.com/en/women/2120309-marangu-ja...,"{""whitesand"": ""https://ecoalf.com/26390-thickb...",ecoalf,The three-quarter MARANGU coat is made from re...,2021-10-18,Woman,No,Jackets,
20211025ecoalf3,Amu New Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""ash"", ""darkbronze"", ""asphalt""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 375.00,https://ecoalf.com/en/women/2120202-amu-new-ja...,"{""armygreen"": ""https://ecoalf.com/27536-thickb...",ecoalf,"The Amu jacket is a must-have this winter, com...",2021-10-18,Woman,No,Jackets,
20211025ecoalf4,Gedre Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""darkpurple"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 315.00,https://ecoalf.com/en/women/2120261-gedre-jack...,"{""armygreen"": ""https://ecoalf.com/26301-thickb...",ecoalf,Look after the Planet and yourself while weari...,2021-10-18,Woman,No,Jackets,
20211025ecoalf5,Glacier Jacket Woman,"{"" Recycled polyester (UTO)"": ""60%"", "" Recycle...","[""antartica"", ""armygreen"", ""deepnavy"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 685.00,https://ecoalf.com/en/women/2120266-glacier-ja...,"{""antartica"": ""https://ecoalf.com/26313-thickb...",ecoalf,With the Glacier jacket rainy days will be fun...,2021-10-18,Woman,No,Jackets,
20211025ecoalf6,Mandu Jacket Woman,"{"" Polyester"": ""49%"", "" S.CAF\u00c9\u00ae Recy...","[""offwhite"", ""oldmustard"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 575.00,https://ecoalf.com/en/women/2120300-mandu-jack...,"{""offwhite"": ""https://ecoalf.com/26366-thickbo...",ecoalf,A 3-in-1 jacket with an outer shell that is wa...,2021-10-18,Woman,No,Jackets,
20211025ecoalf7,Marangu Vest Woman,"{""\u00a0Recycled polyester"": ""100%""}","[""whitesand"", ""armygreen"", ""black""]","[""S"", ""M"", ""L""]",USD 289.90,https://ecoalf.com/en/women/2120314-marangu-ve...,"{""whitesand"": ""https://ecoalf.com/24920-thickb...",ecoalf,"Marangu padded vest, made from recycled polyes...",2021-10-18,Woman,No,Filled Outerwear,
20211025ecoalf8,Mulhacen Woman Jacket,"{"" Recycled polyester (UTO)"": ""60%"", "" Recycle...","[""armygreen"", ""ash"", ""deepnavy"", ""black""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 505.00,https://ecoalf.com/en/women/2120321-mulhacen-w...,"{""armygreen"": ""https://ecoalf.com/26443-thickb...",ecoalf,The Mulhacen jacket will allow you to enjoy yo...,2021-10-18,Woman,No,Jackets,
20211025ecoalf9,Noir Reversible Jacket Woman,"{"" Recycled polyester"": ""100%""}","[""armygreen"", ""darkbronze"", ""rosewood""]","[""XS"", ""S"", ""M"", ""L"", ""XL""]",USD 399.90,https://ecoalf.com/en/women/2120325-noir-rever...,"{""armygreen"": ""https://ecoalf.com/25145-thickb...",ecoalf,The Noir jacket is made from recycled polyeste...,2021-10-18,Woman,No,Jackets,


In [None]:
combined_table.to_csv('combined_table_new.csv')

In [None]:
# Other trial and error attemps

In [None]:
combined_table['image_link_color'][1200]

'https://cdn.nudiejeans.com/img/Grim-Tim-Dry-Deep-Selvage-113292-05_400x400.jpg'

In [None]:
dic = {'color':combined_table['image_link_color'][1200]}

In [None]:
dic

{'color': 'https://cdn.nudiejeans.com/img/Grim-Tim-Dry-Deep-Selvage-113292-05_400x400.jpg'}

In [None]:
for item in dic.items():
    print(item)

('color', 'https://cdn.nudiejeans.com/img/Grim-Tim-Dry-Deep-Selvage-113292-05_400x400.jpg')


In [None]:
page = BeautifulSoup(requests.get("https://ecoalf.com/en/women-100").content,'lxml')

In [None]:
category_class = page.find_all('div',class_="elementor-iqit-banner")

In [None]:
href = page.find_all('div',class_="elementor-iqit-banner")[9].find('a').get('href')

In [None]:
"de" in href

True

In [None]:
if "de" in href:
    href = href.replace("de", "en")

In [None]:
href

'https://ecoalf.com/en/vestidos-y-faldas-185'

In [None]:
len(category_class)

20

In [None]:
cat_dict = {}
for item in category_class:
    try:
        c = item.find('div').get_text()
    except:
        t = None
    l = item.find('a').get('href')
    cat_dict[str(c)]=l

In [None]:
cat_dict

{'Jackets and coats': 'https://ecoalf.com/en/abrigos-y-chaquetas-120',
 'knitwear & sweaters': 'https://ecoalf.com/en/punto-y-jerseys-160',
 'sweatshirts': 'https://ecoalf.com/en/sudaderas-516',
 'SHIRTS': 'https://ecoalf.com/en/camisas-140',
 't-shirts': 'https://ecoalf.com/en/camisetas-110',
 'pants': 'https://ecoalf.com/en/pantalones-150',
 'skirts &dresses': 'https://ecoalf.com/en/ecoalf-1-0-new-premium-line-561',
 'YOGA': 'https://ecoalf.com/en/yoga-563',
 'Because THERE IS NO  PLANET B® COLLECTION': 'https://ecoalf.com/en/coleccion-because-503'}

In [None]:
text

['Jackets and coats',
 'knitwear & sweaters',
 'sweatshirts',
 'SHIRTS',
 't-shirts',
 'pants',
 'skirts &dresses',
 'YOGA',
 'Because THERE IS NO  PLANET B® COLLECTION']

In [None]:
page = BeautifulSoup(requests.get("https://ecoalf.com/en/abrigos-y-chaquetas-120").content,'lxml')

In [None]:
product_class = page.find_all('div',class_="thumbnail-container")
len(product_class)

77

In [None]:
product_class[0].find("img").get("alt")

'Lenox Long Woman'

In [None]:
product_dic = {}
for item in product_class:
    try:
        # Retrieve product name
        p = item.find("img").get("alt")
    except:
        p = None
    # Retrieve product link
    l = item.find('a').get('href')
    if "women" in l:
        # Store the name-link pair in category dictionary
        product_dic[str(p)]=l

In [None]:
product_dic

{'Lenox Long Woman': 'https://ecoalf.com/en/women/2120291-lenoxalf-long-detachable-vest-woman',
 'Asp Jacket Woman': 'https://ecoalf.com/en/women/2120227-asp-jacket-woman',
 'Marangu Jacket Woman': 'https://ecoalf.com/en/women/2120309-marangu-jacket-woman',
 'Amu New Jacket Woman': 'https://ecoalf.com/en/women/2120202-amu-new-jacket-woman',
 'Gedre Jacket Woman': 'https://ecoalf.com/en/women/2120261-gedre-jacket-woman',
 'Glacier Jacket Woman': 'https://ecoalf.com/en/women/2120266-glacier-jacket-woman',
 'Mandu Jacket Woman': 'https://ecoalf.com/en/women/2120300-mandu-jacket-woman',
 'Marangu Vest Woman': 'https://ecoalf.com/en/women/2120314-marangu-vest-woman',
 'Mulhacen Woman Jacket': 'https://ecoalf.com/en/women/2120321-mulhacen-woman-jacket',
 'Noir Reversible Jacket Woman': 'https://ecoalf.com/en/women/2120325-noir-reversible-jacket-woman',
 'Uma Jacket Woman': 'https://ecoalf.com/en/women/2120339-uma-jacket-woman',
 'Sarrau Corduroy Jacket Woman': 'https://ecoalf.com/en/women/21

In [None]:
page = BeautifulSoup(requests.get('https://ecoalf.com/en/women/2120290-lenoxalf-long-detachable-vest-woman').content,'lxml')

In [None]:
price = page.find('span', class_="product-price").get_text()

In [None]:
price = price.split('$')[1]
f'USD {price}'

'USD 115.00'

In [None]:
page = BeautifulSoup(requests.get('https://ecoalf.com/de/damen/2120376-riaza-woman-dress').content,'lxml')

In [None]:
price=page.find('span', class_="product-price").get_text()

In [None]:
price.split('$')

['185,00\xa0', '']

In [None]:
color_class = page.find('div', class_="block-content d-flex justify-content-center justify-content-md-start mb-2 mt-3").find_all('a')

In [None]:
# Create an empty image_link_color dictionary
image_link_color = {}
color = []
for item in color_class:
    link = item.get('href')
    image = BeautifulSoup(requests.get(link).content,'lxml').find('div', class_="product-lmage-large swiper-slide").find('a').get('href')
    c = item.get('title').strip()
    color.append(c)
    image_link_color[str(c)]=image

In [None]:
image_link_color

{'armygreen': 'https://ecoalf.com/26343-thickbox_default/lenoxalf-long-detachable-vest-woman.jpg',
 'ash': 'https://ecoalf.com/26349-thickbox_default/lenoxalf-long-detachable-vest-woman.jpg',
 'darkbronze': 'https://ecoalf.com/26358-thickbox_default/lenoxalf-long-detachable-vest-woman.jpg',
 'black': 'https://ecoalf.com/26362-thickbox_default/lenoxalf-long-detachable-vest-woman.jpg'}

In [None]:
json.dumps(image_link_color)

'{"armygreen": "https://ecoalf.com/en/women/2120288-9903-lenoxalf-long-detachable-vest-woman#/7-talla-l", "ash": "https://ecoalf.com/en/women/2120289-9908-lenoxalf-long-detachable-vest-woman#/7-talla-l", "darkbronze": "https://ecoalf.com/en/women/2120290-9913-lenoxalf-long-detachable-vest-woman#/7-talla-l", "black": "https://ecoalf.com/en/women/2120291-9918-lenoxalf-long-detachable-vest-woman#/7-talla-l"}'

In [None]:
page.find('div', class_="product-lmage-large swiper-slide").find('a').get('href')

'https://ecoalf.com/18408-thickbox_default/abrigo-madison-.jpg'

In [None]:
page = BeautifulSoup(requests.get('https://ecoalf.com/en/inicio/1920479-camiseta-mariela-').content,'lxml')

In [None]:
page.find('div',class_="nomcolor text-capitalize mb-4 text-center text-md-left").get_text()

'greymelange'

In [None]:
page.find('div', class_="product-lmage-large swiper-slide").find('a').get('href')

'https://ecoalf.com/9272-thickbox_default/camiseta-mariela-.jpg'

In [None]:
# Create an empty image_link_color dictionary
image_link_color = {}
color = []
for item in color_class:
    link = item.get('href')
    c = item.get('title').strip()
    color.append(c)
    image_link_color[str(c)]=link

In [None]:
image_link_color

{'armygreen': 'https://ecoalf.com/en/women/2120288-9903-lenoxalf-long-detachable-vest-woman#/7-talla-l',
 'ash': 'https://ecoalf.com/en/jackets-and-coats/2120289-9908-lenoxalf-long-detachable-vest-woman#/7-talla-l',
 'darkbronze': 'https://ecoalf.com/en/women/2120290-9913-lenoxalf-long-detachable-vest-woman#/7-talla-l',
 'black': 'https://ecoalf.com/en/women/2120291-9918-lenoxalf-long-detachable-vest-woman#/7-talla-l'}

In [None]:
size_class = page.find_all('li',class_="input-container float-left")
size_class

[<li class="input-container float-left">
 <input class="input-radio" data-product-attribute="1" data-product-size="XS" name="group[1]" type="radio" value="11"/>
 <span class="radio-label">XS</span>
 </li>,
 <li class="input-container float-left">
 <input class="input-radio" data-product-attribute="1" data-product-size="S" name="group[1]" type="radio" value="10"/>
 <span class="radio-label">S</span>
 </li>,
 <li class="input-container float-left">
 <input class="input-radio" data-product-attribute="1" data-product-size="M" name="group[1]" type="radio" value="6"/>
 <span class="radio-label">M</span>
 </li>,
 <li class="input-container float-left">
 <input checked="checked" class="input-radio" data-product-attribute="1" data-product-size="L" name="group[1]" type="radio" value="7"/>
 <span class="radio-label">L</span>
 </li>,
 <li class="input-container float-left">
 <input class="input-radio" data-product-attribute="1" data-product-size="XL" name="group[1]" type="radio" value="8"/>
 <span

In [None]:
size_class[0].find('span').get_text()

'XS'

In [None]:
# Create an empty size list
size = []
for item in size_class:
    s = item.find('span').get_text()
    size.append(s)

In [None]:
size

['XS', 'S', 'M', 'L', 'XL']

In [None]:
json.dumps(size)

'["XS", "S", "M", "L", "XL"]'

In [None]:
description = page.find('div',class_="product-description text-left").find('div',class_="rte-content").get_text().strip()
description

'The Lenox coat is made with recycled polyester, making it a sustainable and very comfortable, warm and ultralight coat. An essential garment for this winter.'

In [None]:
scrapped_date = datetime.today().strftime('%Y-%m-%d')
scrapped_date

'2021-10-06'

In [None]:
details = page.find('div', class_="product-description-short rte-content").find_all('li')
details

[<li>Women's coat</li>,
 <li>Turtle neck</li>,
 <li>Hood</li>,
 <li>Zip closure</li>,
 <li>Main fabric: 100% Recycled polyester </li>,
 <li>Lining: 100% Recycled polyester</li>,
 <li>Fill: 100% Polyester (Fellex®)</li>,
 <li>This item contains bluesign® APPROVED fabrics</li>,
 <li>PFC-free</li>,
 <li>Vegan</li>]

In [None]:
"Main fabric" in detail[4].get_text()

True

In [None]:
detail[4].get_text().strip().split("Main fabric: ")[1].split(" / ")

['100% Recycled polyester']

In [None]:
test = "Main fabric: 40% Recycled polyester / 50% Organic cotton / 50% Recycled wool / 5% Recycled cotton"

In [None]:
materials = test.split("Main fabric: ")[1].split(" / ")
materials

['40% Recycled polyester',
 '50% Organic cotton',
 '50% Recycled wool',
 '5% Recycled cotton']

In [None]:
materials[0].split('%')

['40', ' Recycled polyester']

In [None]:
pct = materials[0].split('%')[0]

In [None]:
f'{pct}%'

'100%%'

In [None]:
# Create an empty product_material dictionary
product_material = {}
for item in details:
    text = item.get_text()
    if "Main fabric" in text:
        materials = text.strip().split("Main fabric: ")[1].split(" / ")
        for item in materials:
            pct = item.split('%')[0]
            pct = f'{pct}%'
            mtr = item.split('%')[1]
            product_material[str(mtr)]=pct

In [None]:
json.dumps(product_material)

'{" Recycled polyester": "100%"}'

In [None]:
page = BeautifulSoup(requests.get('https://ecoalf.com/de/damen/2120192-noja-dress-woman').content,'lxml')

In [None]:
page.find('div',class_="nomcolor text-capitalize mb-4 text-center text-md-left").get_text()

'whitesand'

In [None]:
i = [2]
b = [1,2,3]

In [None]:
a = i.append(b)

In [None]:
print(a)

None


In [None]:
page = BeautifulSoup(requests.get('https://ecoalf.com/en/sneakers-270').content,'lxml')

In [None]:
page.find_all('div',class_="thumbnail-container")[0]

<div class="thumbnail-container">
<a class="thumbnail product-thumbnail" href="https://ecoalf.com/en/men/2120623-tenis-sneakers-man">
<img alt="Prince Sneakers Man" class="img-fluid js-lazy-product-image lazy-product-image product-thumbnail-first" data-full-size-image-url="https://ecoalf.com/23496-thickbox_default/tenis-sneakers-man.jpg" data-src="https://ecoalf.com/23496-home_default/tenis-sneakers-man.jpg" height="1800" src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1200 1800'%3E%3C/svg%3E" width="1200"/>
<img alt="Prince Sneakers Man 2" class="img-fluid js-lazy-product-image lazy-product-image product-thumbnail-second" data-src="https://ecoalf.com/23498-home_default/tenis-sneakers-man.jpg" height="1800" src="data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 1200 1800'%3E%3C/svg%3E" width="1200"/>
</a>
</div>