In [1]:
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd
import os
from urllib.parse import urlparse, parse_qs

In [2]:
# Folders that hold the listing CSVs for each product category
hiking_dir, cosplay_dir = 'scraped/hiking/', 'scraped/cosplay/'
# Display the files available in the hiking folder
os.listdir(path=hiking_dir)

['tongkat_gunung.csv',
 'alat_masak_gunung.csv',
 'tenda_gunung.csv',
 'matras_gunung.csv',
 'alat_mendaki.csv']

In [3]:
# Display the files available in the cosplay folder
os.listdir(path=cosplay_dir)

['kostum.csv',
 'topeng_cosplay.csv',
 'sepatu_cosplay.csv',
 'aksesoris_anak.csv',
 'kostum_anak.csv']

In [4]:
def csv_to_df(dir, file):
    """Load the CSV called ``file`` located in directory ``dir`` as a DataFrame."""
    csv_path = os.path.join(dir, file)
    frame = pd.read_csv(csv_path)
    return frame

In [5]:
# Hiking
tongkat_gunung_df = csv_to_df(hiking_dir, 'tongkat_gunung.csv')
alat_masak_gunung_df = csv_to_df(hiking_dir, 'alat_masak_gunung.csv')
tenda_gunung_df = csv_to_df(hiking_dir, 'tenda_gunung.csv')
matras_gunung_df = csv_to_df(hiking_dir, 'matras_gunung.csv')
alat_mendaki_df = csv_to_df(hiking_dir, 'alat_mendaki.csv')

# Cosplay
kostum_df = csv_to_df(cosplay_dir, 'kostum.csv')
topeng_cosplay_df = csv_to_df(cosplay_dir, 'topeng_cosplay.csv')
sepatu_cosplay_df = csv_to_df(cosplay_dir, 'sepatu_cosplay.csv')
aksesoris_anak_df = csv_to_df(cosplay_dir, 'aksesoris_anak.csv')
kostum_anak_df = csv_to_df(cosplay_dir, 'kostum_anak.csv')

In [6]:
def _resolve_product_url(link):
    """Return the URL to open for a listing link, or None when there is none.

    A link that carries its destination in an ``r`` query parameter is
    unwrapped to that destination; any other absolute http(s) link is
    returned unchanged so it can be visited directly.
    """
    link_text = str(link)
    parsed_url = urlparse(link_text)
    redirect_targets = parse_qs(parsed_url.query).get('r')
    if redirect_targets:
        return redirect_targets[0]
    if parsed_url.scheme in ('http', 'https') and parsed_url.netloc:
        return link_text
    # Missing values (e.g. NaN -> 'nan') and other non-URLs end up here.
    return None


# Define a function to obtain items' description
def get_descs_renters(df):
    """Scrape the renter name and description for every link in ``df``.

    Args:
        df: DataFrame with a ``link`` column of listing URLs.

    Returns:
        DataFrame with one row per input link, in input order, and the
        columns ``renters`` and ``description``. A field that could not be
        obtained holds the string ``'N/A'``. The two columns are present even
        when ``df`` has no rows.

    A single Chrome session is opened for the whole call and is always closed,
    including when the loop is interrupted. Failures on individual pages are
    logged as warnings and do not stop the run.
    """
    import logging

    logger = logging.getLogger(__name__)
    missing = 'N/A'
    records = []
    driver = webdriver.Chrome()

    try:
        # Loop and parse each link
        for link in df['link']:
            renter_name = missing
            description = missing
            direct_url = _resolve_product_url(link)

            if direct_url is not None:
                try:
                    driver.get(direct_url)
                    # Fixed pause before reading the page source.
                    time.sleep(2)

                    soup = BeautifulSoup(driver.page_source, 'html.parser')
                    # The page source is already captured; this pause only
                    # spaces out successive requests.
                    time.sleep(1)

                    renter_tag = soup.find('h2', class_='css-1wdzqxj-unf-heading e1qvo2ff2')
                    desc_tag = soup.select_one('div[data-testid="lblPDPDescriptionProduk"]')

                    # Keep whichever field was found; a missing heading must
                    # not discard a description that was scraped (or vice versa).
                    if renter_tag is not None:
                        renter_name = renter_tag.text
                    if desc_tag is not None:
                        description = desc_tag.get_text(separator=' ')
                except Exception as exc:
                    # Best effort: record N/A for this link and carry on.
                    logger.warning('Could not scrape %s: %s', direct_url, exc)

            records.append({
                'renters': renter_name,
                'description': description
            })
    finally:
        # Quit the driver even if the loop was interrupted
        driver.quit()

    return pd.DataFrame(records, columns=['renters', 'description'])

In [7]:
# Hiking: scrape renter + description for each listing frame.
# Every call opens its own Chrome session and pauses about 3 seconds per link,
# so this cell is slow. The assignments are kept sequential so that results
# already obtained stay available if a later call is interrupted.
tongkat_gunung_desc_df = get_descs_renters(tongkat_gunung_df)
alat_masak_gunung_desc_df = get_descs_renters(alat_masak_gunung_df)
tenda_gunung_desc_df = get_descs_renters(tenda_gunung_df)
matras_gunung_desc_df = get_descs_renters(matras_gunung_df)
alat_mendaki_desc_df = get_descs_renters(alat_mendaki_df)

# Cosplay: same scraping step for the cosplay listing frames
kostum_desc_df = get_descs_renters(kostum_df)
topeng_cosplay_desc_df = get_descs_renters(topeng_cosplay_df)
aksesoris_anak_desc_df = get_descs_renters(aksesoris_anak_df)
kostum_anak_desc_df = get_descs_renters(kostum_anak_df)
sepatu_cosplay_desc_df = get_descs_renters(sepatu_cosplay_df)

In [13]:
# Scraped hiking frames, keyed by the base name used for their output CSV
hiking_desc_dfs = dict(
    tongkat_gunung_desc=tongkat_gunung_desc_df,
    alat_masak_gunung_desc=alat_masak_gunung_desc_df,
    tenda_gunung_desc=tenda_gunung_desc_df,
    matras_gunung_desc=matras_gunung_desc_df,
    alat_mendaki_desc=alat_mendaki_desc_df,
)

# Scraped cosplay frames, keyed the same way
cosplay_desc_dfs = dict(
    kostum_desc=kostum_desc_df,
    topeng_cosplay_desc=topeng_cosplay_desc_df,
    aksesoris_anak_desc=aksesoris_anak_desc_df,
    kostum_anak_desc=kostum_anak_desc_df,
    sepatu_cosplay_desc=sepatu_cosplay_desc_df,
)

In [14]:
# Output folders for the scraped description CSVs
hiking_detail_dir = 'scraped_desc/hiking/'
cosplay_detail_dir = 'scraped_desc/cosplay/'

# Create both folders; an already existing folder is left as it is
for _detail_dir in (hiking_detail_dir, cosplay_detail_dir):
    os.makedirs(_detail_dir, exist_ok=True)

In [15]:
def save_df_to_csv(df_dict, category_dir):
    """Write every DataFrame in ``df_dict`` into ``category_dir`` as a CSV file.

    Args:
        df_dict: Mapping of base file name (without extension) to DataFrame.
        category_dir: Directory that receives the files. It is created,
            including missing parents, when it does not exist yet.

    Each frame is written to ``<category_dir>/<name>.csv`` without the index
    column.
    """
    # Create the target directory here so the function does not depend on a
    # separate setup step having run first. An empty path means the current
    # directory, which needs no creation.
    if category_dir:
        os.makedirs(category_dir, exist_ok=True)

    for df_name, df in df_dict.items():
        file_name = df_name + '.csv'
        df.to_csv(os.path.join(category_dir, file_name), index=False)

In [16]:
# Write the hiking description frames to their output folder
save_df_to_csv(df_dict=hiking_desc_dfs, category_dir=hiking_detail_dir)

# Write the cosplay description frames to their output folder
save_df_to_csv(df_dict=cosplay_desc_dfs, category_dir=cosplay_detail_dir)