### Import libraries

In [None]:
import time
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import pandas as pd
import os
from urllib.parse import urlparse, parse_qs

### Read file directories

In [None]:
# Folders the per-category CSV files are read from.
hiking_dir = 'scraped/hiking/'
cosplay_dir = 'scraped/cosplay/'
# Bare expression: as the last line of a notebook cell its value
# (the entry names inside the hiking folder) is shown as the cell output.
os.listdir(hiking_dir)

In [None]:
# List the entry names inside the cosplay folder (shown as the cell output).
os.listdir(cosplay_dir)

In [None]:
def csv_to_df(dir, file):
    """Load the CSV named *file* located in folder *dir* into a DataFrame.

    Args:
        dir: Folder that contains the CSV file.
        file: File name (including extension) inside *dir*.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the joined path.
    """
    csv_path = os.path.join(dir, file)
    frame = pd.read_csv(csv_path)
    return frame

In [None]:
# Load every scraped CSV into its own DataFrame. The `link` column of these
# frames is what get_descs_renters reads later in the notebook.

# Hiking
kotak_p3k_df = csv_to_df(hiking_dir, 'kotak_p3k.csv')
matras_df = csv_to_df(hiking_dir, 'matras.csv')
pisau_lipat_df = csv_to_df(hiking_dir, 'pisau_lipat.csv')
thermal_blanket_df = csv_to_df(hiking_dir, 'thermal_blanket.csv')
headlamp_df = csv_to_df(hiking_dir, 'headlamp.csv')

# Cosplay
cosplay_df = csv_to_df(cosplay_dir, 'cosplay.csv')
kostum_cosplay_df = csv_to_df(cosplay_dir, 'kostum_cosplay.csv')
pakaian_cosplay_df = csv_to_df(cosplay_dir, 'pakaian_cosplay.csv')
topeng_cosplay_df = csv_to_df(cosplay_dir, 'topeng_cosplay.csv')
aksesoris_cosplay_df = csv_to_df(cosplay_dir, 'aksesoris_cosplay.csv')

### Get product seller and description by link

In [None]:
def _resolve_direct_url(link):
    """Return the URL stored in the ``r`` query parameter of *link*, or None."""
    try:
        query_params = parse_qs(urlparse(str(link)).query)
    except ValueError:
        # urlparse rejects some malformed URLs; treat them as unresolvable.
        return None
    return query_params.get('r', [None])[0]


def get_descs_renters(df):
    """Scrape the seller name and product description for each link in *df*.

    Every value of ``df['link']`` is expected to carry the real product URL
    in its ``r`` query parameter. Links without that parameter are skipped.
    Pages that fail to load, or that lack the seller heading or description
    element, are reported on stdout and skipped, so one bad page never
    aborts the whole run.

    Args:
        df: DataFrame with a ``link`` column.

    Returns:
        DataFrame with the columns ``renters`` and ``description``, one row
        per successfully scraped page. The columns are present even when no
        page could be scraped.
    """
    columns = ['renters', 'description']
    direct_urls = [
        url for url in map(_resolve_direct_url, df['link']) if url
    ]
    # Nothing to visit: avoid launching a browser at all.
    if not direct_urls:
        return pd.DataFrame(columns=columns)

    records = []
    driver = webdriver.Chrome()
    try:
        for direct_url in direct_urls:
            try:
                driver.get(direct_url)
                # Fixed pause to let the page render before reading it.
                time.sleep(2)
                soup = BeautifulSoup(driver.page_source, 'html.parser')
                # Pause kept from the original flow; it spaces out requests.
                time.sleep(1)
            except Exception as exc:
                # Best-effort scraping: report the failure and move on.
                print(f'Skipping {direct_url}: {exc}')
                continue

            renter = soup.find(
                'h2', class_='css-1wdzqxj-unf-heading e1qvo2ff2'
            )
            desc = soup.select_one(
                'div[data-testid="lblPDPDescriptionProduk"]'
            )
            if renter is None or desc is None:
                print(f'Skipping {direct_url}: seller or description not found')
                continue

            records.append({
                'renters': renter.text,
                'description': desc.get_text(separator=' '),
            })
    finally:
        # Always release the browser, even on KeyboardInterrupt or errors.
        driver.quit()

    return pd.DataFrame(records, columns=columns)

In [None]:
# Scrape seller and description for every listing frame. Each call drives a
# browser through the frame's links with fixed sleeps per page, so expect
# this cell to take a while.

# Hiking
headlamp_detail_df = get_descs_renters(headlamp_df)
thermal_blanket_detail_df = get_descs_renters(thermal_blanket_df)
pisau_lipat_detail_df = get_descs_renters(pisau_lipat_df)
matras_detail_df = get_descs_renters(matras_df)
kotak_p3k_detail_df = get_descs_renters(kotak_p3k_df)

# Cosplay
cosplay_detail_df = get_descs_renters(cosplay_df)
kostum_cosplay_detail_df = get_descs_renters(kostum_cosplay_df)
pakaian_cosplay_detail_df = get_descs_renters(pakaian_cosplay_df)
topeng_cosplay_detail_df = get_descs_renters(topeng_cosplay_df)
aksesoris_cosplay_detail_df = get_descs_renters(aksesoris_cosplay_df)


In [None]:
# Group the detail frames by category. Each key is used as the base name of
# the output file (save_df_to_csv appends '.csv' to it).
hiking_detail_dfs = {
    'headlamp_detail': headlamp_detail_df,
    'thermal_blanket_detail': thermal_blanket_detail_df,
    'pisau_lipat_detail': pisau_lipat_detail_df,
    'matras_detail': matras_detail_df,
    'kotak_p3k_detail': kotak_p3k_detail_df,
}
cosplay_detail_dfs = {
    'cosplay_detail': cosplay_detail_df,
    'kostum_cosplay_detail': kostum_cosplay_detail_df,
    'pakaian_cosplay_detail': pakaian_cosplay_detail_df,
    'topeng_cosplay_detail': topeng_cosplay_detail_df,
    'aksesoris_cosplay_detail': aksesoris_cosplay_detail_df,
}

In [None]:
# Output folders for the scraped details, one per category.
hiking_detail_dir = 'scraped_desc/hiking/'
cosplay_detail_dir = 'scraped_desc/cosplay/'

# Create both folders up front; existing folders are left untouched.
for detail_dir in (hiking_detail_dir, cosplay_detail_dir):
    os.makedirs(detail_dir, exist_ok=True)

### Save dataset

In [None]:
def save_df_to_csv(df_dict, category_dir):
    """Write every DataFrame in *df_dict* to ``<category_dir>/<name>.csv``.

    Args:
        df_dict: Mapping of base file name (without extension) to the
            DataFrame to save under that name.
        category_dir: Destination folder. It is created, including any
            missing parents, when it does not exist yet.
    """
    # An empty path means the current directory; os.makedirs('') would raise.
    if category_dir:
        os.makedirs(category_dir, exist_ok=True)

    for df_name, df in df_dict.items():
        # index=False keeps the row index out of the saved file.
        df.to_csv(os.path.join(category_dir, f'{df_name}.csv'), index=False)

In [None]:
# Persist both categories (hiking first, then cosplay), each into its own folder.
for detail_dfs, detail_dir in (
    (hiking_detail_dfs, hiking_detail_dir),
    (cosplay_detail_dfs, cosplay_detail_dir),
):
    save_df_to_csv(detail_dfs, detail_dir)