In [None]:
# Mount Google Drive at /content/drive, requesting a forced remount.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Change the working directory to the shared project folder and list its
# contents; later cells reference files relative to this directory.
%cd "/content/drive/Shareddrives/C23-PS139-ML"
%ls -al


In [None]:
# Install dos2unix and convert the install script in place so that any
# carriage returns (Windows line endings) are removed before it is run.
!apt-get -qq install -y dos2unix
!dos2unix -q "install_chromium_driver.sh"

# Run the script from the current working directory.
# NOTE(review): presumably installs Chromium and its driver; the script itself is not shown here.
!bash "install_chromium_driver.sh"


In [None]:
# Install or upgrade (-U) the third-party packages quietly (-q).
# NOTE(review): versions are not pinned, so each fresh runtime may resolve different releases.
!pip install -Uq selenium selectolax google-colab pandas supabase python-dotenv


In [None]:
import traceback
import ast
import json
import logging
import os
import random
import re
import secrets
import time
from datetime import datetime
from logging import Logger
from os import environ
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from selectolax.parser import HTMLParser
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.webdriver import WebDriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from supabase import Client, create_client
from tqdm.auto import tqdm


def get_webdriver() -> WebDriver:
    """Create a headless Chrome WebDriver with a fixed window size.

    Returns:
        A new ``webdriver.Chrome`` instance. The caller owns it and is
        responsible for calling ``quit()``.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--verbose")
    options.add_argument("--no-sandbox")
    options.add_argument("--headless")
    options.add_argument("--disable-gpu")
    # Width and height must be separated by a comma only: a space inside the
    # value keeps the height from being parsed as an integer.
    options.add_argument("--window-size=1920,1200")  # Fixed size
    options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(options=options)


def get_logger(
    name: str,
    log_file: str = "log",
    log_folder: str = "logs",
    level: int = logging.INFO,
) -> Logger:
    """Return a logger writing to ``{log_folder}/{log_file}_{current_time}.log``.

    The logger gets two handlers: a file handler accepting DEBUG and above,
    and a stream handler accepting CRITICAL only. Propagation to the root
    logger is disabled.

    Args:
        name: Logger name passed to ``logging.getLogger``.
        log_file: Prefix of the log file name.
        log_folder: Directory for log files; created if missing.
        level: Currently NOT applied (the ``setLevel`` call is disabled), so
            the logger's effective level is inherited from its parent.
            NOTE(review): confirm whether this is intentional.

    Returns:
        The configured ``Logger``.
    """
    logger = logging.getLogger(name)
    # logger.setLevel(level)

    # Quieten noisy third-party loggers.
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("selenium").setLevel(logging.WARNING)

    # ``logging.getLogger`` returns the same object for the same name, so a
    # repeated call (e.g. re-running the cell) would otherwise stack another
    # pair of handlers, duplicating every record and leaking file handles.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    formatter = logging.Formatter("%(asctime)s:%(levelname)s:%(name)s: %(message)s")

    current_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    logs_folder = Path(log_folder)
    logs_folder.mkdir(parents=True, exist_ok=True)

    log_path = logs_folder / f"{log_file}_{current_time}.log"

    # Log to file
    file_handler = logging.FileHandler(log_path)
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    # Log to stream (critical records only)
    stream_handler = logging.StreamHandler()
    stream_handler.setLevel(logging.CRITICAL)
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)

    # Do not forward records to the root logger's handlers
    logger.propagate = False

    return logger


def wait_for_element(
    driver: WebDriver,
    by: By,
    value: str,
    timeout: int = 2,
    retry: int = 0,
    max_retry: int = 5,
    verbose: bool = False,
    logger=None,
) -> WebElement:
    """Wait until an element is present in the DOM, retrying on timeout.

    Args:
        driver: WebDriver to query.
        by: Locator strategy.
        value: Locator value (e.g. a CSS selector).
        timeout: Seconds each individual wait may take.
        retry: Number of retries already consumed (starting attempt index).
        max_retry: Maximum number of retries after the first attempt.
        verbose: Log progress at INFO instead of DEBUG.
        logger: Logger to use; falls back to this module's logger when None.

    Returns:
        The located element, or ``None`` when every attempt timed out.
    """
    if logger is None:
        logger = logging.getLogger(__name__)
    log_progress = logger.info if verbose else logger.debug

    attempt = retry
    while True:
        try:
            element = WebDriverWait(driver, timeout).until(
                EC.presence_of_element_located((by, value))
            )
        except TimeoutException:
            if attempt >= max_retry:
                logger.error(f"Timeout for element: {value}")
                return None
            # Fixed one-second pause plus up to 0.99 s of jitter. The previous
            # expression ``1**retry`` always evaluated to 1, so the delay was
            # never exponential; the timing is kept unchanged here.
            wait_time = 1 + secrets.randbelow(100) / 100
            log_progress(
                f"[{attempt+1}/{max_retry}] Retrying element: {value}, retry in {wait_time} seconds."
            )
            time.sleep(wait_time)
            attempt += 1
        else:
            log_progress(f"Found element: {value}")
            return element


def search_place(driver: WebDriver, keyword: str, city: str, logger=None) -> None:
    """Type ``"{keyword} {city}"`` into the Maps search box and press Enter.

    Whitespace in the combined query is collapsed to single spaces.

    Raises:
        TimeoutException: If the search box never appears.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    search_box = wait_for_element(driver, By.CSS_SELECTOR, "input#searchboxinput", logger=logger)
    if search_box is None:
        # wait_for_element returns None after exhausting its retries; fail
        # with an explicit message instead of an AttributeError on None.
        raise TimeoutException("Search box 'input#searchboxinput' was not found")

    search_keyword = " ".join(f"{keyword} {city}".split())
    search_box.send_keys(search_keyword + Keys.ENTER)
    logger.info(f"Searching for '{search_keyword}'")


def get_url(driver: WebDriver, url: str, logger=None):
    """Load ``url`` in ``driver``, then record the navigation at DEBUG level."""
    driver.get(url)
    message = f"Getting url: {url}"
    logger.debug(message)


def _first_number(text: str):
    """Return the first decimal number found in ``text`` as a float, else None."""
    match = re.search(r"\d+(?:[.,]\d+)?", text)
    if match is None:
        return None
    return float(match.group().replace(",", "."))


def _parse_rating_summary(rating_element):
    """Extract ``(rating, total_review, price)`` from the rating summary node.

    The values come from the ``aria-label`` of the node's ``span`` children,
    e.g. ``['Bintang 4,7', '2.292 ulasan', 'Harga: Sedang']``. Any value that
    is missing or unparseable is returned as ``None``.
    """
    rating = total_review = price = None
    if rating_element is None or rating_element.text(strip=True) == "":
        return rating, total_review, price

    labels = [
        span.attributes.get("aria-label").strip()
        for span in rating_element.css("span")
        if span.attributes.get("aria-label")
    ]

    if labels:
        # Handles both "Bintang 4,7" and "4.7 stars".
        rating = _first_number(labels[0])

    if len(labels) > 1 and ("ulasan" in labels[1].lower() or "review" in labels[1].lower()):
        # "2.292 ulasan" / "2,292 reviews": drop the thousands separators.
        digits = re.sub(r"\D", "", labels[1])
        total_review = int(digits) if digits else None

    if len(labels) > 2:
        if "price: " in labels[2].lower():
            price = labels[2].removeprefix("Price: ")
        elif "harga: " in labels[2].lower():
            price = labels[2].removeprefix("Harga: ")

    return rating, total_review, price


def _parse_opening_hours(place_tree) -> dict:
    """Return ``{day: hours}`` read from the opening-hours table, if present."""
    opening_hours = {}
    table = place_tree.css_first("table.eK4R0e.fontBodyMedium")
    if table is None:
        return opening_hours

    for row in table.css("tr"):
        day_cell = row.css_first("td")
        hours_cell = row.css_first(".mxowUb")
        if day_cell is None or hours_cell is None:
            continue
        hours = hours_cell.attributes.get("aria-label")
        if hours is None:
            continue
        opening_hours[day_cell.text(strip=True)] = hours.replace("\u202f", "")
    return opening_hours


def _parse_about(about_tree) -> dict:
    """Collect the description and the per-section item lists of the About tab.

    Items whose icon ends with ``not_interested_gm_grey_18dp.png`` are stored
    under ``"No {section}"``; all other items under ``"{section}"``.
    """
    about_dict = {}

    about_desc = about_tree.css_first("span.HlvSq")
    if about_desc is not None:
        about_dict["description"] = about_desc.text(strip=True)

    for section in about_tree.css("div.iP2t7d"):
        heading = section.css_first("h2.iL3Qke")
        if heading is None:
            continue
        about_category = heading.text(strip=True).replace(" ", "_").lower()

        about_list = []
        about_no_list = []
        for li in section.css("li"):
            label = li.css_first("span")
            if label is None:
                continue
            icon = li.css_first("img")
            # An item without an icon cannot be classified; it is kept in the
            # regular list rather than dropped.
            src = (icon.attributes.get("src") if icon is not None else None) or ""
            if src.endswith("not_interested_gm_grey_18dp.png"):
                about_no_list.append(label.text(strip=True))
            else:
                about_list.append(label.text(strip=True))

        if about_list:
            about_dict[about_category] = about_list
        if about_no_list:
            about_dict[f"No {about_category}"] = about_no_list

    return about_dict


def _scroll_result_list(driver, search_keyword: str, max_found_places, logger) -> None:
    """Scroll the result list until enough places are loaded or the list ends."""
    logger.info("Scrolling google maps")

    with tqdm(total=max_found_places, desc=f"Scrolling into {search_keyword}") as tbar:
        found = 0  # Number of places loaded so far

        wait_for_element(
            driver,
            By.CSS_SELECTOR,
            ".Nv2PK.THOPZb.CpccDe",
            logger=logger,
            timeout=10,
        )

        # NOTE(review): there is no stall guard; the loop only ends on an
        # empty list, the place limit, or the end-of-list marker.
        while True:
            a_tags = driver.find_elements(By.CSS_SELECTOR, "a.hfpxzc")

            # Stop if no places are listed
            if not a_tags:
                break

            # Scroll to the last element to trigger loading of more results
            driver.execute_script("arguments[0].scrollIntoView();", a_tags[-1])

            try:
                # Wait for the loading indicator to disappear
                WebDriverWait(driver, 10).until_not(
                    EC.presence_of_element_located((By.CSS_SELECTOR, ".lXJj5c.Hk4XGb"))
                )
            except TimeoutException:
                logger.debug("Loading indicator still present after 10 seconds.")

            end_of_list = HTMLParser(driver.page_source).css_first(".m6QErb.tLjsW.eKbjU")

            if (max_found_places and (len(a_tags) >= max_found_places)) or end_of_list:
                logger.debug("Done scrolling.")
                found = len(a_tags)
                break

            # Update the progress bar based on the actual number of places found
            tbar.total = max(len(a_tags), max_found_places or 0)
            tbar.update(len(a_tags) - found)
            found = len(a_tags)

        # Make the progress bar full and report the real count
        tbar.total = found
        tbar.n = found
        tbar.set_description(f"Done scrolling into {search_keyword}, found {found} places")


def scrape_data(
    driver: WebDriver,
    keyword: str,
    city: str,
    url: str,
    max_found_places: int = 50,
    logger=None,
    keyword_category=None,
) -> pd.DataFrame:
    """Search Google Maps for ``keyword city`` and scrape every place found.

    Each scraped place is written to its own CSV file through
    ``save_to_file`` (under ``csv`` on Windows, ``/content/.tmp/csv``
    otherwise).

    Args:
        driver: WebDriver used for all navigation.
        keyword: Search phrase.
        city: City appended to the search phrase and stored with each place.
        url: Maps URL opened before searching.
        max_found_places: Maximum number of places to scrape; a falsy value
            means no limit.
        logger: Logger to use; falls back to this module's logger when None.
        keyword_category: Stored as a one-element list in ``keyword_category``.

    Returns:
        One row per scraped place (empty when nothing was scraped).
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    get_url(driver, url, logger=logger)
    search_place(driver, keyword, city, logger=logger)

    # A place headline right after searching means the search resolved to a
    # single place page instead of a result list.
    big_headline = wait_for_element(
        driver,
        By.CSS_SELECTOR,
        "h1.DUwDvf.fontHeadlineLarge",
        max_retry=2,
        logger=logger,
    )
    is_single_place = bool(big_headline)

    search_keyword = " ".join(f"{keyword} {city}".split())

    if not is_single_place:
        _scroll_result_list(driver, search_keyword, max_found_places, logger)

    tree = HTMLParser(driver.page_source)
    if is_single_place:
        place_nodes = [tree.css_first("div.m6QErb.WNBkOb")]
    else:
        place_nodes = tree.css("a.hfpxzc")

    logger.info(f"Found {len(place_nodes)} places.")

    # Honour the limit: scrolling may load more places than requested.
    if max_found_places:
        place_nodes = place_nodes[:max_found_places]
    num_of_places = len(place_nodes)

    # Output folder depends on the platform (Windows vs. Colab/Linux)
    if os.name == "nt":
        csv_dir = Path("csv")
    else:
        csv_dir = Path("/content") / ".tmp" / "csv"
    csv_dir.mkdir(parents=True, exist_ok=True)

    frames = []

    with tqdm(total=num_of_places, postfix={"keyword": keyword, "city": city}) as tbar:
        for idx, place_node in enumerate(place_nodes):
            logger.debug(
                f"------------------- {idx + 1}/{num_of_places} -------------------"
            )

            place_url = None
            if not is_single_place:
                place_url = place_node.attributes.get("href")
                if not place_url:
                    logger.error("Skipping a result without a link.")
                    tbar.update(1)
                    continue
                get_url(driver, place_url, logger)
                wait_for_element(driver, By.CSS_SELECTOR, 'div[role="main"]', logger=logger)

            # ------------------- Overviews -------------------

            name_element = wait_for_element(
                driver, By.CSS_SELECTOR, "h1.DUwDvf.fontHeadlineLarge", logger=logger
            )

            name = "no name"
            if name_element is not None:
                try:
                    name = name_element.text.strip()
                except Exception as e:
                    logger.error(e)

            logger.debug(f"Name: {name}")
            tbar.set_description(f"Scraping {name}")

            # Wait until photos are loaded
            img_element = wait_for_element(
                driver,
                By.CSS_SELECTOR,
                "button.aoRNLd.kn2E5e.NMjTrf.lvtCsd",
                timeout=3,
                max_retry=10,
                logger=logger,
            )

            # Parse only after the waits so the snapshot reflects the loaded page
            place_tree = HTMLParser(driver.page_source)

            img_url = None
            if img_element:
                photos_element = place_tree.css_first(".RZ66Rb.FgCUCc button img")
                if photos_element is not None:
                    img_url = photos_element.attributes.get("src")
                logger.debug(f"Image URL: {img_url}")

            # ---------- Rating and Price ----------
            rating, total_review, price = _parse_rating_summary(
                place_tree.css_first(".fontBodyMedium.dmRWX")
            )
            logger.debug(f"Rating: {rating}, Review Count: {total_review}, Price: {price}")

            # ---------- Category ----------
            # NOTE(review): this value is only logged; the "category" column
            # below stores the review categories - confirm this is intended.
            category_element = place_tree.css_first("button.DkEaL")
            category = None
            if category_element is not None:
                category = category_element.text(strip=True)
            logger.debug(f"Category: {category}")

            address_element = place_tree.css_first("div.Io6YTe.fontBodyMedium")
            address = address_element.text(strip=True) if address_element is not None else ""
            logger.debug(f"Address: {address}")
            logger.debug(f"City: {city}")

            # Read from this place's page, not from the result list or the
            # previous place's About tab.
            opening_hours = _parse_opening_hours(place_tree)
            logger.debug(f"Opening Hours: {opening_hours}")

            # ------------------- Reviews -------------------

            review_categories = {}
            reviews_button = wait_for_element(
                driver, By.CSS_SELECTOR, "button.hh2c6:nth-child(2)", logger=logger
            )
            if reviews_button:
                reviews_button.click()

                detail_tree = HTMLParser(driver.page_source)
                review_categories_elements = detail_tree.css("button[role=radio].e2moi")
                # The first radio button is skipped, as in the original flow
                for review_categories_element in review_categories_elements[1:]:
                    text_element = review_categories_element.css_first(
                        ".uEubGf.fontBodyMedium"
                    )
                    count_element = review_categories_element.css_first(
                        ".bC3Nkc.fontBodySmall"
                    )
                    if text_element is None or count_element is None:
                        continue
                    review_category_text = (
                        text_element.text(strip=True).replace(" ", "_").lower()
                    )
                    review_categories[review_category_text] = count_element.text(strip=True)

                logger.debug(f"Review Categories: {review_categories}")

            # ------------------- About -------------------
            about_button = wait_for_element(
                driver, By.CSS_SELECTOR, "button.hh2c6:nth-child(3)", logger=logger
            )
            about_dict = {}
            if about_button:
                about_button.click()

                try:
                    clicked_button = driver.find_element(
                        By.CSS_SELECTOR, "button.hh2c6.G7m0Af"
                    )
                    data_tab_index = clicked_button.get_attribute("data-tab-index")
                except NoSuchElementException:
                    data_tab_index = None

                if data_tab_index == "2":
                    logger.debug("Clicked about button.")
                else:
                    logger.error(
                        f"Failed to clicked about button, instead clicked {data_tab_index}"
                    )

                wait_for_element(
                    driver,
                    By.CSS_SELECTOR,
                    ".m6QErb.DxyBCb.kA9KIf.dS8AEf",
                    timeout=10,
                    max_retry=3,
                    logger=logger,
                )

                wait_for_element(
                    driver,
                    By.CSS_SELECTOR,
                    "div.iP2t7d",
                    timeout=10,
                    max_retry=3,
                    logger=logger,
                )

                about_dict = _parse_about(HTMLParser(driver.page_source))
                logger.debug(f"About: {about_dict}")

            tbar.set_description(f"Scraped {name}")

            # ------------------- to dataframe -------------------

            if about_dict:
                about_dict = {
                    key: ", ".join(val).lower().strip()
                    if isinstance(val, list)
                    else val.lower().strip()
                    for key, val in about_dict.items()
                }

            place_df = pd.DataFrame(
                {
                    "name": [name],
                    "map_url": [place_url],
                    "img_url": [img_url],
                    "rating": [rating],
                    "total_review": [total_review],
                    "pricing": [price],
                    "address": [address],
                    "city": [city],
                    "category": [json.dumps(review_categories)],
                    "about": [json.dumps(about_dict)],
                    "opening_hours": [opening_hours],
                    "keyword_category": [[keyword_category]],
                }
            )
            frames.append(place_df)

            tbar.set_description(f"Saving {name}")
            save_to_file(place_df, keyword=name, csv_folder=csv_dir)
            tbar.set_description(f"Saved {name}")

            tbar.update(1)

        tbar.set_description(f"Done scraping {keyword} {city}")

    # Concatenate once at the end instead of growing a frame per iteration
    if frames:
        return pd.concat(frames, ignore_index=True)
    return pd.DataFrame()


def save_to_file(df: pd.DataFrame, keyword: str, csv_folder: str = "csv", logger=None):
    """Write ``df`` to ``{csv_folder}/{slug}_{timestamp}.csv`` without the index.

    The slug is ``keyword`` stripped, with spaces turned into underscores,
    lower-cased, and with every run of non-word characters removed. The
    folder is created when missing. ``logger`` is accepted but not used.
    """
    target_dir = Path(csv_folder)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Drop everything that is not a word character (letters, digits, "_")
    slug = re.sub(r"\W+", "", keyword.strip().replace(" ", "_").lower())
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")

    df.to_csv(target_dir / "{}_{}.csv".format(slug, stamp), index=False)


def _postgres_error_code(error: Exception):
    """Best-effort extraction of the ``code`` entry carried by an insert error.

    The first exception argument is expected to be a dict or the string form
    of one; anything else yields ``None`` instead of raising.
    """
    payload = error.args[0] if error.args else None
    if isinstance(payload, str):
        try:
            payload = ast.literal_eval(payload)
        except (ValueError, SyntaxError):
            return None
    if isinstance(payload, dict):
        return payload.get("code")
    return None


def _as_category_list(raw) -> list:
    """Normalise a stored ``keyword_category`` value into a list.

    Strings are parsed with ``ast.literal_eval`` (never ``eval``, since the
    text comes from CSV files and the database); unparseable strings are kept
    as a single element.
    """
    if raw is None:
        return []
    if isinstance(raw, str):
        try:
            raw = ast.literal_eval(raw)
        except (ValueError, SyntaxError):
            return [raw]
    if isinstance(raw, (list, tuple, set)):
        return list(raw)
    return [raw]


def _merge_duplicate_row(table, row: dict, logger, tbar, filename) -> str:
    """Merge ``row`` into the stored row with the same ``name``.

    Returns ``"updated"``, ``"unchanged"`` or ``"failed"``.
    """
    matching_row = {"name": row.get("name")}

    try:
        # The response unpacks into (field, value) pairs; the second item of
        # the first pair is the list of matching rows.
        existing_data, _count = table.select("*").match(matching_row).execute()
        existing_rows = existing_data[1]
    except Exception as e:
        logger.error(e)
        return "failed"

    if not existing_rows:
        logger.error(f"Duplicate reported but no row matches {matching_row}")
        return "failed"

    existing_row = dict(existing_rows[0])
    existing_row.pop("id", None)

    if existing_row == row:
        tbar.set_description("No changes")
        return "unchanged"

    for key, value in list(row.items()):
        existing_value = existing_row.get(key)
        if existing_value == value:
            continue

        if key == "keyword_category":
            merged = _as_category_list(value) + _as_category_list(existing_value)
            row[key] = list(dict.fromkeys(merged))  # de-duplicate, keep order
        else:
            # The stored value wins whenever it is truthy.
            # NOTE(review): the original comment described the opposite rule
            # (keep the local value unless it is None) - confirm.
            row[key] = existing_value or value

    # Delete the conflicting row, then insert the merged one
    try:
        tbar.set_description("Deleting row in database")
        table.delete().match(matching_row).execute()
    except Exception as e:
        logger.error(e)
        tbar.set_description(f"Failed to delete {filename.name}")
        return "failed"

    try:
        tbar.set_description("Re-insert with new row from local")
        table.insert(row, upsert=True).execute()
    except Exception as e:
        logger.error(e)
        tbar.set_description(f"Failed to re-insert {filename.name}")
        return "failed"

    return "updated"


def upload_to_database(db_url: str, db_key: str, table_name: str, logger=None, keyword=None, city=None):
    """Upload every CSV in the scraper's output folder to a Supabase table.

    Rows rejected with error code ``"23505"`` are merged with the stored row
    of the same ``name``. A CSV file is deleted only after all of its rows
    were inserted, merged or found unchanged; otherwise it is kept so a
    later run can retry it.

    Args:
        db_url: Supabase project URL.
        db_key: Supabase API key.
        table_name: Target table.
        logger: Logger to use; falls back to this module's logger when None.
        keyword: Shown in the final progress-bar postfix only.
        city: Shown in the final progress-bar postfix only.

    NOTE(review): the CSV folder is a fixed path, so concurrent callers read
    and delete each other's files.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    supabase: Client = create_client(supabase_url=db_url, supabase_key=db_key)

    # Output folder depends on the platform (Windows vs. Colab/Linux)
    if os.name == "nt":
        csv_dir = Path("csv")
    else:
        csv_dir = Path("/content") / ".tmp" / "csv"

    if not csv_dir.exists():
        logger.error(f"No folder {csv_dir} is found")
        return

    filenames = list(csv_dir.glob("*.csv"))

    if not filenames:
        logger.error(f"No csv files found in {csv_dir}")
        return

    with tqdm(filenames, desc="Reading csv files") as tbar:
        new_row_counter = 0
        update_row_counter = 0
        num_of_error = 0
        table = supabase.table(table_name)

        for filename in filenames:
            try:
                tbar.set_description(f"Reading {filename.name}")
                df_csv = pd.read_csv(filename)
                # Replace missing values with None without relying on the
                # ``np.NaN`` alias, which NumPy 2.0 removed.
                df_csv = df_csv.astype(object).where(df_csv.notna(), None)
                records = df_csv.to_dict("records")
            except Exception as e:
                logger.error(e)
                num_of_error += 1
                tbar.update(1)
                continue

            file_uploaded = True
            for row in records:
                tbar.set_description(f"Inserting {filename.name} into database")
                try:
                    table.insert(row, upsert=True).execute()
                except Exception as e:
                    if _postgres_error_code(e) != "23505":
                        logger.error(e)
                        num_of_error += 1
                        file_uploaded = False
                        continue

                    # Duplicate handling
                    outcome = _merge_duplicate_row(table, row, logger, tbar, filename)
                    if outcome == "updated":
                        update_row_counter += 1
                    elif outcome == "failed":
                        num_of_error += 1
                        file_uploaded = False
                else:
                    new_row_counter += 1
                    tbar.set_description(f"Success inserted {filename.name} into database")

            if file_uploaded:
                try:
                    os.remove(filename)
                    tbar.set_description(f"Removed {filename.name}")
                except Exception as e:
                    logger.error(e)
                    num_of_error += 1
                    tbar.set_description(f"Failed to remove {filename.name}")

            tbar.update(1)

            tbar.set_postfix({"new rows": new_row_counter,
                              "updated rows": update_row_counter,
                              "error": num_of_error,
                              })
        tbar.set_postfix({"new rows": new_row_counter,
                          "updated rows": update_row_counter,
                          "error": num_of_error,
                          "keyword": keyword,
                          "city": city,
                          })
        tbar.set_description(f"Done uploading database")

    logger.info(f"Added {new_row_counter} rows")
    logger.info(f"Updated {update_row_counter} rows")


def run_scraper(
    driver: WebDriver,
    keyword: str,
    city: str,
    url: str = "https://www.google.com/maps/?hl=id",
    max_found_places: int = 50,
    csv_folder: str = "csv",
    table_name: str = "gmap_id",
    num: int = 1,
    logger=None,
    keyword_category=None,
):
    """Scrape one keyword/city pair, upload the result, and quit the driver.

    Database credentials are read from the ``DB_URL`` and ``DB_KEY``
    environment variables after loading ``env.env``.

    Args:
        driver: WebDriver to use; it is always quit before this returns or
            raises.
        keyword: Search phrase.
        city: City appended to the search phrase.
        url: Maps URL opened before searching.
        max_found_places: Maximum number of places to scrape.
        csv_folder: Accepted for compatibility; not used by this function.
        table_name: Database table receiving the rows.
        num: Scraper index used in the elapsed-time log line.
        logger: Logger to use; falls back to this module's logger when None.
        keyword_category: Category label stored with each place.
    """
    if logger is None:
        logger = logging.getLogger(__name__)

    start_time = datetime.now()

    load_dotenv("env.env")

    try:
        scrape_data(driver, keyword, city, url, max_found_places,
                    logger=logger, keyword_category=keyword_category)
        upload_to_database(
            environ.get("DB_URL"), environ.get("DB_KEY"), table_name, logger=logger,
            keyword=keyword, city=city,
        )
    finally:
        # Always release the browser, even when scraping or uploading fails;
        # otherwise each failed task leaves a Chrome process behind.
        driver.quit()

    elapsed_time = datetime.now() - start_time
    logger.info(f"Scraper-{num} elapsed: {elapsed_time}")


def get_coordinate(driver: WebDriver, url: str, logger=None):
    """Placeholder: not implemented; ignores its arguments and returns None."""
    return None



# Scrape configuration.
# Each entry of ``keywords`` is (search phrase, keyword category); comment
# entries in or out to choose what gets scraped.
keywords = [
    # ("Rekomendasi tempat wisata di", "tempat wisata"),
    # ("Rekomendasi tempat makan di", "tempat makan"),
    ("Rekomendasi penginapan di", "penginapan"),
    # ("Rekomendasi kegiatan di", "kegiatan"),
]

# Cities appended to every search phrase.
cities = [
    "Kabupaten Kepulauan Seribu",
    # "Kota Jakarta Barat",
    # "Kota Jakarta Pusat",
    # "Kota Jakarta Selatan",
    # "Kota Jakarta Timur",
    "Kota Jakarta Utara",
    "Kabupaten Bantul",
    # "Kabupaten Gunung Kidul",
    # "Kabupaten Kulon Progo",
    # "Kabupaten Sleman",
    "Kota Yogyakarta",
]

# Every (search phrase, keyword category, city) combination, grouped by city.
keyword_city = [
    (phrase, label, place)
    for place in cities
    for phrase, label in keywords
]

# keyword_city = [
    # ("Rekomendasi tempat wisata di", "tempat wisata", "Kabupaten Kepulauan Seribu"),
    # ("Rekomendasi tempat wisata di", "tempat wisata", "Kota Jakarta Pusat"),
    # ("Rekomendasi tempat wisata di", "tempat wisata", "Kabupaten Bantul"),
    # ("Rekomendasi tempat wisata di", "tempat wisata", "Kabupaten Gunung Kidul"),
    # ("Rekomendasi tempat wisata di", "tempat wisata", "Kabupaten Sleman"),
    # ("Rekomendasi tempat makan di", "tempat makan", "Kota Yogyakarta"),
    # ("Rekomendasi penginapan di", "penginapan", "Kota Jakarta Pusat"),
    # ("Rekomendasi penginapan di", "penginapan", "Kota Jakarta Utara"),
    # ("Rekomendasi penginapan di", "penginapan", "Kabupaten Sleman"),
    # ("Rekomendasi kegiatan di", "kegiatan", "Kota Jakarta Utara"),
    # ("Rekomendasi kegiatan di", "kegiatan", "Kabupaten Gunung Kidul"),
    # ("Rekomendasi kegiatan di", "kegiatan", "Kabupaten Kulon Progo"),
    # ("Rekomendasi kegiatan di", "kegiatan", "Kabupaten Bantul"),
# ]


def scrape_keyword(keyword, city, max_found_places, num, keyword_category):
    """Run one complete scrape for ``keyword``/``city`` with its own logger and driver.

    The logger is named ``scraper-{num}`` and a fresh WebDriver is created
    for this call; both are handed to ``run_scraper`` together with a fixed
    Google Maps start URL.
    """
    start_url = "https://www.google.com/maps/@-6.156729,106.725600,15z?hl=id&entry=ttu"
    scraper_logger = get_logger(f"scraper-{num}", log_folder="logs", level=logging.CRITICAL)
    browser = get_webdriver()
    run_scraper(
        driver=browser,
        keyword=keyword,
        city=city,
        url=start_url,
        max_found_places=max_found_places,
        num=num,
        logger=scraper_logger,
        keyword_category=keyword_category,
    )


# with tqdm(keyword_city) as tbar:
#     for idx, (keyword, keyword_category, city) in enumerate(tbar):
#         tbar.set_description(f"{keyword} {city}")
#         scrape_keyword(keyword, city, 100, idx, keyword_category)

# Multithreading: run one scraper task per (keyword, category, city) entry.
max_workers = os.cpu_count()

with tqdm(total=len(keyword_city), desc="Processing") as tbar:
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Map each future back to the search it belongs to
        futures = {}
        for idx, (keyword, keyword_category, city) in enumerate(keyword_city):
            future = executor.submit(scrape_keyword, keyword, city, 150, idx, keyword_category)
            futures[future] = (keyword, city)

        num_of_error = 0
        # Handle tasks as they finish rather than in submission order, so the
        # progress bar is not held back by the slowest early task.
        for future in as_completed(futures):
            keyword, city = futures[future]
            try:
                future.result()
            except Exception:
                # A failed task must not stop the remaining ones; report it
                num_of_error += 1
                tbar.set_postfix({"error": num_of_error})
                print(traceback.format_exc())
            finally:
                tbar.set_description(f"Finished {keyword} {city}")
                tbar.update(1)

Processing:   0%|          | 0/4 [00:00<?, ?it/s]

Scrolling into Rekomendasi penginapan di Kota Jakarta Utara:   0%|          | 0/150 [00:00<?, ?it/s]

Scrolling into Rekomendasi penginapan di Kabupaten Kepulauan Seribu:   0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s, city=Kota Jakarta Utara, keyword=Rekomendasi penginapan di]

  0%|          | 0/18 [00:00<?, ?it/s, city=Kabupaten Kepulauan Seribu, keyword=Rekomendasi penginapan di]

Reading csv files:   0%|          | 0/33 [00:00<?, ?it/s]

Scrolling into Rekomendasi penginapan di Kabupaten Bantul:   0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s, city=Kabupaten Bantul, keyword=Rekomendasi penginapan di]

Reading csv files:   0%|          | 0/4 [00:00<?, ?it/s]

Scrolling into Rekomendasi penginapan di Kota Yogyakarta:   0%|          | 0/150 [00:00<?, ?it/s]

  0%|          | 0/18 [00:00<?, ?it/s, city=Kota Yogyakarta, keyword=Rekomendasi penginapan di]

Reading csv files:   0%|          | 0/30 [00:00<?, ?it/s]

Reading csv files:   0%|          | 0/5 [00:00<?, ?it/s]