In [1]:
!pip install selenium webdriver-manager beautifulsoup4 pandas numpy




In [40]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

def scrape_google_maps_reviews(url, max_scrolls=40, wait_time=4):
    """Scrape review dari Google Maps secara stabil"""
    options = Options()
    options.add_argument("--headless=new")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-notifications")

    reviews = []  # buffer hasil

    try:
        driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
        driver.get(url)
        time.sleep(4)

        # Coba temukan area scroll review
        try:
            scrollable_div = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.XPATH, "//div[contains(@aria-label, 'review') or contains(@aria-label,'ulasan') or @role='region']"))
            )
        except:
            print("⚠️ Tidak menemukan elemen ulasan. Mungkin halaman tidak menampilkan review publik.")
            driver.quit()
            return pd.DataFrame()

        # Scroll beberapa kali untuk load semua review
        for _ in range(max_scrolls):
            driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
            time.sleep(2)

        # Ambil source HTML
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Ambil blok review (berbeda-beda class tergantung versi Maps)
        blocks = soup.select("div.jftiEf.fontBodyMedium, div.d4r55")
        print(f"📋 Ditemukan {len(blocks)} blok ulasan mentah.")

        for block in blocks:
            user = block.find("button", class_="WEBjve")
            user = user.get("aria-label") if user else None

            text = block.find("span", class_="wiI7pd") or block.find("div", class_="MyEned")
            text = text.text.strip() if text else None

            rating = block.find("span", class_="kvMYJc")
            rating = rating.get("aria-label") if rating else None

            date = block.find("span", class_="rsqaWe")
            date = date.text if date else None

            if text:
                reviews.append({
                    "username": user,
                    "rating": rating,
                    "ulasan": text,
                    "date": date
                })

        driver.quit()

    except Exception as e:
        print("❌ Terjadi error:", e)

    finally:
        try:
            driver.quit()
        except:
            pass

    # Pastikan selalu mengembalikan DataFrame
    if not reviews:
        print("⚠️ Tidak ada review ditemukan.")
        return pd.DataFrame(columns=["username", "rating", "ulasan", "date"])

    return pd.DataFrame(reviews)


In [41]:
url = "https://www.google.com/maps/place/Perpustakaan+Nasional+Republik+Indonesia/@-6.181317,106.8246484,16z/data=!4m8!3m7!1s0x2e69f442596e0c93:0x4ba58be40979fe36!8m2!3d-6.1811826!4d106.8269204!9m1!1b1!16s%2Fm%2F0ch46lx?entry=ttu&g_ep=EgoyMDI1MTAxNC4wIKXMDSoASAFQAw%3D%3D"
df = scrape_google_maps_reviews(url, max_scrolls=50)

print("✅ Jumlah review diambil:", len(df))
df.head()


📋 Ditemukan 16 blok ulasan mentah.
✅ Jumlah review diambil: 8


Unnamed: 0,username,rating,ulasan,date
0,Foto ratna widya,5 bintang,Lokasinya strategis di pusat kota sehingga tra...,2 bulan lalu
1,Foto Len,5 bintang,Menutup 10bulan perjalanan di Jakarta dengan P...,seminggu lalu
2,Foto Melanie,5 bintang,Salah satu wishlist terbesar buat bisa berkunj...,seminggu lalu
3,Foto Yuliana Putri,5 bintang,Hallo hai semuanya 👋 langsung saja kali ini ak...,3 minggu lalu
4,Foto THANYOUNG,5 bintang,Gedung perpustakaan tertinggi di dunia.. Wow a...,4 minggu lalu


In [23]:
df.to_csv("review_perpusnas.csv", index=False, encoding="utf-8-sig")
print("💾 File disimpan: review_perpusnas.csv")


💾 File disimpan: review_perpusnas.csv


In [35]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import time

def scrape_all_google_maps_reviews(url, wait_time=3, max_waits=15):
    """Scrape SEMUA review Google Maps sampai habis"""
    options = Options()
    # Untuk lihat browser saat scraping, hapus komentar baris di bawah ini
    options.add_argument("--start-maximized")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-notifications")

    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    driver.get(url)
    time.sleep(wait_time)

    # Coba temukan area scroll review
    scrollable_div = None
    for _ in range(max_waits):
        try:
            scrollable_div = driver.find_element(By.XPATH, "//div[contains(@aria-label,'review') or contains(@aria-label,'ulasan')]")
            break
        except:
            time.sleep(1)
    if scrollable_div is None:
        print("⚠️ Tidak menemukan elemen review di halaman. Pastikan URL langsung menuju tempat.")
        driver.quit()
        return pd.DataFrame()

    # Smart scrolling: berhenti jika jumlah review tidak bertambah
    last_height = 0
    same_count = 0
    total_scrolls = 0

    while same_count < 4:  # berhenti jika 3 kali scroll tidak menambah apapun
        driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
        time.sleep(2)

        new_height = driver.execute_script('return arguments[0].scrollHeight', scrollable_div)
        if new_height == last_height:
            same_count += 1
        else:
            same_count = 0
        last_height = new_height
        total_scrolls += 1
        print(f"🌀 Scrolling ke-{total_scrolls}...")

    print("✅ Semua review sudah dimuat sepenuhnya!")

    # Parsing HTML setelah semua review termuat
    soup = BeautifulSoup(driver.page_source, "html.parser")
    blocks = soup.select("div.jftiEf.fontBodyMedium, div.d4r55")
    print(f"📋 Ditemukan total {len(blocks)} blok review mentah")

    reviews = []
    for block in blocks:
        user = block.find("button", class_="WEBjve")
        user = user.get("aria-label") if user else None

        text = block.find("span", class_="wiI7pd") or block.find("div", class_="MyEned")
        text = text.text.strip() if text else None

        rating = block.find("span", class_="kvMYJc")
        rating = rating.get("aria-label") if rating else None

        date = block.find("span", class_="rsqaWe")
        date = date.text if date else None

        reviews.append({
            "username": user,
            "rating": rating,
            "ulasan": text,
            "date": date
        })

    driver.quit()
    return pd.DataFrame(reviews)


In [37]:
url = "https://www.google.com/maps/place/Perpustakaan+Nasional+Republik+Indonesia/@-6.181317,106.8246484,16z/data=!4m8!3m7!1s0x2e69f442596e0c93:0x4ba58be40979fe36!8m2!3d-6.1811826!4d106.8269204!9m1!1b1!16s%2Fm%2F0ch46lx?entry=ttu&g_ep=EgoyMDI1MTAxNC4wIKXMDSoASAFQAw%3D%3D"
df = scrape_all_google_maps_reviews(url)
print("✅ Total review berhasil diambil:", len(df))
df.head()


🌀 Scrolling ke-1...
🌀 Scrolling ke-2...
🌀 Scrolling ke-3...
🌀 Scrolling ke-4...
🌀 Scrolling ke-5...
✅ Semua review sudah dimuat sepenuhnya!
📋 Ditemukan total 16 blok review mentah
✅ Total review berhasil diambil: 16


Unnamed: 0,username,rating,ulasan,date
0,Foto ratna widya,5 bintang,Lokasinya strategis di pusat kota sehingga tra...,2 bulan lalu
1,,,,
2,Foto Len,5 bintang,Menutup 10bulan perjalanan di Jakarta dengan P...,seminggu lalu
3,,,,
4,Foto Melanie,5 bintang,Salah satu wishlist terbesar buat bisa berkunj...,seminggu lalu


In [None]:
df.to_csv("perpusnas_all_reviews.csv", index=False, encoding="utf-8-sig")
print("💾 File tersimpan: perpusnas_all_reviews.csv")

In [47]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
import time
from bs4 import BeautifulSoup
import pandas as pd

def scrape_all_reviews(url, max_scrolls=300):
    # Setup Chrome driver
    options = Options()
    options.add_argument("--start-maximized")
    driver = webdriver.Chrome(service=Service(), options=options)
    
    driver.get(url)
    time.sleep(5)

    # Klik tombol "All reviews" jika ada
    try:
        all_reviews_button = driver.find_element(By.XPATH, "//button[contains(@aria-label, 'Ulasan')]")
        all_reviews_button.click()
        time.sleep(3)
    except:
        print("⚠️ Tidak menemukan tombol 'Semua ulasan' — mungkin sudah di halaman review.")

    # Temukan elemen scrollable
    scrollable_div = driver.find_element(By.CSS_SELECTOR, "div.m6QErb.DxyBCb.kA9KIf.dS8AEf")

    last_height = 0
    scroll_count = 0

    # Auto scroll
    while scroll_count < max_scrolls:
        driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight", scrollable_div)
        time.sleep(2)
        new_height = driver.execute_script("return arguments[0].scrollHeight", scrollable_div)

        if new_height == last_height:
            print("✅ Semua komentar sudah dimuat.")
            break
        last_height = new_height
        scroll_count += 1
        print(f"🔽 Scroll ke-{scroll_count} selesai")

    # Parsing semua review
    soup = BeautifulSoup(driver.page_source, "html.parser")
    blocks = soup.select("div.jftiEf.fontBodyMedium, div.d4r55")

    reviews = []
    for block in blocks:
        text = block.get_text(" ", strip=True)
        if text:
            reviews.append(text)

    driver.quit()
    print(f"📋 Ditemukan {len(reviews)} ulasan.")

    return pd.DataFrame(reviews, columns=["Review"])

# URL contoh
url = "https://www.google.com/maps/place/Perpustakaan+Nasional+Republik+Indonesia/@-6.181317,106.8246484,16z/data=!4m8!3m7!1s0x2e69f442596e0c93:0x4ba58be40979fe36!8m2!3d-6.1811826!4d106.8269204!9m1!1b1!16s%2Fm%2F0ch46lx?entry=ttu&g_ep=EgoyMDI1MTAxNC4wIKXMDSoASAFQAw%3D%3D"
df = scrape_all_reviews(url, max_scrolls=300)

print(df.head())
print("Total review:", len(df))


🔽 Scroll ke-1 selesai
🔽 Scroll ke-2 selesai
🔽 Scroll ke-3 selesai
🔽 Scroll ke-4 selesai
🔽 Scroll ke-5 selesai
🔽 Scroll ke-6 selesai
🔽 Scroll ke-7 selesai
🔽 Scroll ke-8 selesai
🔽 Scroll ke-9 selesai
🔽 Scroll ke-10 selesai
✅ Semua komentar sudah dimuat.
📋 Ditemukan 216 ulasan.
                                              Review
0  ratna widya Local Guide · 30 ulasan · 170 foto...
1                                        ratna widya
2  Len Local Guide · 468 ulasan · 3.186 foto   ...
3                                                Len
4  Melanie 4 ulasan · 7 foto       seminggu...
Total review: 216


In [48]:
df.to_csv("perpusnas_all_reviews.csv", index=False, encoding="utf-8-sig")
print("💾 File tersimpan: perpusnas_all_reviews.csv")

💾 File tersimpan: perpusnas_all_reviews.csv


In [None]:
import time
import pandas as pd  # <-- IMPOR PANDAS
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# --- PENGATURAN BROWSER ---
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--lang=id-ID")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=options)

# Inisialisasi list kosong untuk menampung hasil
comments = []
ratings = []

# Ganti dengan URL Google Maps yang sebenarnya
url = "https://www.google.com/maps/place/Perpustakaan+Nasional+Republik+Indonesia/@-6.1811826,106.8243455,17z/data=!4m16!1m7!3m6!1s0x2e69f442596e0c93:0x4ba58be40979fe36!2sPerpustakaan+Nasional+Republik+Indonesia!8m2!3d-6.1811826!4d106.8269204!16s%2Fm%2F0ch46lx!3m7!1s0x2e69f442596e0c93:0x4ba58be40979fe36!8m2!3d-6.1811826!4d106.8269204!9m1!1b1!16s%2Fm%2F0ch46lx?entry=ttu&g_ep=EgoyMDI1MTAxNC4wIKXMDSoASAFQAw%3D%3D"

try:
    driver.get(url)
    scrollable_div = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'div.m6QErb.DxyBCb.kA9KIf.dS8AEf'))
    )
    print("✅ Elemen ulasan ditemukan")
    
    print("Memuat ulasan...")
    for _ in range(50):
        driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
        time.sleep(2)

    # --- EKSTRAKSI DATA ---
    reviews_elements = driver.find_elements(By.CSS_SELECTOR, 'span.wiI7pd')
    comments = [r.text for r in reviews_elements if r.text.strip() != ""]

    rating_elements = driver.find_elements(By.CSS_SELECTOR, 'span.kvMYJc')
    for r in rating_elements:
        aria_label = r.get_attribute("aria-label")
        if aria_label:
            try:
                score_str = aria_label.split(" ")[2].replace(",", ".")
                score = int(float(score_str))
                ratings.append(score)
            except:
                ratings.append(None)
        else:
            ratings.append(None)

    print("\n--- HASIL SCRAPING ---")
    print("Jumlah komentar:", len(comments))
    print("Contoh 10 Komentar Pertama:")
    print(comments[:10])

    # -----------------------------------------------------------------
    # --- BAGIAN BARU: MENYIMPAN HASIL KE FILE CSV 💾 ---
    # -----------------------------------------------------------------
    print("\nMenyimpan hasil ke file CSV...")
    
    # Pastikan jumlah rating dan komentar sama, potong jika perlu
    min_length = min(len(ratings), len(comments))
    
    # Buat dictionary untuk DataFrame
    data_dict = {
        'rating': ratings[:min_length],
        'komentar': comments[:min_length]
    }
    
    # Buat DataFrame dari dictionary
    df = pd.DataFrame(data_dict)
    
    # Simpan DataFrame ke file CSV
    # index=False agar tidak ada kolom indeks tambahan di file CSV
    df.to_csv('ulasan_maps.csv', index=False, encoding='utf-8-sig')
    
    print("✅ Hasil berhasil disimpan ke file 'ulasan_maps.csv'")
    # -----------------------------------------------------------------

except Exception as e:
    print(f"❌ Terjadi kesalahan: {e}")

finally:
    driver.quit()

✅ Elemen ulasan ditemukan
Memuat ulasan...


In [50]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# --- PENGATURAN BROWSER ---
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument("--lang=id-ID")
options.add_argument("--no-sandbox")
options.add_argument("--disable-dev-shm-usage")

driver = webdriver.Chrome(options=options)

# Inisialisasi list kosong untuk menampung hasil
comments = []
ratings = []

# Ganti dengan URL Google Maps yang sebenarnya
url = "https://www.google.com/maps/place/Perpustakaan+Nasional+Republik+Indonesia/@-6.1811826,106.8243455,17z/data=!4m16!1m7!3m6!1s0x2e69f442596e0c93:0x4ba58be40979fe36!2sPerpustakaan+Nasional+Republik+Indonesia!8m2!3d-6.1811826!4d106.8269204!16s%2Fm%2F0ch46lx!3m7!1s0x2e69f442596e0c93:0x4ba58be40979fe36!8m2!3d-6.1811826!4d106.8269204!9m1!1b1!16s%2Fm%2F0ch46lx?entry=ttu&g_ep=EgoyMDI1MTAxNC4wIKXMDSoASAFQAw%3D%3D"

try:
    driver.get(url)
    scrollable_div = WebDriverWait(driver, 15).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'div.m6QErb.DxyBCb.kA9KIf.dS8AEf'))
    )
    print("✅ Elemen ulasan ditemukan")
    
    print("Memuat ulasan...")
    for _ in range(50): # Sesuaikan jumlah scroll
        driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
        time.sleep(2)

    # -----------------------------------------------------------------
    # --- BAGIAN BARU: KLIK SEMUA TOMBOL "LAINNYA" 🖱️ ---
    # -----------------------------------------------------------------
    print("\nMencari dan mengklik tombol 'Lainnya' untuk menampilkan teks lengkap...")
    
    # Selector CSS untuk tombol "Lainnya" di Google Maps
    # Perhatian: Selector ini bisa berubah jika Google memperbarui situsnya
    more_buttons_selector = 'button.w8nwRe.kyuRq'
    
    # Cari semua tombol "Lainnya"
    more_buttons = driver.find_elements(By.CSS_SELECTOR, more_buttons_selector)
    
    for button in more_buttons:
        try:
            # Klik tombol untuk memunculkan teks lengkap
            button.click()
            time.sleep(0.5) # Beri jeda singkat agar teks sempat muncul
        except Exception as e:
            # Lewati jika ada error (misal: elemen menjadi stale setelah diklik)
            pass
            
    print(f"✅ Selesai mengklik {len(more_buttons)} tombol 'Lainnya'.")
    # -----------------------------------------------------------------

    # --- EKSTRAKSI DATA ---
    print("\nMengekstrak data ulasan...")
    reviews_elements = driver.find_elements(By.CSS_SELECTOR, 'span.wiI7pd')
    comments = [r.text for r in reviews_elements if r.text.strip() != ""]

    rating_elements = driver.find_elements(By.CSS_SELECTOR, 'span.kvMYJc')
    for r in rating_elements:
        aria_label = r.get_attribute("aria-label")
        if aria_label:
            try:
                score_str = aria_label.split(" ")[2].replace(",", ".")
                score = int(float(score_str))
                ratings.append(score)
            except:
                ratings.append(None)
        else:
            ratings.append(None)

    print("\n--- HASIL SCRAPING ---")
    print("Jumlah komentar:", len(comments))
    print("Contoh 10 Komentar Pertama:")
    print(comments[:10])

    # --- MENYIMPAN HASIL KE FILE CSV ---
    print("\nMenyimpan hasil ke file CSV...")
    min_length = min(len(ratings), len(comments))
    data_dict = {'rating': ratings[:min_length], 'komentar': comments[:min_length]}
    df = pd.DataFrame(data_dict)
    df.to_csv('ulasan_maps.csv', index=False, encoding='utf-8-sig')
    print("✅ Hasil berhasil disimpan ke file 'ulasan_maps.csv'")

except Exception as e:
    print(f"❌ Terjadi kesalahan: {e}")

finally:
    driver.quit()

✅ Elemen ulasan ditemukan
Memuat ulasan...

Mencari dan mengklik tombol 'Lainnya' untuk menampilkan teks lengkap...
✅ Selesai mengklik 306 tombol 'Lainnya'.

Mengekstrak data ulasan...

--- HASIL SCRAPING ---
Jumlah komentar: 508
Contoh 10 Komentar Pertama:
['Lokasinya strategis di pusat kota sehingga transportasi umum yg tersedia cukup banyak.\nWalaupun datang ketika hari Minggu, tetapi tetap banyak pengunjungnya.\nBagi yang pertama kali ke perpusnas seperti saya, di sana pegawainya sangat ramah dan sigap membantu.\nDari lobi utama kita bisa ke arah kanan untuk penitipan barang/tas. Jika ukuran tasnya cukup besar harus dititipkan di loker. Namun, jika tas tidak terlalu besar bisa kita bawa tetapi harus kita masukan tas plastik yang sudah disediakan dari pihak perpusnas.\nAda 24 lantai, dari lt 1 sd lt 4 tersedia eskalator..tetapi jika ingin naik ke lt 5 sd 25 kita hanya bisa menggunakan lift.\nSebenernya sudah tersedia 5 lift, namun saat berkunjung kemarin antri liftnya lumayan lama😂😂

In [2]:
# === STEP 1: Install & Import Library ===
!pip install selenium webdriver-manager beautifulsoup4 pandas

import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
from datetime import datetime

# === STEP 2: Setup Chrome ===
def get_driver():
    options = Options()
    options.add_argument("--headless")  # tanpa tampilan browser
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# === STEP 3: Fungsi Scraping ===
def scrape_google_maps_reviews(url, max_scrolls=20):
    driver = get_driver()
    driver.get(url)
    time.sleep(5)

    # scroll ke bawah agar semua review termuat
    for _ in range(max_scrolls):
        driver.execute_script("window.scrollBy(0, 1000);")
        time.sleep(2)

    # ambil source code setelah scroll
    soup = BeautifulSoup(driver.page_source, "html.parser")
    driver.quit()

    # === ambil data dari setiap review ===
    reviews = []
    review_blocks = soup.select("div.jftiEf.fontBodyMedium, div.d4r55")

    for block in review_blocks:
        try:
            username = block.select_one("div.d4r55").text.strip()
        except:
            username = None

        try:
            rating = block.select_one("span.kvMYJc")["aria-label"].split(" ")[1]
        except:
            rating = None

        try:
            review_text = block.select_one("span.wiI7pd").text.strip()
        except:
            review_text = None

        try:
            relative_date = block.select_one("span.rsqaWe").text.strip()
        except:
            relative_date = None

        review_date = datetime.now().strftime("%Y-%m-%d")

        reviews.append({
            "username": username,
            "rating": rating,
            "review_text": review_text,
            "relative_date": relative_date,
            "review_date": review_date
        })

    return pd.DataFrame(reviews)

# === STEP 4: Jalankan Scraper ===
url = "https://www.google.com/maps/place/Perpustakaan+Nasional+Republik+Indonesia/@-6.1811826,106.8243455,17z/data=!4m16!1m7!3m6!1s0x2e69f442596e0c93:0x4ba58be40979fe36!2sPerpustakaan+Nasional+Republik+Indonesia!8m2!3d-6.1811826!4d106.8269204!16s%2Fm%2F0ch46lx!3m7!1s0x2e69f442596e0c93:0x4ba58be40979fe36!8m2!3d-6.1811826!4d106.8269204!9m1!1b1!16s%2Fm%2F0ch46lx?entry=ttu&g_ep=EgoyMDI1MTAxNC4wIKXMDSoASAFQAw%3D%3D"
df = scrape_google_maps_reviews(url, max_scrolls=30)

# === STEP 5: Simpan ke CSV ===
df.to_csv("googlemaps_reviews.csv", index=False, encoding='utf-8')
print(f"✅ {len(df)} review berhasil diambil dan disimpan!")
df.head()


✅ 16 review berhasil diambil dan disimpan!


Unnamed: 0,username,rating,review_text,relative_date,review_date
0,RIFANDIKA ILHAM AKBAR,bintang,Perpustakaan Nasional Republik Indonesia ini m...,Diedit 4 hari lalu,2025-10-21
1,,,,,2025-10-21
2,ratna widya,bintang,Lokasinya strategis di pusat kota sehingga tra...,2 bulan lalu,2025-10-21
3,,,,,2025-10-21
4,Len,bintang,Menutup 10bulan perjalanan di Jakarta dengan P...,seminggu lalu,2025-10-21


In [19]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

In [25]:
# ==========================================================
# 🔧 STEP 1: INSTALL LIBRARY
# ==========================================================
!pip install selenium webdriver-manager beautifulsoup4 pandas

# ==========================================================
# 🧠 STEP 2: IMPORT MODULE
# ==========================================================


# ==========================================================
# 🚀 STEP 3: SETUP CHROME DRIVER
# ==========================================================
def get_driver(headless=True):
    options = Options()
    if headless:
        options.add_argument("--headless")  # tanpa tampilan browser
    options.add_argument("--lang=id-ID")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-gpu")
    return webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)

# ==========================================================
# 🧾 STEP 4: SCRAPER UTAMA
# ==========================================================
def scrape_google_maps_reviews(url, max_scrolls=50):
    driver = get_driver(headless=True)
    driver.get(url)
    time.sleep(5)

    # Tunggu elemen ulasan muncul
    try:
        scrollable_div = WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'div.m6QErb.DxyBCb.kA9KIf.dS8AEf'))
        )
        print("✅ Elemen ulasan ditemukan, mulai scroll...")
    except Exception as e:
        print(f"❌ Tidak dapat menemukan elemen ulasan: {e}")
        driver.quit()
        return pd.DataFrame()

    # SCROLL BERULANG UNTUK MUAT SEMUA KOMENTAR
    for i in range(max_scrolls):
        driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
        time.sleep(2)
    print(f"📜 Selesai scroll sebanyak {max_scrolls} kali.")

    # KLIK SEMUA TOMBOL “LAINNYA”
    print("🖱️ Mencari dan mengklik tombol 'Lainnya'...")
    more_buttons = driver.find_elements(By.CSS_SELECTOR, 'button.w8nwRe.kyuRq')
    for button in more_buttons:
        try:
            driver.execute_script("arguments[0].click();", button)
            time.sleep(0.3)
        except:
            continue
    print(f"✅ {len(more_buttons)} tombol 'Lainnya' diklik.")

    # AMBIL SOURCE HTML
    soup = BeautifulSoup(driver.page_source, "html.parser")
    driver.quit()

    # EKSTRAKSI DATA
    print("🔍 Mengekstrak data ulasan...")
    review_blocks = soup.select("div.jftiEf.fontBodyMedium, div.d4r55")

    reviews = []
    for block in review_blocks:
        try:
            username = block.select_one("div.d4r55").text.strip()
        except:
            username = None
        try:
            rating_text = block.select_one("span.kvMYJc")["aria-label"]
            rating_num = ''.join([c for c in rating_text if c.isdigit() or c == '.'])
            rating = float(rating_num) if rating_num else None
        except:
            rating = None
        try:
            review_text = block.select_one("span.wiI7pd").text.strip()
        except:
            review_text = None
        try:
            relative_date = block.select_one("span.rsqaWe").text.strip()
        except:
            relative_date = None

        review_date = datetime.now().strftime("%Y-%m-%d")

        if review_text:
            reviews.append({
                "username": username,
                "rating": rating,
                "review_text": review_text,
                "relative_date": relative_date,
                "review_date": review_date
            })

    df = pd.DataFrame(reviews)
    print(f"✅ Berhasil mengambil {len(df)} ulasan.")
    return df

# ==========================================================
# 🗺️ STEP 5: JALANKAN SCRAPER
# ==========================================================
url = "https://www.google.com/maps/place/Dinas+Perpustakaan+dan+Kearsipan+Provinsi+Jawa+Timur/@-7.289368,112.7633911,17z/data=!4m8!3m7!1s0x2dd7fa4bdc0d5c25:0x627c4fffa8e42378!8m2!3d-7.289368!4d112.768262!9m1!1b1!16s%2Fg%2F1hc5pmm9r?entry=ttu&g_ep=EgoyMDI1MTAxNC4wIKXMDSoASAFQAw%3D%3D"
df = scrape_google_maps_reviews(url, max_scrolls=80)

# ==========================================================
# 💾 STEP 6: SIMPAN DATA
# ==========================================================
df.to_csv("googlemaps_reviews_perpusjatim_sby.csv", index=False, encoding='utf-8-sig')
print("\n💾 Dataset tersimpan sebagai 'googlemaps_reviews.csv'")
df.head()


✅ Elemen ulasan ditemukan, mulai scroll...
📜 Selesai scroll sebanyak 80 kali.
🖱️ Mencari dan mengklik tombol 'Lainnya'...
✅ 47 tombol 'Lainnya' diklik.
🔍 Mengekstrak data ulasan...
✅ Berhasil mengambil 294 ulasan.

💾 Dataset tersimpan sebagai 'googlemaps_reviews.csv'


Unnamed: 0,username,rating,review_text,relative_date,review_date
0,Febri Gama,5.0,"Salah satu perpus terbaik, lokasi langganan de...",2 bulan lalu,2025-10-21
1,tursina dewi,5.0,Pertama kali ke Dinas Perpustakaan dan Kearsip...,Diedit 2 bulan lalu,2025-10-21
2,yusuf,1.0,"(Sabtu, 6 September 2025). Berdasarkan informa...",sebulan lalu,2025-10-21
3,faiz zulfa,5.0,Meskipun bukan KTP Surabaya bisa jadi anggota....,sebulan lalu,2025-10-21
4,Julaikah,5.0,"Tempatnya luas, cocok untuk mengerjakan tugas ...",6 bulan lalu,2025-10-21


In [4]:
!pip install flask joblib

Collecting flask
  Downloading flask-3.1.2-py3-none-any.whl.metadata (3.2 kB)
Collecting joblib
  Downloading joblib-1.5.2-py3-none-any.whl.metadata (5.6 kB)
Collecting blinker>=1.9.0 (from flask)
  Using cached blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting click>=8.1.3 (from flask)
  Using cached click-8.3.0-py3-none-any.whl.metadata (2.6 kB)
Collecting itsdangerous>=2.2.0 (from flask)
  Using cached itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting werkzeug>=3.1.0 (from flask)
  Using cached werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Downloading flask-3.1.2-py3-none-any.whl (103 kB)
Downloading joblib-1.5.2-py3-none-any.whl (308 kB)
Using cached blinker-1.9.0-py3-none-any.whl (8.5 kB)
Using cached click-8.3.0-py3-none-any.whl (107 kB)
Using cached itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Using cached werkzeug-3.1.3-py3-none-any.whl (224 kB)
Installing collected packages: werkzeug, joblib, itsdangerous, click, blinker, flask

   -----------------

In [None]:
!python app.py

In [None]:
!pip install streamlit

In [None]:
import streamlit as st

In [None]:
!streamlit run "C:\Users\dwiky\Documents\googlemaps_projectt\app_streamlit.py" &

In [None]:
joblib.dump(best_model, r"C:\Users\dwiky\Documents\googlemaps_projectt\Models\sentiment_model.pkl")
joblib.dump(tfidf, r"C:\Users\dwiky\Documents\googlemaps_projectt\Models\tfidf_vectorizer.pkl")