In [1]:
import random
import time
from urllib.parse import quote

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# --- CONFIG ---
# Search phrase sent to Google Maps and the maximum number of scroll passes
# performed on the result list.
KEYWORD = "Cafe Banyuwangi"
SCROLL_COUNT = 10

# Speed optimisation: this preference is what keeps images from loading.
# Drop the "prefs" option below if images should be displayed as well.
prefs = {"profile.managed_default_content_settings.images": 2}

# Browser options: suppress notification prompts and the usual
# automation markers (switch, extension, Blink feature flag).
chrome_options = Options()
chrome_options.add_argument("--disable-notifications")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_experimental_option("prefs", prefs)
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option("useAutomationExtension", False)

# webdriver_manager resolves a chromedriver binary for the Service.
driver = webdriver.Chrome(
    options=chrome_options,
    service=Service(ChromeDriverManager().install()),
)
driver.maximize_window()

def smart_sleep(min_sec, max_sec):
    """Block for a random duration drawn uniformly from [min_sec, max_sec] seconds."""
    pause_seconds = random.uniform(min_sec, max_sec)
    time.sleep(pause_seconds)

def get_address_from_detail_panel():
    """Return the address shown in the currently open place detail panel.

    Four lookup strategies are tried in order against the module-level
    ``driver``; the first one that yields a usable (non-empty, long enough)
    string wins:

    1. the ``aria-label`` of the address button, with its "Address:" prefix
       removed;
    2. the visible text inside the address button;
    3. any ``div.fontBodyMedium`` whose text is longer than 15 characters and
       contains a typical Indonesian address token;
    4. a div matched by its exact class string.

    Returns:
        The address text, or the sentinel ``"Tidak ditemukan"`` when no
        strategy produced a usable value. A rejected candidate is never
        returned, so callers can rely on the sentinel to detect a miss.
    """
    not_found = "Tidak ditemukan"
    address_tokens = ("jl.", "jalan", "kec.", "kab.", "banyuwangi", "no.", "gg.", "gang", "desa")

    def from_button_label():
        # Strategy 1: the address button's aria-label ("Address: ...").
        button = WebDriverWait(driver, 3).until(
            EC.presence_of_element_located((By.XPATH, "//button[@data-item-id='address']"))
        )
        label = button.get_attribute("aria-label")
        if label and "Address:" in label:
            return label.replace("Address:", "").strip()
        return None

    def from_button_text():
        # Strategy 2: the text node inside the address button.
        text = driver.find_element(
            By.CSS_SELECTOR, "button[data-item-id='address'] div.fontBodyMedium"
        ).text.strip()
        return text if len(text) > 10 else None

    def from_keyword_scan():
        # Strategy 3: scan every fontBodyMedium div for address-like text.
        for div in driver.find_elements(By.CSS_SELECTOR, "div.fontBodyMedium"):
            text = div.text.strip()
            # Addresses are usually long, so short snippets are skipped.
            if len(text) > 15 and any(token in text.lower() for token in address_tokens):
                return text
        return None

    def from_exact_class():
        # Strategy 4: exact class-string match (note the trailing space in
        # the attribute value); often absent from the page.
        text = driver.find_element(
            By.XPATH, "//div[@class='Io6YTe fontBodyMedium kR99db ']"
        ).text.strip()
        return text if len(text) > 10 else None

    for strategy in (from_button_label, from_button_text, from_keyword_scan, from_exact_class):
        try:
            candidate = strategy()
        except (TimeoutException, NoSuchElementException):
            # Expected miss: this layout simply lacks the element.
            continue
        except Exception as e:
            # Unexpected failure: report it but keep trying the remaining
            # strategies so the lookup stays best-effort.
            print(f"      ‚ö†Ô∏è Error get address: {e}")
            continue
        if candidate:
            return candidate

    return not_found

def get_rating_from_detail_panel():
    """Return the rating text shown in the currently open place detail panel.

    The value is the visible text of the ``span[aria-hidden='true']`` inside
    ``div.F7nice``, looked up through the module-level ``driver``. It is
    returned as displayed (for example with a decimal comma), not parsed.

    Returns:
        The stripped rating text, or ``"N/A"`` when the element is missing,
        cannot be read, or has no text. An empty string is never returned, so
        callers can treat ``"N/A"`` as the only "no rating" marker.
    """
    try:
        rating_elem = driver.find_element(By.CSS_SELECTOR, "div.F7nice span[aria-hidden='true']")
        rating = rating_elem.text.strip()
    except WebDriverException:
        # Covers a missing element as well as one that went stale before its
        # text could be read; either way there is no rating to report.
        return "N/A"

    return rating or "N/A"

print(f"üöÄ Memulai Scraping: {KEYWORD}")
print("=" * 60)

try:
    driver.get(f"https://www.google.com/maps/search/{KEYWORD}")
    smart_sleep(5, 7)

    # 1. Proses Scrolling
    print("üìú Scrolling untuk load semua data...")
    for i in range(SCROLL_COUNT):
        try:
            panel = driver.find_element(By.XPATH, '//div[@role="feed"]')
            driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', panel)
            print(f"   [{i+1}/{SCROLL_COUNT}] Scrolling...")
            smart_sleep(2, 3)
            
            try:
                driver.find_element(By.XPATH, '//*[contains(text(), "reached the end") or contains(text(), "Anda telah mencapai")]')
                print("   ‚úì Sudah sampai akhir daftar")
                break
            except:
                pass
                
        except Exception as e:
            print(f"   Error saat scroll: {e}")
            break

    # 2. Ambil semua link bisnis dulu
    print("\nüîç Mencari semua bisnis di list...")
    smart_sleep(2, 3)
    
    # Ambil semua link bisnis dari sidebar
    links = driver.find_elements(By.CSS_SELECTOR, "a[href*='maps/place']")
    print(f"üìã Ditemukan {len(links)} bisnis potensial")
    
    # Simpan URL dulu biar ga kehapus pas DOM berubah
    business_urls = []
    seen_urls = set()
    
    for link in links:
        try:
            url = link.get_attribute("href")
            nama = link.get_attribute("aria-label")
            
            if url and nama and url not in seen_urls:
                business_urls.append({"nama": nama, "url": url})
                seen_urls.add(url)
        except:
            continue
    
    print(f"‚úÖ Berhasil kumpulkan {len(business_urls)} unique bisnis\n")
    print("=" * 60)
    
    # 3. KLIK satu-satu untuk ambil detail lengkap
    all_results = []
    
    for idx, business in enumerate(business_urls):
        try:
            print(f"\n[{idx+1}/{len(business_urls)}] üéØ {business['nama']}")
            
            # Buka URL bisnis
            driver.get(business['url'])
            smart_sleep(3, 5)  # Tunggu page load
            
            # Ambil alamat dari detail panel
            alamat = get_address_from_detail_panel()
            print(f"   üìç Alamat: {alamat}")
            
            # Ambil rating
            rating = get_rating_from_detail_panel()
            print(f"   ‚≠ê Rating: {rating}")
            
            # Simpan data
            all_results.append({
                "nama_bisnis": business['nama'],
                "rating": rating,
                "alamat": alamat,
                "link": business['url']
            })
            print(f"   ‚úÖ Data tersimpan")
            
            # Jeda sebelum ke bisnis berikutnya (biar ga kedetect bot)
            smart_sleep(1, 2)
            
        except Exception as e:
            print(f"   ‚ùå Error: {e}")
            # Tetap simpan walaupun error
            all_results.append({
                "nama_bisnis": business['nama'],
                "rating": "N/A",
                "alamat": "Tidak ditemukan",
                "link": business['url']
            })
            continue

    # 4. Simpan ke Excel
    print("\n" + "=" * 60)
    if len(all_results) > 0:
        df = pd.DataFrame(all_results)
        
        # Bersihkan duplikat
        df.drop_duplicates(subset=['nama_bisnis'], inplace=True)
        
        # Hitung berapa yang dapet alamat
        alamat_found = len(df[df['alamat'] != 'Tidak ditemukan'])
        
        # Sort berdasarkan rating
        df['rating_sort'] = df['rating'].apply(lambda x: 0 if x == 'N/A' else float(x.replace(',', '.')))
        df = df.sort_values('rating_sort', ascending=False)
        df = df.drop('rating_sort', axis=1)
        
        # Simpan
        filename = "database_cafe_bwi_COMPLETE.xlsx"
        df.to_excel(filename, index=False)

        print(f"‚ú® SELESAI!")
        print(f"üìä Total data: {len(df)} bisnis")
        print(f"üìç Alamat ditemukan: {alamat_found}/{len(df)} ({alamat_found/len(df)*100:.1f}%)")
        print(f"üìÇ File disimpan: {filename}")
        print("=" * 60)
        
        # Preview data
        print("\nüìã Preview 5 data pertama:")
        print(df.head().to_string())
        
        # Warning untuk yang ga dapet alamat
        missing_address = df[df['alamat'] == 'Tidak ditemukan']
        if len(missing_address) > 0:
            print(f"\n‚ö†Ô∏è  {len(missing_address)} bisnis tidak memiliki alamat lengkap:")
            for name in missing_address['nama_bisnis'].head(5):
                print(f"   - {name}")
    else:
        print("‚ùå GAGAL! Tidak ada data yang berhasil di-scrape.")

except Exception as e:
    print(f"\n‚ùå ERROR FATAL: {e}")
    import traceback
    traceback.print_exc()

finally:
    print("\nüîö Menutup browser...")
    driver.quit()
    print("‚úÖ Done!")

üöÄ Memulai Scraping: Cafe Banyuwangi
üìú Scrolling untuk load semua data...
   [1/10] Scrolling...
   [2/10] Scrolling...
   [3/10] Scrolling...
   [4/10] Scrolling...
   [5/10] Scrolling...
   [6/10] Scrolling...
   [7/10] Scrolling...
   [8/10] Scrolling...
   [9/10] Scrolling...
   [10/10] Scrolling...

üîç Mencari semua bisnis di list...
üìã Ditemukan 52 bisnis potensial
‚úÖ Berhasil kumpulkan 48 unique bisnis


[1/48] üéØ Atap Langit Coffee
   üìç Alamat: Jl. MH.Tamrin, Pengantigan, Kec. Banyuwangi, Kabupaten Banyuwangi, Jawa Timur 68414
   ‚≠ê Rating: 4,8
   ‚úÖ Data tersimpan

[2/48] üéØ Ujung Jawa Cafe & Eatery
   üìç Alamat: Jl. Mh Thamrin No.40, Singotrunan, Kec. Banyuwangi, Kabupaten Banyuwangi, Jawa Timur 68414
   ‚≠ê Rating: 4,7
   ‚úÖ Data tersimpan

[3/48] üéØ SURYA KOPITIAM
   üìç Alamat: Q9H7+Q7V, Taman Baru, Tukangkayu, Kec. Banyuwangi, Kabupaten Banyuwangi, Jawa Timur 68416
   ‚≠ê Rating: 4,6
   ‚úÖ Data tersimpan

[4/48] üéØ Tomoro Coffee and Pastry Sudi