In [None]:
import os
import re
import sys
import time
from urllib.parse import quote_plus

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager

# --- FILE CONFIG ---
input_file = "database_sekolah_banyuwangi.xlsx" 
output_file = "hasil_koordinat_sekolah_final.xlsx"

# --- FUNCTION: GET COORDINATES FROM URL ---
# Signed decimal pair directly after "@", e.g. "@-8.2282283,114.3620055".
# The sign is an optional leading "-" and the digits must form a real number;
# a character class such as [-?\d\.]+ would also accept junk like "?-." or "--".
_COORD_PATTERN = re.compile(r'@(-?\d+(?:\.\d+)?),(-?\d+(?:\.\d+)?)')


def extract_coords(url):
    """Extract the latitude/longitude pair that follows ``@`` in a URL.

    Args:
        url: URL text to scan, e.g. ``".../@-8.2282283,114.3620055,17z"``.

    Returns:
        A ``(latitude, longitude)`` tuple of strings, each prefixed with an
        apostrophe (presumably so spreadsheet software keeps the value as
        text -- confirm with the consumer of the output file), or
        ``("Cek Manual", "Cek Manual")`` when no well-formed coordinate pair
        is present.
    """
    match = _COORD_PATTERN.search(url)
    if match:
        return f"'{match.group(1)}", f"'{match.group(2)}"
    return "Cek Manual", "Cek Manual"

# --- LOAD DATA ---
# Validate the input BEFORE launching Chrome, so that a missing file does not
# leave an orphaned browser/driver process behind.
if not os.path.exists(input_file):
    print(f"❌ File {input_file} ga ada, Bro!")
    sys.exit(1)  # non-zero status: this is an error exit

df = pd.read_excel(input_file)

# Rows still holding the "Belum Diambil" placeholder are the ones to process.
# NOTE(review): progress is written to output_file but data is always read
# from input_file, so an interrupted run only resumes if the input file
# already carries these result columns -- confirm the intended workflow.
if "Latitude_Hasil" not in df.columns:
    df["Latitude_Hasil"] = "Belum Diambil"
    df["Longitude_Hasil"] = "Belum Diambil"

total = len(df)
print(f"=== Memulai Scraping {total} Data ===")

# --- SETUP CHROME ---
chrome_options = Options()
# chrome_options.add_argument("--headless")  # Enable to run without a browser window
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrome_options)

try:
    # `position` is the 1-based row counter used for progress output and
    # auto-save; it does not depend on the DataFrame index being 0..n-1.
    for position, (index, row) in enumerate(df.iterrows(), start=1):
        # Skip rows that already have a result (or were flagged for manual check).
        if df.at[index, "Latitude_Hasil"] != "Belum Diambil":
            continue

        nama_sekolah = str(row["nama_sekolah"]).strip()
        alamat = str(row["alamat"]).strip() if pd.notna(row["alamat"]) else ""

        search_query = f"{nama_sekolah} {alamat}"
        # Percent-encode the query: characters such as "/", "#", "?" or "&" in
        # a name/address would otherwise change the structure of the URL.
        url_search = f"https://www.google.com/maps/search/{quote_plus(search_query)}"

        print(f"[{position}/{total}] Mencari: {nama_sekolah}")

        driver.get(url_search)

        try:
            # Wait until the address bar contains "@<something>,<something>".
            WebDriverWait(driver, 10).until(lambda d: "@" in d.current_url and "," in d.current_url.split("@")[1])

            # Extra pause before reading the URL (presumably lets the page
            # finish updating the address bar -- TODO confirm it is needed).
            time.sleep(3)

            lat, lon = extract_coords(driver.current_url)
            df.at[index, "Latitude_Hasil"] = lat
            df.at[index, "Longitude_Hasil"] = lon
            print(f"OK: {lat}, {lon}")

        except Exception:
            # Best effort: any failure for this row (normally the wait timing
            # out) flags it for manual checking and the run continues.
            print("Koordinat tidak ditemukan otomatis (Timeout)")
            df.at[index, "Latitude_Hasil"] = "Cek Manual"
            df.at[index, "Longitude_Hasil"] = "Cek Manual"

        # Auto-save every 5 rows so an interruption loses little work.
        if position % 5 == 0:
            df.to_excel(output_file, index=False)
            print("Progress disimpan...")

except KeyboardInterrupt:
    print("\nStopping... Menyimpan data terakhir.")

finally:
    # Always close the browser, even if the final save fails (e.g. the output
    # file is locked by another program).
    try:
        df.to_excel(output_file, index=False)
    finally:
        driver.quit()
    print(f"\nSelesai! cek di file: {output_file}")

=== Memulai Scraping 119 Data ===
[1/119] Mencari: SERASI - Sekolah Ramah Inklusi
OK: '-8.2282283, '114.3620055
[2/119] Mencari: TK. Dharma Wanita 7 kepatihan
OK: '-8.2129255, '114.3788351
[3/119] Mencari: KUMON BANYUWANGI
OK: '-8.2158788, '114.3692556
[4/119] Mencari: Kantor Neutron Yogyakarta Cabang Banyuwangi-1
OK: '-8.2142591, '114.3548889
[5/119] Mencari: KB-TK Taman Quran Banyuwangi
OK: '-8.2313125, '114.3525625
   💾 Progress disimpan...
[6/119] Mencari: Sekolah Kapal Pesiar Blambangan College
OK: '-8.275169, '114.3374638
[7/119] Mencari: The Saba School
OK: '-8.2124946, '114.3707951
[8/119] Mencari: KB AL IRSYAD AL ISLAMIYYAH Banyuwangi
OK: '-8.2017676, '114.3755326
[9/119] Mencari: TK Santa Maria Banyuwangi
OK: '-8.2164652, '114.3605725
[10/119] Mencari: PAUD Inklusif Cerdas Banyuwangi
OK: '-8.2263348, '114.3706854
   💾 Progress disimpan...
[11/119] Mencari: ISLAMIC CENTER BAITURRAHMAN BANYUWANGI
OK: '-8.2215398, '114.3684359
[12/119] Mencari: TK KHADIJAH 4 Panderejo - Ba