# 🎯 Clean Google Maps Review Scraper

**Completely redesigned approach focusing on individual reviewers**

This scraper properly extracts:
- 📝 **Reviewer Name**: Individual person who wrote the review
- ⭐ **Star Rating**: Stars given by that specific reviewer
- 💬 **Review Text**: The actual review content
- 🏢 **Business/Apartment Name**: Location being reviewed

In [49]:
# 📚 Import Required Libraries
import pandas as pd
import time
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys

print("📚 All libraries imported successfully!")
print("🎯 Ready to scrape Google Maps reviews with precision!")

📚 All libraries imported successfully!
🎯 Ready to scrape Google Maps reviews with precision!


In [50]:
def setup_driver(headless=False):
    """
    Setup Chrome driver with optimal settings for Google Maps
    """
    try:
        options = Options()
        
        # Anti-detection settings
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        options.add_argument('--disable-blink-features=AutomationControlled')
        options.add_experimental_option("excludeSwitches", ["enable-automation"])
        options.add_experimental_option('useAutomationExtension', False)
        options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36')
        
        if headless:
            options.add_argument('--headless')
        
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=options)
        driver.maximize_window()
        
        # Remove webdriver property
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        
        print("✅ Chrome driver setup complete")
        return driver
        
    except Exception as e:
        print(f"❌ Driver setup failed: {e}")
        return None

print("🔧 Driver setup function ready")

🔧 Driver setup function ready


In [51]:
def get_business_name(driver):
    """
    Extract the business/apartment name from Google Maps
    """
    name_selectors = [
        "h1[data-attrid='title']",
        "h1.DUwDvf",
        "h1.fontHeadlineLarge", 
        "div.lMbq3e h1",
        "h1"
    ]
    
    for selector in name_selectors:
        try:
            element = driver.find_element(By.CSS_SELECTOR, selector)
            name = element.text.strip()
            if name and len(name) > 2:
                print(f"🏢 Found business: {name}")
                return name
        except:
            continue
    
    return "Unknown Business"

def navigate_to_reviews(driver):
    """
    Navigate to the reviews tab in Google Maps
    """
    try:
        wait = WebDriverWait(driver, 15)
        
        # Look for Reviews button/tab
        review_selectors = [
            "button[data-value='Reviews']",
            "[data-tab-index='1']",
            "button[aria-label*='Reviews']",
            "div[role='tab'][aria-label*='Reviews']"
        ]
        
        for selector in review_selectors:
            try:
                reviews_tab = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
                reviews_tab.click()
                print("📝 Clicked on Reviews tab")
                time.sleep(3)
                return True
            except TimeoutException:
                continue
        
        print("ℹ️ Already on reviews or reviews visible")
        return True
        
    except Exception as e:
        print(f"⚠️ Navigation issue: {e}")
        return True  # Continue anyway

def expand_all_reviews(driver):
    """
    Find and click all "More" buttons to expand truncated reviews
    """
    print("🔍 Looking for 'More' buttons to expand reviews...")
    
    # Different selectors for "More" buttons
    more_button_selectors = [
        "button[aria-label*='More']",
        "button[data-expandable-section]",
        "//button[contains(text(), 'More')]",
        "//button[contains(@aria-label, 'more')]",
        ".review-more-link",
        "button.review-more-button"
    ]
    
    expanded_count = 0
    
    for selector in more_button_selectors:
        try:
            if selector.startswith("//"):  # XPath selector
                more_buttons = driver.find_elements(By.XPATH, selector)
            else:  # CSS selector
                more_buttons = driver.find_elements(By.CSS_SELECTOR, selector)
            
            for button in more_buttons:
                try:
                    if button.is_displayed() and button.is_enabled():
                        # Scroll button into view
                        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
                        time.sleep(1)
                        
                        # Click the button
                        button.click()
                        expanded_count += 1
                        time.sleep(1)  # Wait for expansion
                        
                except Exception as e:
                    continue
                    
        except Exception as e:
            continue
    
    if expanded_count > 0:
        print(f"✅ Expanded {expanded_count} 'More' buttons")
    else:
        print("ℹ️ No 'More' buttons found or they were already expanded")
    
    return expanded_count

print("🏢 Business name, navigation, and review expansion functions ready")

🏢 Business name, navigation, and review expansion functions ready


In [52]:
def extract_reviewer_data(review_element):
    """
    Extract individual reviewer's name, stars, and review text
    IMPROVED: Better targeting of individual review components
    """
    try:
        reviewer_data = {
            'reviewer_name': 'Unknown',
            'stars': 'N/A', 
            'review_text': 'No text available'
        }
        
        # 1. Extract REVIEWER NAME - More specific targeting
        name_selectors = [
            ".d4r55",  # Primary reviewer name class
            "a .d4r55",  # Name within link
            ".TSUbDb .d4r55",
            ".fontHeaderMedium",
            "[aria-label] .d4r55"
        ]
        
        reviewer_name_found = False
        for selector in name_selectors:
            try:
                name_elements = review_element.find_elements(By.CSS_SELECTOR, selector)
                for name_elem in name_elements:
                    name = name_elem.text.strip()
                    if name and len(name) > 1 and len(name) < 100:  # Reasonable name length
                        reviewer_data['reviewer_name'] = name
                        reviewer_name_found = True
                        break
                if reviewer_name_found:
                    break
            except:
                continue
        
        # 2. Extract STAR RATING - Look for star images/aria-labels
        star_selectors = [
            "span[role='img'][aria-label*='star']",
            "div[role='img'][aria-label*='star']", 
            "[aria-label*='stars'][role='img']",
            "[aria-label*='Rated'][role='img']",
            ".kvMYJc[role='img']"
        ]
        
        stars_found = False
        for selector in star_selectors:
            try:
                star_elements = review_element.find_elements(By.CSS_SELECTOR, selector)
                for star_elem in star_elements:
                    aria_label = star_elem.get_attribute('aria-label')
                    if aria_label and ('star' in aria_label.lower() or 'rated' in aria_label.lower()):
                        # Extract rating from labels like "Rated 5 out of 5 stars" or "4 stars"
                        rating_match = re.search(r'(\d+(?:\.\d+)?)', aria_label)
                        if rating_match:
                            rating = rating_match.group(1)
                            if 1 <= float(rating) <= 5:  # Valid star rating
                                reviewer_data['stars'] = rating
                                stars_found = True
                                break
                if stars_found:
                    break
            except:
                continue
        
        # 3. Extract REVIEW TEXT - Look for actual review content
        # First, try to click "More" button to expand full review text
        try:
            more_buttons = review_element.find_elements(By.CSS_SELECTOR, "button[aria-label*='More']")
            if not more_buttons:
                more_buttons = review_element.find_elements(By.XPATH, ".//button[contains(text(), 'More')]")
            if not more_buttons:
                more_buttons = review_element.find_elements(By.CSS_SELECTOR, "[data-expandable-section] button")
            
            for more_button in more_buttons:
                try:
                    if more_button.is_displayed() and more_button.is_enabled():
                        more_button.click()
                        time.sleep(1)  # Wait for text to expand
                        print("   🔍 Clicked 'More' to expand review")
                        break
                except:
                    continue
        except:
            pass  # Continue if no "More" button found
        
        text_selectors = [
            ".wiI7pd",  # Main review text
            ".MyEned .wiI7pd",
            "span.wiI7pd",
            "[data-expandable-section] .wiI7pd",
            "[data-expandable-section] span",
            ".review-text",
            ".fontBodyMedium span"
        ]
        
        review_text_found = False
        for selector in text_selectors:
            try:
                text_elements = review_element.find_elements(By.CSS_SELECTOR, selector)
                for text_elem in text_elements:
                    text = text_elem.text.strip()
                    # More strict filtering for actual review content
                    if (text and len(text) > 20 and len(text) < 3000 and  # Reasonable length (increased for full text)
                        text != reviewer_data['reviewer_name'] and  # Not the name
                        'star' not in text.lower() and  # Not rating text
                        'rated' not in text.lower() and  # Not rating text
                        not text.isdigit() and  # Not just numbers
                        not re.match(r'^[\d\s\.,]+$', text)):  # Not just numbers/dates
                        reviewer_data['review_text'] = text[:1000]  # Increased limit for full reviews
                        review_text_found = True
                        break
                if review_text_found:
                    break
            except:
                continue
        
        # Only return data if we found at least name and either stars or text
        if (reviewer_data['reviewer_name'] != 'Unknown' and 
            (reviewer_data['stars'] != 'N/A' or reviewer_data['review_text'] != 'No text available')):
            return reviewer_data
        else:
            return None
        
    except Exception as e:
        print(f"⚠️ Error extracting reviewer data: {e}")
        return None

print("👤 Individual reviewer extraction function ready")

👤 Individual reviewer extraction function ready


In [53]:
def scrape_reviews(driver, max_reviews):
    """
    Main function to scrape individual reviews from Google Maps
    ENHANCED: Now handles "More" buttons and better scrolling for more reviews
    """
    reviews = []
    seen_reviewers = set()  # Track unique reviewers to avoid duplicates
    
    print(f"🎯 Starting to scrape {max_reviews} UNIQUE individual reviews...")
    
    # Wait for reviews to load
    time.sleep(5)
    
    # Enhanced scrolling to load more reviews
    print("📜 Scrolling to load more reviews...")
    for scroll_attempt in range(10):  # More scroll attempts for larger review counts
        # Scroll down
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)
        
        # Also try scrolling the reviews container specifically
        try:
            reviews_container = driver.find_element(By.CSS_SELECTOR, ".m6QErb")
            driver.execute_script("arguments[0].scrollTop = arguments[0].scrollHeight;", reviews_container)
        except:
            pass
        
        # Check if we have enough review containers
        containers = driver.find_elements(By.CSS_SELECTOR, "div[data-review-id]")
        print(f"   📦 Scroll {scroll_attempt + 1}: Found {len(containers)} review containers")
        
        if len(containers) >= max_reviews + 5:  # Get a few extra to account for filtering
            break
        
        time.sleep(2)
    
    # Find review containers - try most specific first
    container_selectors = [
        "div[data-review-id]",  # Most reliable - actual review containers
        "div.jftiEf",  # Individual review blocks
        "div.MyEned",  # Alternative review container
    ]
    
    review_containers = []
    for selector in container_selectors:
        try:
            containers = driver.find_elements(By.CSS_SELECTOR, selector)
            if containers and len(containers) > 1:  # Need multiple containers for multiple reviews
                review_containers = containers
                print(f"📦 Found {len(containers)} review containers using: {selector}")
                break
        except:
            continue
    
    if not review_containers:
        print("❌ No review containers found")
        return []
    
    # Extract data from each unique review container
    count = 0
    processed_containers = 0
    
    for container in review_containers:
        if count >= max_reviews:
            break
            
        processed_containers += 1
        print(f"\n🔍 Processing container {processed_containers}/{len(review_containers)}")
            
        try:
            # Scroll container into view for better extraction
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", container)
            time.sleep(2)
            
            # Extract reviewer data (this now includes "More" button handling)
            reviewer_data = extract_reviewer_data(container)
            
            if reviewer_data and reviewer_data['reviewer_name'] != 'Unknown':
                reviewer_name = reviewer_data['reviewer_name']
                
                # Check if we've already seen this reviewer (avoid duplicates)
                if reviewer_name not in seen_reviewers:
                    seen_reviewers.add(reviewer_name)
                    reviews.append(reviewer_data)
                    count += 1
                    print(f"✅ UNIQUE Review {count}: {reviewer_name} | {reviewer_data['stars']}⭐")
                    print(f"   📝 Text: {reviewer_data['review_text'][:80]}...")
                else:
                    print(f"⚠️ Skipping duplicate reviewer: {reviewer_name}")
            else:
                print("⚠️ Could not extract valid reviewer data from this container")
            
        except Exception as e:
            print(f"❌ Error processing container: {e}")
            continue
    
    print(f"\n🎉 Successfully extracted {len(reviews)} UNIQUE reviews from {processed_containers} containers")
    return reviews

print("🔍 Main scraping function ready")

🔍 Main scraping function ready


In [54]:
def save_to_csv(reviews, business_name):
    """
    Save the extracted reviews to a CSV file
    """
    if not reviews:
        print("❌ No reviews to save")
        return False
    
    try:
        # Add business name to each review
        for review in reviews:
            review['business_name'] = business_name
        
        # Create DataFrame with proper column order
        df = pd.DataFrame(reviews)
        columns = ['business_name', 'reviewer_name', 'stars', 'review_text']
        df = df[columns]
        
        # Generate filename
        safe_name = business_name.replace(' ', '_').replace('/', '_')
        filename = f"clean_reviews_{safe_name}.csv"
        
        # Save to CSV
        df.to_csv(filename, index=False, encoding='utf-8')
        
        print(f"💾 Saved {len(reviews)} reviews to {filename}")
        print("\n📊 Sample data:")
        print(df.to_string(max_colwidth=60, index=False))
        
        return True
        
    except Exception as e:
        print(f"❌ Error saving CSV: {e}")
        return False

print("💾 CSV saving function ready")

💾 CSV saving function ready


In [55]:
def run_clean_scraper(url, max_reviews=2000, headless=False):
    """
    🚀 MAIN FUNCTION - Clean Google Maps Review Scraper
    
    This function orchestrates the entire scraping process:
    1. Sets up the browser
    2. Navigates to the URL  
    3. Gets the business name
    4. Navigates to reviews
    5. Extracts individual reviewer data
    6. Saves to CSV
    """
    driver = None
    
    try:
        print("🎯 CLEAN GOOGLE MAPS REVIEW SCRAPER")
        print("=" * 45)
        
        # Step 1: Setup driver
        print("🔧 Setting up Chrome driver...")
        driver = setup_driver(headless=headless)
        if not driver:
            return False
        
        # Step 2: Navigate to URL
        print(f"🌐 Loading: {url}")
        driver.get(url)
        time.sleep(5)
        
        # Step 3: Get business name
        business_name = get_business_name(driver)
        
        # Step 4: Navigate to reviews
        print("📝 Navigating to reviews section...")
        navigate_to_reviews(driver)
        
        # Step 4.5: Expand all "More" buttons to get full review text
        print("🔍 Expanding all reviews...")
        expand_all_reviews(driver)
        
        # Step 5: Extract reviews
        reviews = scrape_reviews(driver, max_reviews=max_reviews)
        
        if reviews:
            # Step 6: Save to CSV
            save_to_csv(reviews, business_name)
            
            print("\n🎉 SCRAPING COMPLETED SUCCESSFULLY!")
            print(f"🏢 Business: {business_name}")
            print(f"📊 Reviews extracted: {len(reviews)}")
            return True
        else:
            print("\n❌ No reviews were extracted")
            return False
            
    except Exception as e:
        print(f"\n❌ Scraping failed: {e}")
        return False
        
    finally:
        if driver:
            driver.quit()
            print("\n✅ Browser closed")

print("🚀 Main scraper function ready to use!")

🚀 Main scraper function ready to use!


In [56]:
# 🎯 RUN THE CLEAN SCRAPER
# Replace the URL with your target Google Maps location

# Configuration
TARGET_URL = "https://maps.app.goo.gl/wT5sboiYx8TDCUgX7"  # Replace with your URL
MAX_REVIEWS = 5  # Start with 2 reviews for testing
HEADLESS = False  # Set to True to hide browser

print("🎯 EXECUTING CLEAN SCRAPER - FIXED VERSION")
print("🔧 Now properly extracts UNIQUE individual reviewers")
print(f"📍 URL: {TARGET_URL}")
print(f"📊 Max Reviews: {MAX_REVIEWS}")
print("-" * 45)

# Run the scraper
success = run_clean_scraper(
    url=TARGET_URL,
    max_reviews=MAX_REVIEWS, 
    headless=HEADLESS
)

if success:
    print("\n✅ All done! Check your CSV file for UNIQUE reviewer results.")
else:
    print("\n❌ Scraping unsuccessful. Check the URL and try again.")

🎯 EXECUTING CLEAN SCRAPER - FIXED VERSION
🔧 Now properly extracts UNIQUE individual reviewers
📍 URL: https://maps.app.goo.gl/wT5sboiYx8TDCUgX7
📊 Max Reviews: 5
---------------------------------------------
🎯 CLEAN GOOGLE MAPS REVIEW SCRAPER
🔧 Setting up Chrome driver...


✅ Chrome driver setup complete
🌐 Loading: https://maps.app.goo.gl/wT5sboiYx8TDCUgX7
🏢 Found business: The Holly
📝 Navigating to reviews section...
📝 Clicked on Reviews tab
🔍 Expanding all reviews...
🔍 Looking for 'More' buttons to expand reviews...
✅ Expanded 11 'More' buttons
🎯 Starting to scrape 5 UNIQUE individual reviews...
📜 Scrolling to load more reviews...
   📦 Scroll 1: Found 40 review containers
📦 Found 40 review containers using: div[data-review-id]

🔍 Processing container 1/40
✅ UNIQUE Review 1: Bibek Dhungana | 1⭐
   📝 Text: I’m the guarantor for Apartment 627A. We have signed leases for both Summer and ...

🔍 Processing container 2/40
⚠️ Skipping duplicate reviewer: Bibek Dhungana

🔍 Processing container 3/40
✅ UNIQUE Review 2: Joshue Lewis | 5⭐
   📝 Text: I absolutely love living at The Holly! The location is unbeatable, especially fo...

🔍 Processing container 4/40
⚠️ Skipping duplicate reviewer: Joshue Lewis

🔍 Processing container 5/40
✅ UNIQUE Review 3: Katherine Earl