In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time

In [None]:
# Setup cell: load the list of cafes to scrape, then start the Chrome browser.
#
# Names defined here and used by later cells:
#   cafe_df          - DataFrame of the cafe CSV (read later via 'Cafe_Name' and 'Maps_URL')
#   urls             - list of the 'Maps_URL' values (used for the summary line below)
#   driver           - the Selenium Chrome WebDriver
#   all_reviews_data - list that collects one dict per scraped review

# Path of the input CSV, kept in one place so it is easy to change.
CAFE_CSV_PATH = "E:/Personal Project/Cafe_Data.csv"

# The CSV is read BEFORE the browser is launched, so an unexpected read error
# does not leave an orphaned Chrome window behind.
try:
    # Load Dataset
    cafe_df = pd.read_csv(CAFE_CSV_PATH)
    urls = cafe_df['Maps_URL'].tolist()
except FileNotFoundError:
    print("Error: The file was not found. Please check the path.")
    # Bind an empty frame so the scraping cell becomes a no-op instead of
    # failing with NameError (or silently reusing a stale `cafe_df`).
    cafe_df = pd.DataFrame(columns=['Cafe_Name', 'Maps_URL'])
    urls = []
except KeyError:
    print("Error: The column 'Maps_URL' was not found in your CSV file.")
    # The loaded frame is unusable without 'Maps_URL'; replace it with an empty
    # one so the scraping loop does not crash on row['Maps_URL'].
    cafe_df = pd.DataFrame(columns=['Cafe_Name', 'Maps_URL'])
    urls = []

# This automatically downloads and manages the correct driver for your version of Chrome
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))

all_reviews_data = []

print(f"Found {len(urls)} URLs to scrape.")

Found 20 URLs to scrape.


In [None]:
# Scraping cell: visit every cafe URL from the cafe CSV and collect its reviews.
#
# Reads `cafe_df` and `driver` from the setup cell and fills `all_reviews_data`
# with one dict per review (keys: Cafe_URL, Reviewer_Name, Rating, Review_Text).

# CSS selectors / class names. These have to be manually inspected from any one
# of the URLs (the Google Maps review layout is common to all of them) and fed
# into the code.
REVIEWS_PANE_SELECTOR = 'div.m6QErb.DxyBCb.kA9KIf.dS8AEf'
REVIEW_CARD_SELECTOR = 'div.jJc9Ad'
REVIEW_TEXT_SELECTOR = 'span.wiI7pd'
name_class = 'd4r55'
rating_class = 'kvMYJc'

# Timing knobs (seconds) and number of scroll passes per page.
PAGE_LOAD_WAIT_S = 5
SCROLL_PASSES = 5
SCROLL_WAIT_S = 2


def _child_text(parent, css_selector, default="N/A"):
    """Return the text of the first element under `parent` matching `css_selector`.

    Returns `default` when the lookup fails. Only `Exception` is caught, so
    Ctrl-C / kernel interrupt still stops the scrape (a bare `except:` would
    swallow KeyboardInterrupt too).
    """
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).text
    except Exception:
        return default


def _child_attribute(parent, css_selector, attribute, default="N/A"):
    """Return `attribute` of the first element under `parent` matching `css_selector`.

    Returns `default` when the lookup fails; see `_child_text` for why only
    `Exception` is caught.
    """
    try:
        return parent.find_element(By.CSS_SELECTOR, css_selector).get_attribute(attribute)
    except Exception:
        return default


# Start from an empty list so re-running this cell does not append duplicates
# of the reviews collected by a previous run.
all_reviews_data = []

# Loop through each DataFrame row to get the cafe name and URL from the cafe CSV file
for _, row in cafe_df.iterrows():
    cafe_name = row['Cafe_Name']
    url = row['Maps_URL']

    # Missing values (NaN) and anything that is not an http(s) link are skipped.
    if not isinstance(url, str) or not url.startswith('http'):
        print(f"Skipping invalid entry for {cafe_name}")
        continue

    print(f"\nScraping reviews from: '{cafe_name}'")
    driver.get(url)
    time.sleep(PAGE_LOAD_WAIT_S)  # fixed wait for the page to load

    # Scroll the reviews pane to the bottom a few times so more reviews load.
    # Best-effort: if the pane cannot be found, continue with what is loaded.
    try:
        scrollable_div = driver.find_element(By.CSS_SELECTOR, REVIEWS_PANE_SELECTOR)
        for _ in range(SCROLL_PASSES):
            driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div)
            print("Scrolling...")
            time.sleep(SCROLL_WAIT_S)
    except Exception as e:
        print(f"Could not scroll. Proceeding with loaded reviews. Error: {e}")

    # Extract the review data from each review card on the page
    review_elements = driver.find_elements(By.CSS_SELECTOR, REVIEW_CARD_SELECTOR)
    print(f"Found {len(review_elements)} reviews on the page.")

    for review in review_elements:
        # Each field falls back to "N/A" on its own, so one missing element
        # does not discard the rest of the review.
        all_reviews_data.append({
            'Cafe_URL': url,
            'Reviewer_Name': _child_text(review, f'div.{name_class}'),
            # The star rating is read from the 'aria-label' of the rating span.
            'Rating': _child_attribute(review, f'span.{rating_class}', 'aria-label'),
            'Review_Text': _child_text(review, REVIEW_TEXT_SELECTOR),
        })

print("\nLoop finished. Proceed to the next cell to save the data.")

  


Scraping reviews from: 'Sri Sri Café'
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Found 8 reviews on the page.

Scraping reviews from: 'Season's Cafe'
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Found 8 reviews on the page.

Scraping reviews from: 'Eat N Drink'
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Found 8 reviews on the page.

Scraping reviews from: 'Vinaya Café'
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Found 8 reviews on the page.

Scraping reviews from: 'Ande ka Funda'
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Found 3 reviews on the page.

Scraping reviews from: 'Third Wave Coffee'
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Found 8 reviews on the page.

Scraping reviews from: 'CHAI VENUE'
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Scrolling...
Found 8 reviews on the page.

Scraping reviews from: 'Spice N Sip Mr Gowda'
Scrolling...
Scroll

In [None]:
# Save cell: turn the collected review dicts into a DataFrame and write it to CSV.

# Columns of the output file, matching the keys of the dicts in `all_reviews_data`.
# Pinning them means an empty scrape still produces a CSV with a header row
# (a column-less empty frame would write a file that pd.read_csv cannot read back).
# NOTE: a key added to the review dicts must also be added here, or it is dropped.
REVIEW_COLUMNS = ['Cafe_URL', 'Reviewer_Name', 'Rating', 'Review_Text']

# Converting multiple Dictionaries to DataFrame
final_df = pd.DataFrame(all_reviews_data, columns=REVIEW_COLUMNS)

# Saving it to a new CSV file (relative to the notebook's working directory)
final_df.to_csv('scraped_reviews.csv', index=False, encoding='utf-8')

print("\n-------------------------------------------")
print(f"Scraping complete! {len(final_df)} reviews saved to 'scraped_reviews.csv'")
# Last expression of the cell: shows the first rows as the cell output.
final_df.head()


-------------------------------------------
Scraping complete! 150 reviews saved to 'scraped_reviews.csv'


Unnamed: 0,Cafe_URL,Reviewer_Name,Rating,Review_Text
0,https://www.google.com/maps/place/Sri+Sri+Cafe...,Akash Raj,5 stars,Had a great experience at Sri Sri Cafe! The at...
1,https://www.google.com/maps/place/Sri+Sri+Cafe...,Simran Fathima,5 stars,Pizza at its best🍕as said as is💖! From service...
2,https://www.google.com/maps/place/Sri+Sri+Cafe...,Uday,5 stars,The Unlimited Gujrati Thali was amazing and su...
3,https://www.google.com/maps/place/Sri+Sri+Cafe...,Amrita Chattopadhyay,5 stars,The food is delicious. The place is clean. The...
4,https://www.google.com/maps/place/Sri+Sri+Cafe...,Dharam Hinduja,5 stars,This is one of those places that we discovered...


In [5]:
# Shut down the WebDriver session held in `driver` (created in the setup cell).
driver.quit()