https://medium.com/@darshankhandelwal12/scrape-google-maps-reviews-with-python-7e1f1a6cb3a1

In [75]:
import requests 
from bs4 import BeautifulSoup
import pandas as pd
from fake_useragent import UserAgent
import re
from datetime import date
from reportlab.lib.pagesizes import letter, A4
from reportlab.pdfgen import canvas
import textwrap
import os

In [41]:
# Install into the running kernel's environment (explicit %pip magic) and pin
# the version that the recorded output shows was installed.
%pip install reportlab==4.2.0

Collecting reportlab
  Downloading reportlab-4.2.0-py3-none-any.whl.metadata (1.4 kB)
Collecting chardet (from reportlab)
  Downloading chardet-5.2.0-py3-none-any.whl.metadata (3.4 kB)
Downloading reportlab-4.2.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hDownloading chardet-5.2.0-py3-none-any.whl (199 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.4/199.4 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: chardet, reportlab
Successfully installed chardet-5.2.0 reportlab-4.2.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
def get_dynamic_user_agent():
    """Return a randomly selected browser User-Agent string.

    A new ``UserAgent`` object is built on each call and its ``random``
    attribute is returned.
    """
    return UserAgent().random

In [3]:
# Sample one User-Agent string to check what the helper returns.
test_user = get_dynamic_user_agent()

In [12]:
# Display the sampled User-Agent string.
test_user

'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'

In [16]:
# Request headers for the exploratory fetch: a single random User-Agent.
headers = {"User-Agent": get_dynamic_user_agent()}

In [3]:
def extract_place_id(url):
    """Return the first ``0x<hex>:0x<hex>`` token found in ``url``.

    Only lowercase hexadecimal digits are recognised. When the URL contains
    no such token, ``None`` is returned.
    """
    hit = re.compile(r'0x[0-9a-f]+:0x[0-9a-f]+').search(url)
    return hit.group(0) if hit is not None else None

In [73]:
def extract_place_name(url):
    """Extract the place name from a Google Maps ``/place/<name>`` URL.

    The name is the path segment that follows ``/place/``, up to the next
    ``/`` or ``@``. Every ``+`` in it is replaced with ``_`` (the result is
    used by ``get_reviews_data`` to build a file name).

    Args:
        url: A Google Maps place URL.

    Returns:
        The place name with ``+`` replaced by ``_``, or ``None`` when the URL
        has no ``/place/<name>`` segment.
    """
    match = re.search(r'/place/([^/@]+)', url)
    if match is None:
        # Without this guard the function fell through to a return of an
        # unbound local and raised UnboundLocalError.
        return None
    return match.group(1).replace('+', '_')

In [65]:
def save_to_txt(filename, place_name, users, line_length=80):
    """Write a plain-text review report to ``filename`` (UTF-8, overwritten).

    The file starts with a three-line header (location, today's date, number
    of reviews) followed by a blank line. Each review then contributes its
    name, rating, review date, the review text wrapped to ``line_length``
    columns, and a dashed separator line.

    Args:
        filename: Path of the text file to create.
        place_name: Name written on the ``LOCATION:`` header line.
        users: Sequence of dicts with the keys ``name``, ``rating``,
            ``review_date`` and ``review``.
        line_length: Maximum width of the wrapped review text.
    """
    separator = "--------------"
    wrapper = textwrap.TextWrapper(width=line_length)

    with open(filename, 'w', encoding='utf-8') as report:
        print(f"LOCATION: {place_name}", file=report)
        print(f"RUN DATE: {date.today()}", file=report)
        # The extra newline leaves one blank line between header and reviews.
        print(f"NO. COMMENTS: {len(users)}", end="\n\n", file=report)

        for entry in users:
            print(f"Name: {entry['name']}", file=report)
            print(f"Rating: {entry['rating']}", file=report)
            print(f"Review Date: {entry['review_date']}", file=report)
            print(wrapper.fill(f"Review: {entry['review']}"), file=report)
            print(separator, file=report)

In [48]:
def save_to_pdf(filename, place_name, users):
    """Render the reviews as an A4 PDF report.

    The first page starts with a three-line header (location, today's date,
    number of reviews). Each review is then drawn as name, rating, review
    date and the review text, with a blank line between reviews.

    Compared with a naive one-``drawString``-per-field layout, this version:
      * wraps the review text so long reviews do not run off the right edge;
      * checks for a page break before every line, so a review that starts
        near the bottom of a page is continued on the next page instead of
        being drawn below the page boundary;
      * re-selects the body font after each ``showPage()``, matching what
        ``txt_to_pdf`` does.

    Args:
        filename: Path of the PDF file to create.
        place_name: Name written on the ``LOCATION:`` header line.
        users: Sequence of dicts with the keys ``name``, ``rating``,
            ``review_date`` and ``review``.
    """
    left_margin = 30
    top_margin = 40
    bottom_margin = 40
    line_height = 20
    body_font = ("Helvetica", 12)
    wrap_chars = 80  # same character budget txt_to_pdf uses at this font size

    c = canvas.Canvas(filename, pagesize=A4)
    _, height = A4

    c.setFont("Helvetica", 14)
    c.drawString(left_margin, height - 40, f"LOCATION: {place_name}")
    c.drawString(left_margin, height - 60, f"RUN DATE: {date.today()}")
    c.drawString(left_margin, height - 80, f"NO. COMMENTS: {len(users)}")

    c.setFont(*body_font)
    y_position = height - 120

    def draw_line(text, y):
        """Draw one line at ``y`` (on a fresh page if needed); return the next y."""
        if y < bottom_margin:
            c.showPage()
            c.setFont(*body_font)
            y = height - top_margin
        c.drawString(left_margin, y, text)
        return y - line_height

    for user_data in users:
        y_position = draw_line(f"Name: {user_data['name']}", y_position)
        y_position = draw_line(f"Rating: {user_data['rating']}", y_position)
        y_position = draw_line(f"Review Date: {user_data['review_date']}", y_position)
        for review_line in textwrap.wrap(f"Review: {user_data['review']}", width=wrap_chars):
            y_position = draw_line(review_line, y_position)
        y_position -= line_height  # Space between reviews

    c.save()

In [8]:
# Google Maps place URL used as scraper input.
# NOTE(review): the variable is named "westgate" but the URL path is for
# "Central+Westville" — confirm which name is intended.
central_westgate = 'https://www.google.com/maps/place/Central+Westville/@13.8059695,100.4465064,17z/data=!4m18!1m9!3m8!1s0x30e29be5502c2807:0x3ad97383404e6a3e!2sCentral+Westville!8m2!3d13.8059643!4d100.4490813!9m1!1b1!16s%2Fg%2F11tjm1w2pq!3m7!1s0x30e29be5502c2807:0x3ad97383404e6a3e!8m2!3d13.8059643!4d100.4490813!9m1!1b1!16s%2Fg%2F11tjm1w2pq?entry=ttu'

In [68]:
# Google Maps place URL for "Robinson+Lifestyle+Sri+Samarn", used as scraper input.
robinson = 'https://www.google.com/maps/place/Robinson+Lifestyle+Sri+Samarn/@13.9403604,100.4216309,12z/data=!4m12!1m2!2m1!1sRobinson!3m8!1s0x30e283bf8f26263f:0x20d0efc02e3501a5!8m2!3d13.9403604!4d100.5534668!9m1!1b1!15sCghSb2JpbnNvbiIDiAEBkgEPc2hvcHBpbmdfY2VudGVy4AEA!16s%2Fg%2F1hm4tzsnx?entry=ttu'

In [12]:
# Pull the 0x…:0x… identifier out of the URL; it is interpolated as feature_id in the request URL.
place_id = extract_place_id(central_westgate)

In [70]:
# Same extraction for the second URL.
# NOTE(review): robinson_id is not referenced again in the cells shown here.
robinson_id = extract_place_id(robinson)

In [11]:
# Extract the /place/<name> segment of the URL.
place_name = extract_place_name(central_westgate)

In [12]:
# Display the extracted name.
# NOTE(review): the recorded output shows spaces, but the current
# extract_place_name replaces '+' with '_' — the output looks stale; re-run to confirm.
place_name

'Central Westville'

In [13]:
# First-page request URL: feature_id is the extracted place ID and
# next_page_token is left empty.
url = f"https://www.google.com/async/reviewDialog?hl=en_us&async=feature_id:{place_id},next_page_token:,sort_by:qualityScore,start_index:,associated_topic:,_fmt:pc"

In [17]:
# Fetch the first page of reviews and parse the returned HTML.
# The timeout stops the cell from hanging indefinitely, and raise_for_status()
# surfaces HTTP errors instead of silently parsing an error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

In [71]:
def _node_text(parent, selector):
    """Return the stripped text of the first ``selector`` match under ``parent``, or ''."""
    node = parent.select_one(selector)
    return node.text.strip() if node is not None else ''


def _node_attr(parent, selector, attr):
    """Return attribute ``attr`` of the first ``selector`` match under ``parent``, or ''."""
    node = parent.select_one(selector)
    return node.get(attr, '') if node is not None else ''


def get_reviews_data(place_review_link):
    """Scrape all review pages for a Google Maps place, save and print them.

    Pages are requested from the ``reviewDialog`` endpoint, following the
    ``data-next-page-token`` attribute of the reviews container until it is
    empty or missing. The collected reviews are written to
    ``reviews_<place_name>.txt`` via ``save_to_txt`` and then printed.

    Args:
        place_review_link: Google Maps place URL containing a
            ``0x…:0x…`` identifier.

    Raises:
        ValueError: If no place identifier can be extracted from the link.
        requests.HTTPError: If a page request returns an error status.
    """
    request_timeout = 30  # seconds; avoids hanging forever on a stalled request

    place_id = extract_place_id(place_review_link)
    if place_id is None:
        raise ValueError(f"No place ID found in URL: {place_review_link!r}")
    # Fall back to a fixed name so the output file name is still usable when
    # the link has no /place/<name> segment.
    place_name = extract_place_name(place_review_link) or "unknown_place"

    headers = {
        "User-Agent": get_dynamic_user_agent()
    }

    user = []
    next_page_token = ''
    seen_tokens = set()

    while True:
        url = f"https://www.google.com/async/reviewDialog?hl=en_us&async=feature_id:{place_id},next_page_token:{next_page_token},sort_by:qualityScore,start_index:,associated_topic:,_fmt:pc"

        response = requests.get(url, headers=headers, timeout=request_timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        for el in soup.select('.gws-localreviews__google-review'):
            # A missing sub-element yields '' instead of raising, so one
            # incomplete review does not abort the whole scrape.
            user.append({
                'name': _node_text(el, '.TSUbDb'),
                'rating': _node_attr(el, '.lTi8oc', 'aria-label'),
                'review': _node_text(el, '.Jtu6Td'),
                'review_date': _node_text(el, '.dehysf'),
            })

        token = _node_attr(soup, '.gws-localreviews__general-reviews-block', 'data-next-page-token')
        # Stop on the last page (no container / empty token) and also when a
        # token repeats, which would otherwise loop forever.
        if not token or token in seen_tokens:
            break
        seen_tokens.add(token)
        next_page_token = token

    save_to_txt(f"reviews_{place_name}.txt", place_name, user)

    print(f"LOCATION: {place_name}")
    print(f'RUN DATE: {date.today()}')
    print(f'No. COMMENTS: {len(user)}')

    for user_data in user:
        print(user_data)
        print("--------------")

In [46]:
# Dump the first review's raw HTML for offline inspection. UTF-8 is set
# explicitly (as save_to_txt does) because review text can contain non-ASCII
# characters that the platform default encoding may fail to encode.
with open("test.txt", "w", encoding="utf-8") as f:
    f.write(str(soup.select('.gws-localreviews__google-review')[0]))

In [26]:
# Print the text of every `.c9QyIf` element to see what the response contains.
for el in soup.select('.c9QyIf'):
    print(el.text)

Central Westville999, Ratchaphruek Rd, Maha Sawat, Bang Kruai District, Nonthaburi, ThailandWrite a reviewEdit4.51,099 reviewsReviews are automatically processed to detect inappropriate content and spam. We may take down reviews that are flagged in order to comply with Google policies or legal obligations.People often mentionAll0department store149pet108ac units23running22+6exercise18waterfall12bridge12central eastville9security guard7lice6Sort byMost relevantNewestHighestLowestLoading...Kantavee WiengsimaShare reviewReport reviewLocal Guide·1,025 reviews·10,217 photos4 months agoHuge shopping mall that has shopping area inside and open-air outside. If you enjoy walking, this is the place where you can bring your family and pet. Lots of types of stores and restaurants. Clothes, electronic appliances, food are available. Hundreds of staff are willing to help you. Everything is new. Love this place!4 months ago0LikeJ FShare reviewReport reviewLocal Guide·23 reviews·119 photos3 weeks agoN

In [36]:
# Inspect the attributes of the reviews container; data-next-page-token is the
# value get_reviews_data follows for pagination.
soup.select_one('.gws-localreviews__general-reviews-block').attrs

{'data-google-review-count': '10',
 'data-next-page-token': 'CAESY0NBRVFDaHBFUTJwRlNVRlNTWEJEWjI5QlVEZGZURUZLUzJSZlgxOWZSV2hDY1c5MlNrbHZPVUpLUzFKcU1UbG5NRUZCUVVGQlIyZHVPVEpSVlVOamNsb3ROMGM0V1VGRFNVRQ==',
 'class': ['gws-localreviews__general-reviews-block']}

In [23]:
# Probe for `.lcorif` elements; the recorded run found none (empty list).
soup.select('.lcorif')

[]


In [22]:
# Read data-fid from the first `.loris` element; in the recorded run it equals
# the place ID extracted from the URL.
soup.select_one('.loris')['data-fid']

'0x30e29be5502c2807:0x3ad97383404e6a3e'

In [35]:
# Read the token that requests the next page of reviews.
soup.select_one('.gws-localreviews__general-reviews-block')['data-next-page-token']

'CAESY0NBRVFDaHBFUTJwRlNVRlNTWEJEWjI5QlVEZGZURUZLUzJSZlgxOWZSV2hDY1c5MlNrbHZPVUpLUzFKcU1UbG5NRUZCUVVGQlIyZHVPVEpSVlVOamNsb3ROMGM0V1VGRFNVRQ=='

In [66]:
# Scrape all review pages for the first URL; writes reviews_<place_name>.txt
# and prints every review.
get_reviews_data(central_westgate)

LOCATION: Central Westville
RUN DATE: 2024-06-05
No. COMMENTS: 520
TOKEN:
{'name': 'Kantavee Wiengsima', 'rating': 'Rated 5.0 out of 5,', 'review': 'Huge shopping mall that has shopping area inside and open-air outside. If you enjoy walking, this is the place where you can bring your family and pet. Lots of types of stores and restaurants. Clothes, electronic appliances, food are available. Hundreds of staff are willing to help you. Everything is new. Love this place!', 'review_date': '4 months ago'}
--------------
{'name': 'J F', 'rating': 'Rated 5.0 out of 5,', 'review': 'Newest pet friendly mall in the region. Mall is massive with many dining and shopping spots. Equiped with a pet park and open air running track.', 'review_date': '3 weeks ago'}
--------------
{'name': 'Day 25', 'rating': 'Rated 3.0 out of 5,', 'review': 'Pets allowed in the superstore, keep all pet lovers to bring those to have a walk making noisy barking and pet smell so horribly. Most pet lovers with dog leash kee

In [72]:
# Scrape all review pages for the second URL; writes reviews_<place_name>.txt
# and prints every review.
get_reviews_data(robinson)

LOCATION: Robinson Lifestyle Sri Samarn
RUN DATE: 2024-06-05
No. COMMENTS: 723
TOKEN:
{'name': 'Shomori Pass', 'rating': 'Rated 5.0 out of 5,', 'review': 'I’ve been a consumer here for many years and really love the way they renovated and expanded the shopping center. It now has some of my favorite food franchises such as Fuji and they’ve always had one of my other favorites The Salad Factory. It has a smaller gym and also a Tops Supermarket.', 'review_date': '9 months ago'}
--------------
{'name': 'Craig Burton', 'rating': 'Rated 4.0 out of 5,', 'review': "Oh, where to begin with Robinson Lifestyle Si Saman! This beacon of consumerism, nestled in western Bangkok, is a temple to shopping, dining, and all things lifestyle. With its three lofty floors, it's like the shopping gods decided to give us mere mortals a playground. Central Retail Corporation has outdone themselves this time, aiming to capture the hearts (and wallets) of high-spending shoppers, a noble quest indeed.Imagine, if y

In [67]:
def txt_to_pdf(txt_filename, pdf_filename, line_length=80):
    """Convert a UTF-8 text file into an A4 PDF, wrapping long lines.

    Each input line is stripped and wrapped to ``line_length`` characters,
    then drawn in 12pt Helvetica with a 30pt left margin. A new page is
    started when the next line would fall below the bottom of the page.
    Blank input lines are kept as blank lines in the PDF.

    Args:
        txt_filename: Path of the text file to read.
        pdf_filename: Path of the PDF file to create.
        line_length: Maximum number of characters per drawn line.

    NOTE(review): the built-in Helvetica font presumably cannot render
    non-Latin review text (e.g. Thai) — confirm against real output.
    """
    left_margin = 30
    top_margin = 40
    line_height = 14
    font = ("Helvetica", 12)

    c = canvas.Canvas(pdf_filename, pagesize=A4)
    _, height = A4

    c.setFont(*font)
    y_position = height - top_margin

    with open(txt_filename, 'r', encoding='utf-8') as f:
        for line in f:
            # textwrap.wrap() returns [] for blank input; substitute one empty
            # line so blank separator lines keep their vertical spacing.
            lines = textwrap.wrap(line.strip(), width=line_length) or [""]
            for wrapped_line in lines:
                if y_position < line_height:  # Start a new page if the text reaches the bottom
                    c.showPage()
                    y_position = height - top_margin
                    # Re-select the font explicitly after starting a new page.
                    c.setFont(*font)
                c.drawString(left_margin, y_position, wrapped_line)
                y_position -= line_height

    c.save()

# Example usage
# NOTE(review): get_reviews_data() writes "reviews_<place_name>.txt"; nothing
# shown in this notebook creates "reviews.txt" — confirm the input file name,
# otherwise this call fails on a fresh kernel.
txt_filename = "reviews.txt"
pdf_filename = "reviews.pdf"
txt_to_pdf(txt_filename, pdf_filename)


In [81]:
from dotenv import load_dotenv

In [82]:
# Load environment variables from a .env file (returned True in the recorded run).
load_dotenv()

True

In [83]:
# Confirm the key is configured without echoing the secret into the saved
# notebook output.
print("GOOGLE_API_KEY set:", os.getenv("GOOGLE_API_KEY") is not None)

[REDACTED — API key removed from saved output; rotate the exposed key]
