# **Fetch Google Reviews for POIs (Aveiro)**

**Goal:** Build a pipeline to fetch reviews for Points of Interest (POIs) in Aveiro, using:
- **OSM-derived POIs** (from `pois_aveiro.csv`)
- **Google Places API (v1)** for place search and place details (reviews)
- Output to `reviews_output.csv` for downstream **NLP** (sentiment & topic modeling)

> **Note:** Google Places **reviews** are limited to 5 reviews per POI

## 1. Setup & Requirements

**Requirements**
1. A **Google Cloud Project** with **Places API** enabled.
2. A valid **API key**
3. The input file `pois_aveiro.csv` including `geom_pt` (EWKB, SRID=4326) and/or `geom`.

**Environment variables:**
- `GOOGLE_API_KEY`: API key.

In [None]:
import requests
import time
import os
import pandas as pd
from shapely import wkb

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY", "AIzaSyAwme_k4xdwadaStLv2_bLFAckn55hJ1TNF8L8A")
INPUT_CSV = "../pois_aveiro.csv"
SLEEP_BETWEEN_REQUESTS = 0.25  # seconds to avoid hitting rate limits
RADIUS_METERS = 10  # Search within a 10-meter radius by default
MAX_PLACES_PER_RUN = 10  # Limit the number of places to query

## 2. Helper Functions
Utilities for geometry parsing and API calls.

In [14]:
def ewkb_hex_point_to_lonlat(hex_str: str):
    """
    Convert EWKB hex POINT (SRID=4326) to (lon, lat).
    Many exports store POINT as EWKB hex like '0101000020E6100000...'
    """
    if not isinstance(hex_str, str) or not hex_str:
        return None
    try:
        geom = wkb.loads(bytes.fromhex(hex_str))
        if geom.geom_type == "Point":
            return (geom.x, geom.y)
    except Exception:
        pass
    return None


# Helper Functions for Places API

def places_search_nearby(lat, lon, radius=500, types=["restaurant"]):
    """
    Use Google Places Nearby Search to search for places within a given radius.
    Returns the places found and the API call status.
    """
    url = "https://places.googleapis.com/v1/places:searchNearby"
    payload = {
        "locationRestriction": {
            "circle": {
                "center": {
                    "latitude": lat,
                    "longitude": lon
                },
                "radius": radius
            }
        },
        # "includedTypes": types,
        "maxResultCount":  (MAX_PLACES_PER_RUN or 10),
    }
    params = { 'key': GOOGLE_API_KEY }
    headers = {
        "X-Goog-FieldMask": "places.displayName,places.id,places.reviews,places.location,places.rating,places.primaryType",
        "X-Goog-Api-Key": GOOGLE_API_KEY,
        "Content-Type": "application/json",
    }

    try:
        # Send POST request to the Nearby Search API
        response = requests.post(url, params=params, json=payload, headers=headers, timeout=20)
        time.sleep(SLEEP_BETWEEN_REQUESTS)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error during API request: {e}")
        return None, "ERROR"
    
    data = response.json()
    places = data.get("places", [])
    return places, "OK" if places else "ZERO_RESULTS"

def process_reviews(reviews):
    """
    Extract the reviews from the response and format them into a list of dictionaries.
    """
    reviews_data = []
    for review in reviews:
        reviews_data.append({
            "author_name": review.get("authorAttribution", {}).get("displayName", "Unknown"),
            "rating": review.get("rating"),
            "review_text": review.get("text", {}).get("text", ""),
            "publish_time": review.get("publishTime"),
        })
    return reviews_data

def get_reviews_for_nearby_places(lat, lon, radius=500):
    """
    Fetch nearby places and their reviews within the specified radius.
    Returns a list of reviews and the status of the operation.
    """
    places, status = places_search_nearby(lat, lon, radius)
    if not places:
        return [], f"No nearby places found. Status: {status}"

    all_reviews = []
    for place in places:
        place_id = place.get("id")
        place_name = place.get("displayName", {}).get("text", "")
        place_location = place.get("location", {})
        place_rating = place.get("rating")
        place_primary_type = place.get("primaryType", "Unknown")
        reviews = place.get("reviews", [])
        processed_reviews = process_reviews(reviews)

        for review in processed_reviews:
            review["place_name"] = place_name
            review["place_id"] = place_id
            review["place_location"] = place_location
            review["place_rating"] = place_rating
            review["place_primary_type"] = place_primary_type
            all_reviews.append(review)

    return all_reviews, "OK"

## 3. Load & Preview POIs
We expect `pois_aveiro.csv` to contain, among many attributes, at least:
- `gid`: unique id
- `amenity` / `shop` / `tourism`: category hints
- `geom_pt` (preferred) or `geom`: EWKB encoded `POINT` (SRID 4326)

In [15]:
pd.set_option("display.max_columns", 60)
try:
    df = pd.read_csv(INPUT_CSV, low_memory=False)
    display(df.head(3))
    print(f"Loaded {len(df)} rows from {INPUT_CSV}")
except FileNotFoundError:
    print(f"WARNING: {INPUT_CSV} not found. Place it next to this notebook.")
    df = pd.DataFrame()

Unnamed: 0,gid,access,addr:city,addr:country,addr:hamlet,addr:housename,addr:housenumber,addr:municipality,addr:place,addr:postcode,addr:street,addr:suburb,addr:unit,air_conditioning,alt_name,amenity,animal_shelter,architect,architect:wikidata,area,armrest,artist_name,artwork_subject,artwork_type,athletics,atm,attraction,authentication:app,authentication:membership_card,authentication:none,...,toilets:wheelchair,tourism,townhall:type,traffic_calming,traffic_sign,train,tram,unisex,url,valves,vehicle,vending,visibility,washing_machine,waste,water,watermill:disused,website,wetland,wheelchair,wheelchair:description:en,wheelchair:description:pt,width,wifi,wikidata,wikimedia_commons,wikipedia,geom,geom_pt,cat
0,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,0104000020E6100000010000000101000000EBED85B828...,0101000020E6100000EBED85B8287521C025C33B0A2151...,bus_stop
1,2,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,0104000020E6100000010000000101000000861C5BCF10...,0101000020E6100000861C5BCF101E21C00A720635325C...,stop_position
2,3,,Aveiro,,,,,,,3810-000,"EN 109 Km 57,8",,,,,fuel,,,,,,,,,,,,,,,...,,,,,,,,,,schrader,,,,,,,,,,yes,,,,,,,,0104000020E610000001000000010100000078978BF84E...,0101000020E610000078978BF84E4821C05DCDF055E150...,fuel


Loaded 13258 rows from ../pois_aveiro.csv


## 4. Processing Logic per-POI
For each row:
1. Determine **amenity** (or fallbacks).
2. Parse **coordinates** from `geom_pt` / `geom`.
3. Build a **name hint** from available name columns.
4. **Nearby Search** to fetch place name, place ID, place location, place rating, place type and **reviews** (limited to `MAX_REVIEWS_PER_PLACE`).

In [16]:
def run_pipeline():
    # Load POIs from file
    df = pd.read_csv(INPUT_CSV)

    out_rows = []
    processed = 0

    # Loop over each POI and process reviews
    for idx, row in df.iterrows():
        if processed >= MAX_PLACES_PER_RUN:
            break

        # Get coordinates from 'geom_pt' or 'geom' column
        lonlat = None
        if "geom_pt" in df.columns and isinstance(row.get("geom_pt"), str):
            lonlat = ewkb_hex_point_to_lonlat(row.get("geom_pt"))
            print(lonlat)
        if lonlat is None and "geom" in df.columns and isinstance(row.get("geom"), str):
            lonlat = ewkb_hex_point_to_lonlat(row.get("geom"))

        if not lonlat:
            continue

        lon, lat = lonlat

        # Fetch reviews for the nearby places
        reviews_data, status = get_reviews_for_nearby_places(lat, lon, RADIUS_METERS)

        if reviews_data:
            # Add reviews data to output rows
            for review in reviews_data:
                out_rows.append(review)

        processed += 1

    # Create DataFrame and save to CSV
    if out_rows:
        df_reviews = pd.DataFrame(out_rows)
        df_reviews.to_csv("nearby_reviews.csv", index=False)
        print(f"Saved reviews data to 'nearby_reviews.csv'.")
    else:
        print("No reviews fetched.")
        
    return df_reviews

df_reviews = run_pipeline()
display(df_reviews.head(10))


  df = pd.read_csv(INPUT_CSV)


(-8.7288263, 40.6338208)
(-8.558722, 40.7202822)
(-8.6412275, 40.6318767)
(-8.6303399, 40.6483621)
(-8.6529905, 40.6408293)
(-8.6155501, 40.6530001)
(-8.6302907, 40.6481634)
(-8.6289941, 40.6490393)
(-8.6305762, 40.6464895)
(-8.6398177, 40.6372592)
Saved reviews data to 'nearby_reviews.csv'.


Unnamed: 0,author_name,rating,review_text,publish_time,place_name,place_id,place_location,place_rating,place_primary_type
0,Markus Wenger,4,,2024-11-08T10:05:22.566412Z,Avelab - Esgueira,ChIJAQTNSQCZIw0R4oI-HJ_-4rM,"{'latitude': 40.6481978, 'longitude': -8.630289}",4,medical_lab
1,Lara Mendes,5,Muito simpáticos! A entrega foi rapidíssima e ...,2025-10-28T20:47:06.519725824Z,EATIN,ChIJ4UYOPL2ZIw0RqgE9nEWdd4M,"{'latitude': 40.648165899999995, 'longitude': ...",5,hamburger_restaurant
2,Diogo Moço,5,Encomendei comida para casa e foram rápidos na...,2025-10-28T20:47:48.904490884Z,EATIN,ChIJ4UYOPL2ZIw0RqgE9nEWdd4M,"{'latitude': 40.648165899999995, 'longitude': ...",5,hamburger_restaurant
3,Nicole,5,"saboroso, ótima entrega e muito simpáticos",2025-10-24T03:35:24.556694520Z,EATIN,ChIJ4UYOPL2ZIw0RqgE9nEWdd4M,"{'latitude': 40.648165899999995, 'longitude': ...",5,hamburger_restaurant
