# Google Places Sample Data

## Import Packages

In [1]:
!pip install python-dotenv



In [2]:
!pip install folium geopandas

Collecting folium
  Obtaining dependency information for folium from https://files.pythonhosted.org/packages/ae/6d/18a7546e1748ecdd6ed7cd00d3f183faf1df08bd4f5e5e0eb3e72458b862/folium-0.17.0-py2.py3-none-any.whl.metadata
  Downloading folium-0.17.0-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting geopandas
  Obtaining dependency information for geopandas from https://files.pythonhosted.org/packages/c4/64/7d344cfcef5efddf9cf32f59af7f855828e9d74b5f862eddf5bfd9f25323/geopandas-1.0.1-py3-none-any.whl.metadata
  Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting branca>=0.6.0 (from folium)
  Obtaining dependency information for branca>=0.6.0 from https://files.pythonhosted.org/packages/fc/be/720f85abacd654ec86f1431bc7c004eae74417bd9d0e7a2bc43601062627/branca-0.8.0-py3-none-any.whl.metadata
  Downloading branca-0.8.0-py3-none-any.whl.metadata (1.5 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Obtaining dependency information for pyogrio>=0.7.2 from https://files.pyt

In [3]:
import requests
import pandas as pd
import time
import os
import matplotlib.pyplot as plt
import folium
from dotenv import load_dotenv
from folium.plugins import HeatMap

# Read variables from a .env file (if one is found) into the environment
load_dotenv()

# API Key
API_KEY = os.environ.get('GOOGLE_PLACES_API_KEY')

# Fail fast: without a key every later request is rejected by the API and the
# notebook would only break further down with confusing empty results.
if not API_KEY:
    raise RuntimeError(
        "GOOGLE_PLACES_API_KEY is not set. Define it in your environment or in a .env file."
    )

ModuleNotFoundError: No module named 'folium'

## Collect Data

In [None]:
def get_places_data(location, radius, place_type, api_key, max_results=20):
    """
    Fetches places data using the Google Places Nearby Search API.

    Results are requested page by page (the API returns up to 20 per request)
    until ``max_results`` places have been collected or no further page exists.

    Args:
    - location (str): Latitude and Longitude (e.g., "37.7749,-122.4194")
    - radius (int): Search radius in meters
    - place_type (str): Type of place to search (e.g., 'restaurant', 'tourist_attraction')
    - api_key (str): Your Google Places API key
    - max_results (int): Maximum number of results to return

    Returns:
    - pd.DataFrame: One row per place with the columns place_id, name, address,
      rating, user_ratings_total, latitude, longitude and place_type. At most
      ``max_results`` rows; the columns are present even when no place was found.

    Raises:
    - requests.HTTPError: If the HTTP request itself fails (non-2xx response).
    - RuntimeError: If the API rejects the first request (e.g. REQUEST_DENIED).
    """
    endpoint_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    columns = [
        'place_id', 'name', 'address', 'rating',
        'user_ratings_total', 'latitude', 'longitude', 'place_type'
    ]
    places = []
    next_page_token = None

    while len(places) < max_results:
        # Construct the API request
        params = {
            'location': location,
            'radius': radius,
            'type': place_type,
            'key': api_key
        }

        if next_page_token:
            params['pagetoken'] = next_page_token

        # Make the API request; the timeout keeps a stalled connection from
        # hanging the notebook indefinitely
        res = requests.get(endpoint_url, params=params, timeout=10)
        res.raise_for_status()
        results = res.json()

        # The API reports problems in the JSON body, not via the HTTP status
        status = results.get('status')
        if status not in (None, 'OK', 'ZERO_RESULTS'):
            if places:
                # A follow-up page failed; keep the pages already collected
                break
            raise RuntimeError(
                f"Google Places API request failed with status {status}: "
                f"{results.get('error_message', 'no error message provided')}"
            )

        # Add the results to the list
        places.extend(results.get('results', []))

        # Check if there's a next page
        next_page_token = results.get('next_page_token')

        # If there is no next page or we've reached the max results, break
        if not next_page_token or len(places) >= max_results:
            break

        # A next_page_token only becomes valid after a short delay
        time.sleep(2)

    # Create a DataFrame from the results; whole pages are appended above, so
    # trim to the requested maximum here
    data = []
    for place in places[:max_results]:
        coords = place.get('geometry', {}).get('location', {})
        data.append({
            'place_id': place.get('place_id'),
            'name': place.get('name'),
            'address': place.get('vicinity'),
            'rating': place.get('rating'),
            'user_ratings_total': place.get('user_ratings_total'),
            'latitude': coords.get('lat'),
            'longitude': coords.get('lng'),
            'place_type': place_type
        })

    return pd.DataFrame(data, columns=columns)

In [None]:
def get_place_details(place_id, api_key):
    """
    Fetch detailed information about a place using the Google Places API.

    Only the fields name, rating, reviews and photos are requested.

    Args:
    - place_id (str): The unique ID of the place.
    - api_key (str): Your Google Places API key.

    Returns:
    - dict: Dictionary containing place details such as reviews and photos.
      Empty if the response carries no 'result' entry.

    Raises:
    - requests.HTTPError: If the HTTP request itself fails (non-2xx response).
    - RuntimeError: If the API rejects the request (e.g. REQUEST_DENIED).
    """
    endpoint_url = "https://maps.googleapis.com/maps/api/place/details/json"
    params = {
        'place_id': place_id,
        'fields': 'name,rating,reviews,photos',
        'key': api_key
    }

    # Make the API request; the timeout keeps a stalled connection from
    # hanging the notebook indefinitely
    response = requests.get(endpoint_url, params=params, timeout=10)
    response.raise_for_status()
    payload = response.json()

    # The API reports problems in the JSON body, not via the HTTP status.
    # An unknown place is not treated as an error: it yields an empty dict.
    status = payload.get('status')
    if status not in (None, 'OK', 'ZERO_RESULTS', 'NOT_FOUND'):
        raise RuntimeError(
            f"Google Places API request failed with status {status}: "
            f"{payload.get('error_message', 'no error message provided')}"
        )

    return payload.get('result', {})

In [None]:
def extract_reviews(place_details):
    """
    Extract reviews from place details.

    Args:
    - place_details (dict): Dictionary containing place details including reviews.
      May be empty or None.

    Returns:
    - list: One dict per review with the keys author_name, rating, text and
      relative_time_description. A field missing from a review (for example a
      rating left without text) is reported as None. The list is empty when
      there are no reviews.
    """
    fields = ('author_name', 'rating', 'text', 'relative_time_description')

    # Tolerate a missing/None details dict as well as a missing/None reviews entry
    raw_reviews = (place_details or {}).get('reviews') or []

    return [
        {field: review.get(field) for field in fields}
        for review in raw_reviews
    ]

In [None]:
def get_photo_urls(place_details, api_key, max_photos=5):
    """
    Build Place Photo URLs for the photos listed in a place's details.

    No request is sent; the URLs are only assembled as strings. Note that
    every returned URL contains the API key as a query parameter.

    Args:
    - place_details (dict): Place details; photos are read from its 'photos' entry.
    - api_key (str): Your Google Places API key.
    - max_photos (int): Only the first max_photos photos are used.

    Returns:
    - list: Photo URLs (maxwidth=400), empty when there is no 'photos' entry.
    """
    # Nothing to do when the place has no photos
    if 'photos' not in place_details:
        return []

    # Each photo entry carries the reference that identifies the image
    selected = place_details['photos'][:max_photos]
    references = [entry['photo_reference'] for entry in selected]

    return [
        f"https://maps.googleapis.com/maps/api/place/photo?maxwidth=400&photoreference={photo_reference}&key={api_key}"
        for photo_reference in references
    ]

In [None]:
# Run this block sparingly: every call below sends live requests to the Google Places API

# Search center ("lat,lng") and search radius in meters
location = "37.7749,-122.4194"  # San Francisco, CA
radius = 5000  # 5 km radius

# One search per place type, each returned as its own DataFrame
restaurants_data = get_places_data(
    location=location, radius=radius, place_type='restaurant', api_key=API_KEY
)
tourist_attractions_data = get_places_data(
    location=location, radius=radius, place_type='tourist_attraction', api_key=API_KEY
)

In [None]:
# Stack both result sets into a single frame with a fresh, continuous index
all_places_data = pd.concat(
    objs=[restaurants_data, tourist_attractions_data],
    ignore_index=True,
)

In [None]:
# Run this block sparingly: it sends a live request to the Google Places API

# Hard-coded sample place_id.
# TODO: replace with a place_id taken from the search results (e.g. all_places_data['place_id'])
place_id = 'ChIJN1t_tDeuEmsRUsoyG83frY4'

# Look up name, rating, reviews and photos for that place
place_details = get_place_details(place_id=place_id, api_key=API_KEY)

In [None]:
# Extract reviews: a list of dicts with author_name, rating, text and
# relative_time_description (empty if the place details contain no reviews)
reviews = extract_reviews(place_details)

# Print one line per review; relative_time_description is not shown here
print("Reviews:")
for review in reviews:
    print(f"Author: {review['author_name']}, Rating: {review['rating']}, Review: {review['text']}")

In [None]:
# Convert reviews to a DataFrame (one row per review).
# Uses `reviews` from the extraction cell above; the previously referenced
# `all_reviews` is never defined in this notebook and raised a NameError.
reviews_df = pd.DataFrame(reviews)

In [None]:
# Build photo URLs for the place (the URLs are only assembled here; no API request is sent)
photo_urls = get_photo_urls(place_details, API_KEY)

# Every URL embeds the API key as a query parameter. Mask it in the printed
# output so the key does not end up in the saved notebook; `photo_urls` itself
# keeps the complete, usable URLs.
print("\nPhoto URLs:")
for url in photo_urls:
    print(url.replace(API_KEY, '<API_KEY>') if API_KEY else url)

In [None]:
# Convert photo URLs to a DataFrame (one row per URL).
# Uses `photo_urls` from the cell above; the previously referenced
# `all_photo_urls` is never defined in this notebook and raised a NameError.
# NOTE: these URLs contain the API key, so treat this frame as sensitive.
photo_urls_df = pd.DataFrame({'photo_url': photo_urls})

## Save Data to CSV

In [None]:
# Write each DataFrame to a CSV file in the current working directory,
# without the index column.
all_places_data.to_csv('travel_places_data.csv', index=False)
reviews_df.to_csv('place_reviews.csv', index=False)
# NOTE(review): URLs produced by get_photo_urls include the API key as a query
# parameter; if photo_urls_df holds those URLs, this file stores the key in
# plain text. Keep it out of version control.
photo_urls_df.to_csv('place_photos.csv', index=False)

## Data Exploration & Preparation

In [None]:
# Preview the first rows of the combined places data
all_places_data.head()

In [None]:
# Summary statistics for the combined places data
all_places_data.describe()

In [None]:
# Histogram of place ratings; places without a rating are left out
fig, ax = plt.subplots(figsize=(8, 6))
all_places_data['rating'].dropna().hist(bins=20, color='skyblue', ax=ax)
ax.set_title('Distribution of Ratings')
ax.set_xlabel('Rating')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Location heatmap to show the density of places

# Keep only the coordinates, dropping rows where either one is missing
location_data = all_places_data[['latitude', 'longitude']].dropna()

# Get the center of the map (average latitude and longitude)
center_lat = location_data['latitude'].mean()
center_lng = location_data['longitude'].mean()

# Create a Folium map centered on the average coordinates
mymap = folium.Map(location=[center_lat, center_lng], zoom_start=12)

# Build the [latitude, longitude] pairs in one vectorized step instead of
# iterating row by row; tolist() yields plain Python floats
heatmap_data = location_data[['latitude', 'longitude']].to_numpy().tolist()

# Add the HeatMap layer to the map
HeatMap(heatmap_data).add_to(mymap)

# Show the map in the notebook
mymap

In [None]:
# Save the map as an HTML file in the current working directory
mymap.save('places_heatmap.html')