# GameBus Health Behavior Mining - Location Categorization

This notebook demonstrates how to categorize location data from GameBus using the Google Places API with a grid-based caching strategy.

## Setup

First, let's set up our environment and import the necessary modules.

In [1]:
import sys
import os
import pandas as pd
import json
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from dotenv import load_dotenv

# Add the project root directory to the Python path
sys.path.append('..')

# Import project modules
from config.paths import RAW_DATA_DIR
from config.credentials import GOOGLE_PLACES_API_KEY
from src.utils.logging import setup_logging
from src.utils.file_handlers import load_json, save_json

# Load environment variables (for Google Places API key)
load_dotenv()

# Set up logging
logger = setup_logging(log_level="INFO")
logger.info("Notebook initialized")

2025-05-14 11:46:41,893 - gamebus_health_mining - INFO - Notebook initialized


## Load Location Data

Load the raw location data that was extracted in the previous notebook.

In [2]:
# Load location data
player_id = 107631  # Use the same player ID as in data extraction
location_file = f"{RAW_DATA_DIR}/player_{107631}_location.json"

try:
    location_data = load_json(location_file)
    print(f"Loaded {len(location_data)} location data points")
    
    # Convert to DataFrame for easier analysis
    location_df = pd.DataFrame(location_data)
    display(location_df.head())
    
except Exception as e:
    print(f"Error loading location data: {e}")

Loaded 13408 location data points


Unnamed: 0,LATITUDE,LONGITUDE,ALTIDUDE,SPEED,ERROR,TIMESTAMP,ARM,activity_id,date,gameDescriptor
0,200,200,31,0,0,1747154573,Arm 2,3339642,1747154634000,GEOFENCE
1,200,200,31,0,0,1747154513,Arm 2,3339641,1747154574000,GEOFENCE
2,200,200,31,0,0,1747154453,Arm 2,3339640,1747154514000,GEOFENCE
3,200,200,31,0,0,1747154393,Arm 2,3339639,1747154453000,GEOFENCE
4,200,200,31,0,0,1747154333,Arm 2,3339638,1747154393000,GEOFENCE


In [24]:
import googlemaps
from datetime import datetime

gmaps = googlemaps.Client(key="AIzaSyD-61j59DOW1aVoBY89Bm82xO1INhhMPZo")

# Look up an address with reverse geocoding
reverse_geocode_result = gmaps.reverse_geocode((51.447228320654425, 5.484548663752785))
print(reverse_geocode_result)



[{'address_components': [{'long_name': 'CFWM+VR', 'short_name': 'CFWM+VR', 'types': ['plus_code']}, {'long_name': 'Eindhoven', 'short_name': 'Eindhoven', 'types': ['locality', 'political']}, {'long_name': 'Eindhoven', 'short_name': 'Eindhoven', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'North Brabant', 'short_name': 'NB', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'Netherlands', 'short_name': 'NL', 'types': ['country', 'political']}], 'formatted_address': 'CFWM+VR Eindhoven, Netherlands', 'geometry': {'bounds': {'northeast': {'lat': 51.44725, 'lng': 5.484624999999999}, 'southwest': {'lat': 51.447125, 'lng': 5.4845}}, 'location': {'lat': 51.4472283, 'lng': 5.484548699999999}, 'location_type': 'GEOMETRIC_CENTER', 'viewport': {'northeast': {'lat': 51.44853648029149, 'lng': 5.485911480291501}, 'southwest': {'lat': 51.4458385197085, 'lng': 5.483213519708498}}}, 'place_id': 'GhIJPyzlxj65SUAR-kbPiC3wFUA', 'plus_code': {'compound_code': 'C

In [25]:
print(GOOGLE_PLACES_API_KEY)
place_type_result = gmaps.place(place_id="ChIJgexMlR_Lj4ARiKCKuhNnjn0")
print(place_type_result)

AIzaSyB1ioT23JgnDaVl39hHtMo9Sk-QbPhdnMo
{'html_attributions': [], 'result': {'address_components': [{'long_name': 'spc 2421', 'short_name': 'spc 2421', 'types': ['subpremise']}, {'long_name': '2855', 'short_name': '2855', 'types': ['street_number']}, {'long_name': 'Stevens Creek Boulevard', 'short_name': 'Stevens Creek Blvd', 'types': ['route']}, {'long_name': 'Central San Jose', 'short_name': 'Central San Jose', 'types': ['neighborhood', 'political']}, {'long_name': 'Santa Clara', 'short_name': 'Santa Clara', 'types': ['locality', 'political']}, {'long_name': 'Santa Clara County', 'short_name': 'Santa Clara County', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'California', 'short_name': 'CA', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country', 'political']}, {'long_name': '95050', 'short_name': '95050', 'types': ['postal_code']}], 'adr_address': '<span class="street-address">2855 Stev

In [4]:
import asyncio
from google.maps import places_v1

from google.api_core.client_options import ClientOptions # For API key configuration



In [30]:

client_options = ClientOptions(api_key=GOOGLE_PLACES_API_KEY) # Changed API_KEY to api_key
    
client=  places_v1.PlacesAsyncClient(client_options=client_options) 
request_name = f"places/ChIJ62_oCR7Lj4AR_MGWkSPotD4"
field_mask_value = "id,types,primaryType"
request = places_v1.GetPlaceRequest(name=request_name)
response = await client.get_place(
    request=request,
    metadata=[("x-goog-fieldmask", field_mask_value)]
)
print(response)
        
            
all_place_types = list(response.primary_type)
            
print(all_place_types)
                


id: "ChIJ62_oCR7Lj4AR_MGWkSPotD4"
types: "department_store"
types: "shoe_store"
types: "clothing_store"
types: "point_of_interest"
types: "store"
types: "establishment"
primary_type: "department_store"

['d', 'e', 'p', 'a', 'r', 't', 'm', 'e', 'n', 't', '_', 's', 't', 'o', 'r', 'e']


In [22]:
    # Make sure you've set PLACE_ID_TO_QUERY and API_KEY in the cell above
    # For example:
    # PLACE_ID_TO_QUERY = "your_specific_place_id" 
    # API_KEY = "your_api_key"
    
    
    
    # Or run the main function example:
    # await main()


Fetching details for Place ID: ChIJtdLiGWpvxkcRBK5ZLccn2m8...
ChIJtdLiGWpvxkcRBK5ZLccn2m8
Successfully retrieved details for Place ID: ChIJtdLiGWpvxkcRBK5ZLccn2m8
Type: ['street_address']
id: "GhIJs-xJYHMMSkAR9jnjoClwFEA"
types: "plus_code"



In [None]:
import os, requests
LAT, LNG = 52.09878625840195, 5.1112955604325565

def get_place_type_from_coord(lat: float, lng: float) -> str:
    url = "https://places.googleapis.com/v1/places:searchNearby"
    headers = {
        "Content-Type": "application/json",
        "X-Goog-Api-Key": GOOGLE_PLACES_API_KEY,
        # Ask only for what we need – keeps the response tiny and cheap.
        "X-Goog-FieldMask": "places.displayName,places.types,places.primaryType",
    }
    payload = {
        #"includedTypes": ["library", "preschool", "school", "primary_school", "secondary_school", "university"],
        "maxResultCount": 1, #can change for debugging
        "locationRestriction": {
            "circle": {
            "center": {
                "latitude": lat,
                "longitude": lng},
            "radius": 20
            }
        },                                             
        "rankPreference": "DISTANCE" # closest first
    }         
    resp = requests.post(url, headers=headers, json=payload, timeout=5)
    resp.raise_for_status()
    places = resp.json().get("places", [])
    print(places)
    if not places:
        return "other"

    # Google always returns primaryType; fall back to full list if you like
    types = places[0]["types"]
    primaryType = places[0]["primaryType"]

    return primaryType, types

get_place_type_from_coord(LAT,LNG)

[{'types': ['gym', 'fitness_center', 'sports_activity_location', 'health', 'point_of_interest', 'establishment'], 'displayName': {'text': 'SportCity Utrecht Westerdijk', 'languageCode': 'nl'}, 'primaryType': 'gym'}]


('gym',
 ['gym',
  'fitness_center',
  'sports_activity_location',
  'health',
  'point_of_interest',
  'establishment'])