In [1]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

import json
import pandas as pd
import numpy as np # for NaN values
import os

In [2]:
# Read the Places API key from the environment.
# os.getenv returns None when PLACES_API_KEY is not set.
PLACES_KEY = os.getenv('PLACES_API_KEY')
# The API client is not built here: a later cell creates it with
# places_connection(PLACES_KEY), which wraps the same build() call in
# error handling. Building it in this cell as well only duplicated that work.

In [3]:
def places_connection(api_key):
    """Build a Google Places (v1) API client.

    Parameters
    ----------
    api_key : str
        Developer key forwarded to ``build`` as ``developerKey``.

    Returns
    -------
    object or None
        The client returned by ``build('places', 'v1', ...)``, or ``None``
        when ``build`` raises ``HttpError`` (the error is printed).
    """
    try:
        client = build('places', 'v1', developerKey=api_key)
    except HttpError as error:
        # Report the failure and signal it to the caller with None.
        print(f'An error occurred: {error}')
        return None
    return client

In [4]:
# Create the Places client through the guarded helper; it is None if the
# helper caught an HttpError.
service = places_connection(api_key=PLACES_KEY)

In [5]:
# Load cities
# os.path.join builds the path with the platform's separator; the previous
# plain string 'tests\cities.csv' relied on the invalid escape sequence '\c'.
cities = pd.read_csv(os.path.join('tests', 'cities.csv'))
cities.head(3)

Unnamed: 0,city
0,Altamonte-Springs
1,Apopka
2,Bartow


In [6]:
# Venue categories to search for; get_places() uses each entry as the leading
# term of its text query ("<category> in <city>, Florida, USA").
family_fun = [
    'museum',
    'cultural_landmark',
    'historical_place',
    'water_park',
    'aquarium',
    'shopping_mall',
]

In [15]:
# Column order of the table returned by get_places (also used when nothing is found).
_PLACE_COLUMNS = [
    'City', 'Family fun', 'Address', 'Latitude', 'Longitude',
    'Rating', 'User Rating Count', 'Comments',
]


def _review_text(review):
    """Return the plain text of one review, or 'N/A' when it has none.

    The review's ``text`` entry may be a nested object carrying the string
    under its own ``text`` key (the same shape ``displayName`` has), so it is
    unwrapped here; a plain string is returned unchanged.
    """
    body = review.get('text', 'N/A')
    if isinstance(body, dict):
        return body.get('text', 'N/A')
    return body


def _place_row(city, place):
    """Flatten one place from a search response into a row dict for `city`."""
    location = place.get('location', {})
    return {
        'City': city,
        'Family fun': place.get('displayName', {}).get('text', 'N/A'),
        'Address': place.get('formattedAddress', 'N/A'),
        'Latitude': location.get('latitude', np.nan),
        'Longitude': location.get('longitude', np.nan),
        'Rating': place.get('rating', np.nan),
        'User Rating Count': place.get('userRatingCount', 0),
        # Keep at most the first 3 reviews, in the order the API returned them.
        'Comments': [_review_text(review) for review in place.get('reviews', [])[:3]],
    }


def get_places(cities, family_fun, client=None):
    """Search the Places API for every (city, category) pair.

    One text search of the form ``"<category> in <city>, Florida, USA"`` is
    sent per pair (``pageSize`` 20). Only the places in that single response
    are used; no follow-up request is made for further pages.

    Parameters
    ----------
    cities : iterable of str
        City names to search in.
    family_fun : iterable of str
        Venue categories used as the leading term of each query.
    client : object, optional
        Places client exposing ``places().searchText(body=..., fields=...).execute()``.
        Defaults to the notebook-level ``service``.

    Returns
    -------
    pandas.DataFrame
        One row per place found, with the columns City, Family fun, Address,
        Latitude, Longitude, Rating, User Rating Count and Comments. The frame
        is empty (but has these columns) when nothing was found or `cities`
        is empty.

    Notes
    -----
    A failing query is reported with ``print`` and skipped, so one bad request
    does not abort the whole run.
    """
    if client is None:
        # Fall back to the client created earlier in the notebook.
        client = service

    rows = []
    for city in cities:
        # `category` is kept distinct from the places looped over below, so the
        # search term is never overwritten by a result.
        for category in family_fun:
            try:
                request_body = {
                    'textQuery': f'{category} in {city}, Florida, USA',
                    'regionCode': 'US',
                    'languageCode': 'en',
                    'pageSize': 20,
                }
                # fields='*' is the field mask passed with the request
                # (presumably "all fields" - confirm against the API docs).
                response = client.places().searchText(body=request_body, fields='*').execute()
                rows.extend(_place_row(city, found) for found in response.get('places', []))
            except Exception as e:
                print(f"Error processing {city}: {e}")

    # Build the frame once, after all requests, so it exists even for empty input.
    return pd.DataFrame(rows, columns=_PLACE_COLUMNS)

In [16]:
# Run the search for every entry of the `city` column against every category.
df = get_places(cities=cities['city'], family_fun=family_fun)

In [17]:
# Summarise the fetched table: row count, dtypes and non-null count per column.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2149 entries, 0 to 2148
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               2149 non-null   object 
 1   Family fun         2149 non-null   object 
 2   Address            2149 non-null   object 
 3   Latitude           2149 non-null   float64
 4   Longitude          2149 non-null   float64
 5   Rating             2063 non-null   float64
 6   User Rating Count  2149 non-null   int64  
 7   Comments           2149 non-null   object 
dtypes: float64(3), int64(1), object(4)
memory usage: 134.4+ KB


In [18]:
# Same summary of `df` as the previous cell (df has not been modified in between).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2149 entries, 0 to 2148
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               2149 non-null   object 
 1   Family fun         2149 non-null   object 
 2   Address            2149 non-null   object 
 3   Latitude           2149 non-null   float64
 4   Longitude          2149 non-null   float64
 5   Rating             2063 non-null   float64
 6   User Rating Count  2149 non-null   int64  
 7   Comments           2149 non-null   object 
dtypes: float64(3), int64(1), object(4)
memory usage: 134.4+ KB


In [23]:
# Keep only places rated above 3.5 that also have more than 50 user ratings.
# Rows with a missing rating compare as False and are therefore dropped.
df_f = df.loc[df['Rating'].gt(3.5) & df['User Rating Count'].gt(50)]

In [24]:
# Check how many rows survive the rating / rating-count filter.
df_f.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1584 entries, 0 to 2148
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               1584 non-null   object 
 1   Family fun         1584 non-null   object 
 2   Address            1584 non-null   object 
 3   Latitude           1584 non-null   float64
 4   Longitude          1584 non-null   float64
 5   Rating             1584 non-null   float64
 6   User Rating Count  1584 non-null   int64  
 7   Comments           1584 non-null   object 
dtypes: float64(3), int64(1), object(4)
memory usage: 111.4+ KB


In [25]:
# Drop repeated places, keeping the first occurrence of each.
# A place is identified by its full coordinate pair: matching on latitude
# alone would also discard distinct places that merely share a latitude.
df_f = df_f.drop_duplicates(subset=['Latitude', 'Longitude'])

In [26]:
# Check how many rows remain after removing duplicates.
df_f.info()

<class 'pandas.core.frame.DataFrame'>
Index: 620 entries, 0 to 2145
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               620 non-null    object 
 1   Family fun         620 non-null    object 
 2   Address            620 non-null    object 
 3   Latitude           620 non-null    float64
 4   Longitude          620 non-null    float64
 5   Rating             620 non-null    float64
 6   User Rating Count  620 non-null    int64  
 7   Comments           620 non-null    object 
dtypes: float64(3), int64(1), object(4)
memory usage: 43.6+ KB


## NODE Family Fun

In [27]:
# Export the node table. os.path.join uses the platform's path separator, so
# the file lands in neo4j/nodes/ on every OS instead of only on Windows.
df_f.to_csv(os.path.join('neo4j', 'nodes', 'node_family_fun.csv'), index=False, encoding='utf-8')

In [28]:
# Independent copy holding just the place name and its city.
r_ff = df_f.loc[:, ['Family fun', 'City']].copy()

In [29]:
# Tag every (place, city) pair with the relationship type.
r_ff = r_ff.assign(relation='PLACES_TO_GET_FUN')

In [30]:
# Export the relationship table. os.path.join uses the platform's path
# separator, so the file lands in neo4j/relationships/ on every OS.
r_ff.to_csv(os.path.join('neo4j', 'relationships', 'relation_family_fun.csv'), index=False, encoding='utf-8')