In [1]:
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

import json
import pandas as pd
import numpy as np # for NaN values
import os

In [2]:
# Read the Google Places API key from the environment (it is never hard-coded)
PLACES_KEY = os.getenv('PLACES_API_KEY')
# Fail fast with a clear message: os.getenv returns None for a missing variable,
# which would otherwise only surface later as one failed request per search
if not PLACES_KEY:
    raise RuntimeError("Environment variable 'PLACES_API_KEY' is not set")
# The API client is created once, further down, through places_connection(PLACES_KEY)

In [3]:
def places_connection(api_key):
    """Create a client for version 'v1' of the 'places' API.

    Parameters
    ----------
    api_key : str
        Developer key handed to ``build`` as ``developerKey``.

    Returns
    -------
    The object returned by ``build`` on success, or ``None`` when ``build``
    raises ``HttpError`` (the error is printed, not re-raised).
    """
    try:
        client = build('places', 'v1', developerKey=api_key)
    except HttpError as error:
        # Report the problem and signal failure to the caller with None
        print(f'An error occurred: {error}')
        return None
    else:
        return client

In [4]:
# Build the Places client; places_connection returns None when it caught an HttpError
service = places_connection(PLACES_KEY)
# Stop here rather than letting every later search fail on a None client
if service is None:
    raise RuntimeError('Could not connect to the Google Places API; see the error printed above')

In [5]:
# Load cities
# os.path.join replaces the literal 'tests\cities.csv': the backslash there starts
# an invalid escape sequence and only acts as a path separator on Windows
cities = pd.read_csv(os.path.join('tests', 'cities.csv'))
cities[:3]

Unnamed: 0,city
0,Altamonte-Springs
1,Apopka
2,Bartow


In [6]:
# Outdoor categories; each one is placed at the start of a text-search query
outdoor = [
    'beach',
    'parks',
    'state_park',
    'marina',
]

In [7]:
# Column order of the frame returned by get_places (also used when it is empty)
_PLACE_COLUMNS = [
    'City', 'Outdoor', 'Address', 'Latitude', 'Longitude',
    'Rating', 'User Rating Count', 'Comments',
]


def _place_record(city, place):
    """Flatten one place dict from a search response into a single row dict."""
    location = place.get('location', {})
    return {
        'City': city,
        'Outdoor': place.get('displayName', {}).get('text', 'N/A'),
        'Address': place.get('formattedAddress', 'N/A'),
        'Latitude': location.get('latitude', np.nan),
        'Longitude': location.get('longitude', np.nan),
        'Rating': place.get('rating', np.nan),
        'User Rating Count': place.get('userRatingCount', 0),
        # Keep the 'text' value of at most the first three reviews in the response.
        # NOTE(review): the API may return 'text' as an object rather than a plain
        # string — confirm what actually ends up in this column.
        'Comments': [
            review.get('text', 'N/A') for review in place.get('reviews', [])[:3]
        ],
    }


def get_places(cities, outdoor, client=None):
    """Search for every outdoor category in every city and tabulate the results.

    One text search with the query ``"<category> in <city>, Florida, USA"`` is
    issued per (city, category) pair; each place in the response becomes a row.

    Parameters
    ----------
    cities : iterable of str
        City names to search in.
    outdoor : iterable of str
        Category terms placed at the start of the text query.
    client : optional
        Object exposing ``places().searchText(body=..., fields=...).execute()``.
        When omitted, the notebook-level ``service`` is used.

    Returns
    -------
    pandas.DataFrame
        One row per returned place with the columns in ``_PLACE_COLUMNS``.
        The frame is empty (but keeps those columns) when nothing was found
        or when ``cities``/``outdoor`` is empty.

    Notes
    -----
    A failure for one (city, category) pair is printed and skipped so that a
    single bad request does not abort the whole run.
    """
    if client is None:
        client = service

    records = []  # one dict per place, turned into a DataFrame once at the end

    for city in cities:
        for category in outdoor:
            request_body = {
                'textQuery': f'{category} in {city}, Florida, USA',
                'regionCode': 'US',
                'languageCode': 'en',
                'pageSize': 20,
            }

            try:
                # Make the request to the Places text-search endpoint
                response = client.places().searchText(body=request_body, fields='*').execute()

                # A response without a 'places' key contributes no rows
                for place in response.get('places', []):
                    records.append(_place_record(city, place))

            except Exception as e:
                # Best effort: report which search failed and move on to the next one
                print(f"Error processing {category} in {city}: {e}")
                continue

    # Built once, after all searches, so an empty input still yields a valid frame
    return pd.DataFrame(records, columns=_PLACE_COLUMNS)

In [8]:
# Run the search for every value in the 'city' column against every outdoor category
df = get_places(cities['city'], outdoor)

In [9]:
# Summarise columns, non-null counts and dtypes of the raw search results
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1759 entries, 0 to 1758
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               1759 non-null   object 
 1   Outdoor            1759 non-null   object 
 2   Address            1759 non-null   object 
 3   Latitude           1759 non-null   float64
 4   Longitude          1759 non-null   float64
 5   Rating             1709 non-null   float64
 6   User Rating Count  1759 non-null   int64  
 7   Comments           1759 non-null   object 
dtypes: float64(3), int64(1), object(4)
memory usage: 110.1+ KB


In [10]:
# Keep only rows rated above 3.0 that also have more than 50 user ratings
# (rows whose Rating is NaN never satisfy the first condition)
df_f = df.loc[df['Rating'].gt(3.0) & df['User Rating Count'].gt(50)]

In [11]:
# Column/dtype summary of df.
# NOTE(review): this inspects the unfiltered df again; df_f (the filtered frame
# built in the previous cell) was probably the intended target — confirm
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1759 entries, 0 to 1758
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               1759 non-null   object 
 1   Outdoor            1759 non-null   object 
 2   Address            1759 non-null   object 
 3   Latitude           1759 non-null   float64
 4   Longitude          1759 non-null   float64
 5   Rating             1709 non-null   float64
 6   User Rating Count  1759 non-null   int64  
 7   Comments           1759 non-null   object 
dtypes: float64(3), int64(1), object(4)
memory usage: 110.1+ KB


In [12]:
# De-duplicate on the full coordinate pair: matching on latitude alone would
# also drop distinct places that merely share the same latitude
df_f = df_f.drop_duplicates(subset=['Latitude', 'Longitude'])

In [13]:
# Summarise the filtered, de-duplicated frame
df_f.info()

<class 'pandas.core.frame.DataFrame'>
Index: 650 entries, 0 to 1740
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   City               650 non-null    object 
 1   Outdoor            650 non-null    object 
 2   Address            650 non-null    object 
 3   Latitude           650 non-null    float64
 4   Longitude          650 non-null    float64
 5   Rating             650 non-null    float64
 6   User Rating Count  650 non-null    int64  
 7   Comments           650 non-null    object 
dtypes: float64(3), int64(1), object(4)
memory usage: 45.7+ KB


## Node outdoor

In [14]:
# Write the filtered, de-duplicated places as the outdoor node file.
# os.path.join keeps the path portable (the backslash form only works on Windows),
# and the target directory is created if it does not exist yet
node_path = os.path.join('neo4j', 'nodes', 'node_outdoor.csv')
os.makedirs(os.path.dirname(node_path), exist_ok=True)
df_f.to_csv(node_path, index=False, encoding='utf-8')

In [15]:
# Keep only the place name and its city; .copy() gives an independent frame so
# adding a column to r_ff later does not touch df_f
r_ff = df_f[['Outdoor', 'City']].copy()

In [16]:
# Every row gets the same relationship label
r_ff['relation'] = 'Outdoors_to_go'

In [17]:
# Write the place/city pairs with their label as the relationship file.
# os.path.join keeps the path portable (the backslash form only works on Windows),
# and the target directory is created if it does not exist yet
relation_path = os.path.join('neo4j', 'relationships', 'relation_outdoors.csv')
os.makedirs(os.path.dirname(relation_path), exist_ok=True)
r_ff.to_csv(relation_path, index=False, encoding='utf-8')