In [17]:
import os
import requests
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

In [2]:
import yaml

with open('.env.yaml', 'r') as file:
    env_vars = yaml.safe_load(file)

In [3]:
# CONSTANTES
# CLIENT_ID = env_vars["Maps_CLIENT_ID"]
API_KEY = env_vars["YELP_API_KEY"]
GCP_PROJECT = env_vars["GCP_PROJECT"]
PARAMS="location=astoria&limit=50"
TABLE_ID = "metadata-sites"
DATASET_ID = "astoria"

In [35]:
# Realizar la solicitud HTTP
def request_api(url, params):    
    url = f"{url}?{params}&key={API_KEY}"
    response = requests.get(url)
    return response.json()

In [39]:
# Obtener localizaci√≥n de un estado
def get_state_metadata(state):
    url="https://maps.googleapis.com/maps/api/geocode/json"
    params=f"address={state}"
    response_json = request_api(url, params)
    result = response_json["results"][0]
    return result

geocoding = get_state_metadata("Florida")
location = f"{geocoding['geometry']['location']['lat']},{geocoding['geometry']['location']['lng']}"
location

'27.6648274,-81.5157535'

In [47]:
# Obtener lugares cercanos
def get_nearby_places(location, radius=50000, place_type='business'):
    url="https://maps.googleapis.com/maps/api/place/nearbysearch/json"    
    # fields = 'place_id,rating,user_ratings_total'
    params=f"location={location}&radius={radius}&type={place_type}"#&fields={fields}"
    places_response = request_api(url, params)
    result = places_response['results']
    return result

places = get_nearby_places(location)
places

[{'geometry': {'location': {'lat': 27.5958672, 'lng': -81.5061862},
   'viewport': {'northeast': {'lat': 27.63315792242184,
     'lng': -81.47286495262327},
    'southwest': {'lat': 27.57236998615755, 'lng': -81.53922891092547}}},
  'icon': 'https://maps.gstatic.com/mapfiles/place_api/icons/v1/png_71/geocode-71.png',
  'icon_background_color': '#7B9EB0',
  'icon_mask_base_uri': 'https://maps.gstatic.com/mapfiles/place_api/icons/v2/generic_pinlet',
  'name': 'Avon Park',
  'photos': [{'height': 1600,
    'html_attributions': ['<a href="https://maps.google.com/maps/contrib/103339891864974358915">ruth Smith</a>'],
    'photo_reference': 'AcJnMuFj_BBIu5dORaq50iOk2sR-ry0bJk4nbfCEgV_txI4nhh2t6owdML4KAXAzNO8r7psQmegLlrxPpddu9td-weAPMenGj_93dHTob_qwmFBCPetllxMlzS4tFg4d_u4Kst81ERUjBDOhelQJnqb6Nqso8HqmjHppeO5CplpuYCSkWQL3',
    'width': 747}],
  'place_id': 'ChIJV-gnG45V3IgRSDUQcb3C0hw',
  'reference': 'ChIJV-gnG45V3IgRSDUQcb3C0hw',
  'scope': 'GOOGLE',
  'types': ['locality', 'political'],
  'v

In [43]:
# Obtener detalles de un lugar
def get_place_details(place_id):    
    fields = 'name,formatted_address,place_id,geometry,types'
    url = "https://maps.googleapis.com/maps/api/place/details/json"
    params= f"place_id={place_id}&fields={fields}"
    response_json = request_api(url, params)
    print(response_json)
    result = response_json['result']
    return result


places_details = []
for place in places:
    place_id = place['place_id']
    place_details = get_place_details(place_id)
    places_details.append(place_details)
    break # traer solo el primero para probar
places_details

{'html_attributions': [], 'result': {'address_components': [{'long_name': 'Avon Park', 'short_name': 'Avon Park', 'types': ['locality', 'political']}, {'long_name': 'Highlands County', 'short_name': 'Highlands County', 'types': ['administrative_area_level_2', 'political']}, {'long_name': 'Florida', 'short_name': 'FL', 'types': ['administrative_area_level_1', 'political']}, {'long_name': 'United States', 'short_name': 'US', 'types': ['country', 'political']}, {'long_name': '33825', 'short_name': '33825', 'types': ['postal_code']}], 'adr_address': '<span class="locality">Avon Park</span>, <span class="region">FL</span> <span class="postal-code">33825</span>, <span class="country-name">USA</span>', 'formatted_address': 'Avon Park, FL 33825, USA', 'geometry': {'location': {'lat': 27.5958672, 'lng': -81.5061862}, 'viewport': {'northeast': {'lat': 27.63315792242184, 'lng': -81.47286495262327}, 'southwest': {'lat': 27.57236998615755, 'lng': -81.53922891092547}}}, 'icon': 'https://maps.gstatic

[{'address_components': [{'long_name': 'Avon Park',
    'short_name': 'Avon Park',
    'types': ['locality', 'political']},
   {'long_name': 'Highlands County',
    'short_name': 'Highlands County',
    'types': ['administrative_area_level_2', 'political']},
   {'long_name': 'Florida',
    'short_name': 'FL',
    'types': ['administrative_area_level_1', 'political']},
   {'long_name': 'United States',
    'short_name': 'US',
    'types': ['country', 'political']},
   {'long_name': '33825', 'short_name': '33825', 'types': ['postal_code']}],
  'adr_address': '<span class="locality">Avon Park</span>, <span class="region">FL</span> <span class="postal-code">33825</span>, <span class="country-name">USA</span>',
  'formatted_address': 'Avon Park, FL 33825, USA',
  'geometry': {'location': {'lat': 27.5958672, 'lng': -81.5061862},
   'viewport': {'northeast': {'lat': 27.63315792242184,
     'lng': -81.47286495262327},
    'southwest': {'lat': 27.57236998615755, 'lng': -81.53922891092547}}},
  

In [76]:
# Normalizar los datos
def normalize(places):
    parsed_places = []
    for place in places:
        place['address'] = place['vicinity']
        place['gmap_id'] = place['place_id']
        place['description'] = ''
        place['latitude'] = place['geometry']['location']['lat']
        place['longitude'] = place['geometry']['location']['lng']
        place['category'] = place['types']
        place['avg_rating'] = place.get('rating', 0)
        place['num_of_reviews'] = place.get('user_ratings_total', 0)
        place['price'] = place.get('price_level', 0)
        place['hours'] = ''
        place['state'] = 'Florida'
        place['url'] = 'https://www.google.com/maps/place//data='+place['place_id']        
        parsed_places.append(place)

    fields = ['name', 'address', 'gmap_id', 'description', 'latitude', 'longitude', 'category', 'avg_rating', 'num_of_reviews', 'price', 'hours', 'state', 'url']
    df = pd.DataFrame(parsed_places, columns=fields)
    df["category"] = df["category"].apply(lambda x: " ".join(x))
    return df

df = normalize(places)
df

Unnamed: 0,name,address,gmap_id,description,latitude,longitude,category,avg_rating,num_of_reviews,price,hours,state,url
0,Avon Park,Avon Park,ChIJV-gnG45V3IgRSDUQcb3C0hw,,27.595867,-81.506186,locality political,0.0,0,0,,Florida,https://www.google.com/maps/place//data=ChIJV-...
1,"Kevin Sherin, MD","1006 West Pleasant Street, Avon Park",ChIJIS7C6UF854gRs6HvPDkbv-o,,27.599535,-81.512792,doctor point_of_interest health establishment,5.0,4,0,,Florida,https://www.google.com/maps/place//data=ChIJIS...
2,Avon Park High School,"700 East Main Street, Avon Park",ChIJcYhIUcdV3IgRKQhEFT6RBZA,,27.594513,-81.487773,secondary_school point_of_interest school esta...,3.6,11,0,,Florida,https://www.google.com/maps/place//data=ChIJcY...
3,JNK Auto Supply Inc,"195 US Highway 27 North, Avon Park",ChIJv5WklJJV3IgRQPjNGzVT53o,,27.596914,-81.514947,car_repair point_of_interest store establishment,4.6,78,0,,Florida,https://www.google.com/maps/place//data=ChIJv5...
4,Peace River Center,"950 County Road 17A West, Avon Park",ChIJB1WyCk5X3IgR0KLuOe5C6y0,,27.618313,-81.511769,point_of_interest health establishment,3.4,5,0,,Florida,https://www.google.com/maps/place//data=ChIJB1...
5,South Florida State College,"600 West College Drive, Avon Park",ChIJK7dXAwdW3IgRrbkiT8drUtE,,27.570013,-81.511692,university point_of_interest school establishment,4.5,110,0,,Florida,https://www.google.com/maps/place//data=ChIJK7...
6,Subway,"7030 US Highway 27, Frostproof",ChIJBQ0UKRcB3YgRajNYh6FWy5Q,,27.733196,-81.573226,meal_takeaway restaurant food point_of_interes...,3.7,90,1,,Florida,https://www.google.com/maps/place//data=ChIJBQ...
7,Royal Care of Avon Park,"1213 West Stratford Road, Avon Park",ChIJOULHoof_3IgRzpps_TBXkMI,,27.628156,-81.517087,point_of_interest health establishment,3.2,21,0,,Florida,https://www.google.com/maps/place//data=ChIJOU...
8,Jacaranda Hotel,"19 East Main Street, Avon Park",ChIJ5carHpNV3IgRg2sUDm8rhHY,,27.59598,-81.501731,lodging point_of_interest establishment,4.5,874,0,,Florida,https://www.google.com/maps/place//data=ChIJ5c...
9,Dollar General,"14 Highway 630 East, Frostproof",ChIJdXmSTWsB3YgRgVNmijTNtb4,,27.755298,-81.530059,drugstore convenience_store food point_of_inte...,4.1,295,1,,Florida,https://www.google.com/maps/place//data=ChIJdX...


In [108]:
# Convertir el DataFrame de pandas a un objeto pyarrow.Table
df = pa.Table.from_pandas(df)
# Guardar el objeto pyarrow.Table en formato Parquet
pq.write_table(df, TABLE_ID+".parquet")

In [109]:
# Leer el DataFrame desde el archivo Parquet
df = pd.read_parquet(TABLE_ID+".parquet")
df.head()

Unnamed: 0,business_id,date,name,address,city,state,postal_code,latitude,longitude,stars,review_count,is_open,categories
0,7kk7PZvEeeb9fg0XgpIf1g,"2013-01-05 14:52:30, 2013-01-19 14:21:37, 2013...",Patti and Allan Herbert Wellness Center,"1241 Dickinson Dr Coral Gables, FL 33146",Coral Gables,FL,33146,25.715676,-80.280477,4.5,31,1,Recreation Centers
1,jl3sNS0Yg2-LN32O8H6CSg,"2013-01-05 14:52:30, 2013-01-19 14:21:37, 2013...",Hit Zone Inc,"7900 SW 40th St Miami, FL 33155",Miami,FL,33155,25.733021,-80.323534,4.0,17,1,Recreation Centers
2,t3aYNpAvVxKT-CfU36kZyw,"2013-01-05 14:52:30, 2013-01-19 14:21:37, 2013...",Florida International University Recreation Ce...,"11290 SW 12th St WRC 101 Miami, FL 33199",Miami,FL,33199,25.756015,-80.37809,3.5,14,1,"Trainers, Recreation Centers"
3,obhS1T1ZlakTmR1Ke8Luag,"2013-01-05 14:52:30, 2013-01-19 14:21:37, 2013...",Coral Gables War Memorial Youth Center,"405 University Dr Coral Gables, FL 33134",Coral Gables,FL,33134,25.741908,-80.263279,3.5,13,1,"Parks, Playgrounds, Recreation Centers"
4,cCLAEzDoPBzsHePs_23Kbw,"2013-01-05 14:52:30, 2013-01-19 14:21:37, 2013...",Kids Empire Miami Dolphin Mall,"11401 NW 12th St Ste E610 Miami, FL 33172",Miami,FL,33172,25.787996,-80.38024,3.0,23,1,"Recreation Centers, Indoor Playcentre, Venues ..."


In [72]:
# Guardar el dataframe en BigQuery
def df_to_bq(df, destination_table, project_id):
    """
    Guarda un dataframe de pandas en BigQuery
    """
    # convertir a string
    df = df.astype(str)
    # guardar en BigQuery
    df.to_gbq(destination_table=destination_table, project_id=project_id, if_exists="replace")

In [73]:
# tabla de destino
destination_table = f"{DATASET_ID}.Meta_Data_TBL"
df_to_bq(df, destination_table, GCP_PROJECT)