In [2]:
import json
import requests

import pandas as pd

In [3]:
def json_to_df(data):
    elements = data['elements']
    places = {'tipo': [], 'lat': [], 'lon': [], 'name': [], 'address': []}
    
    for i in elements:
        
        tipo = i.get('tags', None).get('amenity', None)
        latitude = i.get('lat', None)
        longitude = i.get('lon', None)
        name = i.get('tags', {}).get('name', "NO NAME")
        street = i.get('tags', {}).get('addr:street', "NO STREET")
        number = i.get('tags', {}).get('addr:housenumber', 9999)

        places['tipo'].append(tipo)
        places['lat'].append(latitude)
        places['lon'].append(longitude)
        places['name'].append(name)
        places['address'].append(street + ' ' + str(number))

            
    return pd.DataFrame(places)

In [4]:

tipo = ["restaurant", "bar", "pub", "cafe", "cinema", "theatre"]
barcelona = (41.33,2.06,41.46,2.27)
madrid = (40.256,-3.995,40.543,-3.306)
bilbao = (43.199,-3.069,43.373,-2.828)
valencia = (39.408,-0.4758,39.548,-0.273)
malaga = (36.658,-4.501,36.754,-4.351)
sevilla = (37.332,-6.094,37.439,-5.893)

In [5]:
def tablas(nodes, location, col):
    """Con esta función se obtienen los locales para los nodes definidos en la localización (marcada por 4 puntos), que traen los argumentos."""

    overpass_url = "http://overpass-api.de/api/interpreter"
    overpass_query = f"""
    [out:json];
    node["amenity"]{location};
    out;
    """
            
    response = requests.get(overpass_url, 
                            params={'data': overpass_query})
    data = response.json() 
    df_total = json_to_df(data)
    
    for ii,k in enumerate(nodes):

        if ii==0:
            df = df_total[df_total["tipo"] == k]
        else:
            df = pd.concat([df, df_total[df_total["tipo"] == k]])

    df.reset_index(inplace=True)
    df.drop(columns="index", inplace=True)
    df["location"] = col

    print("Se ha obtenido un dataframe con shape:", df.shape)

    return df

In [6]:

df_barcelona = tablas(tipo, barcelona, "Barcelona")

Se ha obtenido un dataframe con shape: (5797, 6)


In [7]:
df_barcelona

Unnamed: 0,tipo,lat,lon,name,address,location
0,restaurant,41.381687,2.076645,Ateneu de Sant Just Desvern,NO STREET 9999,Barcelona
1,restaurant,41.383643,2.158303,Amaltea,NO STREET 9999,Barcelona
2,restaurant,41.389518,2.162177,Hanibishi,NO STREET 9999,Barcelona
3,restaurant,41.389997,2.131837,Moncho's House,NO STREET 9999,Barcelona
4,restaurant,41.383672,2.180639,Taller de Tapas,l'Argenteria 51,Barcelona
...,...,...,...,...,...,...
5792,theatre,41.369839,2.137810,Sala Sant Medir,NO STREET 9999,Barcelona
5793,theatre,41.370484,2.123253,Auditori la Torrassa,Carrer de Santiago Apòstol 40,Barcelona
5794,theatre,41.378793,2.134733,NO NAME,NO STREET 9999,Barcelona
5795,theatre,41.388456,2.173889,Sala Ars,NO STREET 9999,Barcelona


In [8]:
df_madrid = tablas(tipo, madrid, "Madrid")


Se ha obtenido un dataframe con shape: (8546, 6)


In [9]:
df_bilbao = tablas(tipo, bilbao, 'Bilbao')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [None]:
df_malaga = tablas(tipo, malaga, 'Málaga')

Se ha obtenido un dataframe con shape: (610, 6)


In [None]:
df_valencia = tablas(tipo, valencia, 'Valencia')

Se ha obtenido un dataframe con shape: (1688, 6)


In [None]:
df_sevilla = tablas(tipo, sevilla, 'Sevilla')

Se ha obtenido un dataframe con shape: (1861, 6)


In [10]:
df_madrid

Unnamed: 0,tipo,lat,lon,name,address,location
0,restaurant,40.428732,-3.702002,Café Comercial,Glorieta de Bilbao 7,Madrid
1,restaurant,40.424584,-3.709622,Rey de Tallarines,Plaza del Conde de Toreno 2,Madrid
2,restaurant,40.425762,-3.712085,La Parrilla de Nino,Plaza de Cristino Martos 2,Madrid
3,restaurant,40.425140,-3.711853,Delhi,Calle del Duque de Osuna 6,Madrid
4,restaurant,40.426033,-3.711541,La Pomarada,Calle del Conde Duque 9999,Madrid
...,...,...,...,...,...,...
8541,theatre,40.398606,-3.665860,Grada mágica,Calle de Emilio Ortuño 20,Madrid
8542,theatre,40.303408,-3.833338,Sala Municipal de Teatro Nuria Espert,NO STREET 9999,Madrid
8543,theatre,40.426603,-3.709312,La Integra,Calle de Amaniel 24,Madrid
8544,theatre,40.373836,-3.659936,Auditorio Angelillo,NO STREET 9999,Madrid


In [25]:
type(df_madrid['lon'][0])

numpy.float64

In [22]:
for i in df_madrid['tipo'].value_counts():
    print(df_madrid['tipo'][i])
    print(i)

bar
4524
restaurant
1809
restaurant
1239
restaurant
841
restaurant
84
restaurant
49


In [23]:
df_madrid.loc[df_madrid['tipo'].value_counts()<50, df_madrid['tipo']] = 'otro'

  return array(a, dtype, copy=False, order=order)


IndexingError: (restaurant    False
bar           False
cafe          False
pub           False
theatre       False
cinema         True
Name: tipo, dtype: bool, 0       restaurant
1       restaurant
2       restaurant
3       restaurant
4       restaurant
           ...    
8541       theatre
8542       theatre
8543       theatre
8544       theatre
8545       theatre
Name: tipo, Length: 8546, dtype: object)

In [15]:
df_madrid['tipo'].value_counts()

restaurant    4524
bar           1809
cafe          1239
pub            841
theatre         84
cinema          49
Name: tipo, dtype: int64

In [22]:
path = './data/madrid.csv'
df_madrid.to_csv(path, index= False)

In [23]:
barcelona_path = './data/barcelona.csv'
df_barcelona.to_csv(barcelona_path, index= False)

In [24]:
valencia_path = './data/valencia.csv'
df_valencia.to_csv(valencia_path, index= False)

In [25]:
sevilla_path = './data/sevilla.csv'
df_sevilla.to_csv(sevilla_path, index= False)

In [26]:
malaga_path = './data/malaga.csv'
df_malaga.to_csv(malaga_path, index= False)

In [1]:
bilbao_path = './data/bilbao.csv'
df_bilbao.to_csv(bilbao_path, index= False)


NameError: name 'df_bilbao' is not defined

In [31]:
df_madrid['tipo'].unique()

array(['restaurant', 'bar', 'pub', 'cafe', 'cinema', 'theatre'],
      dtype=object)