In [43]:
# import libraries

#general 
import pandas as pd

#calculate geo distance
import geopandas as gpd
from shapely.geometry import Point

#import data from URL
import requests

#allow users to select multiple functionalities
import argparse

#allowing users to input interest points with flexibility 
from fuzzywuzzy import fuzz
from fuzzywuzzy import process

#for nice displaying
from tabulate import tabulate

In [44]:
# Script functions - calculate distance
# transform latitude/longitude data in degrees to pseudo-mercator coordinates in metres
def to_mercator(lat, long):  
    c = gpd.GeoSeries([Point(lat, long)], crs=4326)
    c = c.to_crs(3857)
    return c

# return the distance in metres between to latitude/longitude pair point in degrees (i.e.: 40.392436 / -3.6994487)
def distance_meters(start, finish):
    return start.distance(finish)

In [45]:
# Script functions - get Bicimad Stations info from API
def get_bicimad_info():
    
    # getting token
    login_url = 'https://openapi.emtmadrid.es/v1/mobilitylabs/user/login/'
    login_headers = {'email': 'malva.gonzalez@ironhack.com','password': 'Bicimad2022'}
    accessToken=requests.get(login_url,headers=login_headers).json()['data'][0]['accessToken']

    #Getting stations information
    stations_url = 'https://openapi.emtmadrid.es/v1/transport/bicimad/stations/'
    stations_headers = {'accessToken':accessToken }
    bicimad_stations_json=requests.get(stations_url,headers=stations_headers).json()['data']

    bicimad_stations=pd.json_normalize(bicimad_stations_json)

    #Taking only the intersting columns needed for the project and filtering the data  
    bicimad_stations=bicimad_stations[["address","geometry.coordinates","name","dock_bikes","free_bases"]]
    bicimad_stations = bicimad_stations.rename(columns={'geometry.coordinates': 'geometry_coordinates'})

    #Filtering out rows without geometry_coordinates:
    bicimad_stations=bicimad_stations[bicimad_stations.geometry_coordinates.notnull()]

    #Calculating coordinates
    def left_split(x):
        return x[0]
    def right_split(x):
        return x[-1]
    long_finish=bicimad_stations.apply(lambda x: left_split(x['geometry_coordinates']), axis=1)
    lat_finish=bicimad_stations.apply(lambda x: right_split(x['geometry_coordinates']), axis=1)
    bicimad_stations["long_finish"]=long_finish
    bicimad_stations["lat_finish"]=lat_finish

    #Renaming field names
    bicimad_stations = bicimad_stations.rename(columns={'address': 'Station location', 'name': 'BiciMAD station'})

    #Deleting not needed columns
    bicimad_stations = bicimad_stations.drop(columns='geometry_coordinates')

    #Calculating coordenades
    bicimad_stations[["long_finish", "lat_finish"]] = bicimad_stations[["long_finish", "lat_finish"]].apply(pd.to_numeric)

    coordinates_bicimad=bicimad_stations.apply(lambda x: to_mercator(x['lat_finish'],x['long_finish']), axis=1)
    bicimad_stations["coordinates_bicimad"]=coordinates_bicimad

    #Deleting not needed columns
    bicimad_stations.drop(['long_finish','lat_finish'], axis=1, inplace=True)

    #Storing dataframe
    bicimad_stations.to_csv('../data/bicimad_stations_clean.csv')
    return bicimad_stations

In [46]:
%%time
get_bicimad_info()

CPU times: user 7.42 s, sys: 548 ms, total: 7.97 s
Wall time: 8.5 s


Unnamed: 0,Station location,BiciMAD station,dock_bikes,free_bases,coordinates_bicimad
0,Puerta del Sol nº 1,Puerta del Sol A,22,3,POINT (4499223.648366921 -412373.2851572837)
1,Puerta del Sol nº 1,Puerta del Sol B,20,9,POINT (4499234.670122574 -412347.498528782)
2,Calle Miguel Moya nº 1,Miguel Moya,17,4,POINT (4499599.340516399 -412820.3206106317)
3,Plaza del Conde del Valle de Súchil nº 3,Plaza Conde Suchil,10,5,POINT (4500679.707306497 -412940.3068148262)
4,Calle Manuela Malasaña nº 5,Malasaña,16,5,POINT (4500485.866677178 -412457.3286497012)
...,...,...,...,...,...
259,Avenida Juan de Herrera frente a la calle Paul...,INEF,3,19,POINT (4501644.435409558 -415511.9577777153)
260,Avenida de la Complutense (Metro Ciudad Univer...,Ciudad Universitaria 1,0,24,POINT (4502177.655770458 -415179.5220676512)
261,Avenida de la Complutense (Metro Ciudad Univer...,Ciudad Universitaria 2,0,23,POINT (4502140.920338497 -415172.8287427756)
262,Calle José Antonio Novais frente al nº 12,Facultad Biología,7,17,POINT (4502687.743941171 -415213.4906984274)


In [47]:
%%time
bicimad_stations=get_bicimad_info()

CPU times: user 7.42 s, sys: 547 ms, total: 7.97 s
Wall time: 8.39 s


In [48]:
# Script functions - get Interesting Points info from API
def get_interest_points_info():
    #Extracting raw data - Interesting Points
    #Defining public url for accesing to the datasets
    url_interest_points = 'https://datos.madrid.es/egob/catalogo/300356-0-monumentos-ciudad-madrid.json'
    #Taking url information with requests
    response_interest_points = requests.get(url_interest_points)
    response_interest_points=response_interest_points.json()['@graph']

    #Converting json to pandas dataframe and taking only needed fields
    interest_points = pd.json_normalize(response_interest_points)
    interest_points=interest_points[["title","address.street-address","location.latitude","location.longitude","organization.organization-desc"]]
    
    #Renaming and adding columns needed
    interest_points = interest_points.rename(columns={'title': 'Place of interest', 'address.street-address': 'Place address','location.latitude': 'lat_start','location.longitude': 'long_start','organization.organization-desc': 'Place Description'})
    interest_points["Type of place"] = "Monumentos de la ciudad de Madrid" 

    #Cleaning the dataframe (excluding rows without latitude or longitude information)
    interest_points=interest_points[interest_points.lat_start.notnull()]
    interest_points=interest_points[interest_points.long_start.notnull()]
    
    #Saving CSV
    interest_points.to_csv('../data/interest_points.csv')
    return interest_points

In [49]:
%%time
interest_points=get_interest_points_info()

CPU times: user 229 ms, sys: 38.4 ms, total: 268 ms
Wall time: 1.86 s


In [50]:
interest_points

Unnamed: 0,Place of interest,Place address,lat_start,long_start,Place Description,Type of place
0,A las víctimas del Holocausto,JARDIN TRES CULTURAS,40.464322,-3.603558,"La obra, creada por el escultor judío de orige...",Monumentos de la ciudad de Madrid
1,A los Abuelos,C Alicún,40.479812,-3.661719,"Pequeña escultura, situada en un parque delimi...",Monumentos de la ciudad de Madrid
2,A Neruda-El Ser alado,Gta José Luis Ozores 116,40.381855,-3.652419,Monumento dedicado al escritor Pablo Neruda (1...,Monumentos de la ciudad de Madrid
3,Abogados Laboristas,C Atocha 55,40.412911,-3.700017,El 24 de enero de 1977 se produjo un atentado ...,Monumentos de la ciudad de Madrid
4,Abstracta I,,40.421746,-3.662293,"La histórica quinta de la Fuente del Berro, de...",Monumentos de la ciudad de Madrid
...,...,...,...,...,...,...
1840,Wamba,Pza Oriente,40.417760,-3.712903,Forma parte esta escultura de la serie concebi...,Monumentos de la ciudad de Madrid
1841,Wifredo. El Velloso,Pza Oriente,40.418785,-3.711830,Forma parte esta escultura de la serie concebi...,Monumentos de la ciudad de Madrid
1842,X aniversario Colegio Mayor Argentino Nuestra ...,C Martín Fierro,40.436046,-3.732043,"La lápida, promovida por el Ministerio de Cult...",Monumentos de la ciudad de Madrid
1843,XXV aniversario Colegio Mayor Argentino,C Martín Fierro,40.436127,-3.731997,"La lápida, promovida por la Embajada de la Rep...",Monumentos de la ciudad de Madrid


In [69]:
#for calculating specific interesting point
def get_specific_interest_point(dataframe,input_var):
    def compare_strings(input_var,x):
        return fuzz.ratio(input_var,x)
    similarity=dataframe.apply(lambda x: compare_strings(x['Place of interest'],input_var), axis=1)
    dataframe["similarity"]=similarity
    specific_interest_point=dataframe[dataframe["similarity"]>80]
    if specific_interest_point.empty:
        specific_interest_point=dataframe[dataframe['similarity'] == dataframe["similarity"].max()]                               
    return specific_interest_point

In [70]:
specific_interest_point=get_specific_interest_point(interest_points,'alcala')

In [71]:
specific_interest_point

Unnamed: 0,Place of interest,Place address,lat_start,long_start,Place Description,Type of place,similarity
998,Gallia,C Guzmán el Bueno 133,40.443308,-3.713126,Entre los años 1964 y 1968 se levantó en una m...,Monumentos de la ciudad de Madrid,67


In [54]:
#def calculating distance
def get_interest_points_info_coordinades(dataframe):
    #Calculating coordinades
    dataframe[["long_start", "lat_start"]].apply(pd.to_numeric) 
    #dataframe[["long_start", "lat_start"]] = numeric
    coordinates_interest_points=dataframe.apply(pd.to_numeric).apply(lambda x: to_mercator(x['lat_start'],x['long_start']), axis=1)
    dataframe["coordinates_interest_points"]=coordinates_interest_points
    #Deleting not needed columns
    dataframe.drop(['long_start','lat_start'], axis=1, inplace=True)

    #Storing dataframe
    dataframe.to_csv('../data/interest_points_clean.csv')
    interest_points_coordinades=dataframe
    return interest_points_coordinades

In [55]:
specific_interest_point_clean=get_interest_points_info_coordinades(specific_interest_point)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


In [18]:
#def calculating distance
def get_interest_points_info_coordinades(dataframe):
    #Calculating coordinades
    dataframe[["long_start", "lat_start"]] = dataframe[["long_start", "lat_start"]].apply(pd.to_numeric)
    coordinates_interest_points=dataframe.apply(lambda x: to_mercator(x['lat_start'],x['long_start']), axis=1)
    dataframe["coordinates_interest_points"]=coordinates_interest_points

    #Deleting not needed columns
    dataframe.drop(['long_start','lat_start'], axis=1, inplace=True)

    #Storing dataframe
    dataframe.to_csv('../data/interest_points_clean.csv')
    interest_points_coordinades=dataframe
    return interest_points_coordinades

In [19]:
%time
interest_point_clean=get_interest_points_info_coordinades(interest_points)

CPU times: user 2 µs, sys: 1e+03 ns, total: 3 µs
Wall time: 5.01 µs


In [None]:
interest_point_clean

In [None]:
#calculating nearest station
def get_near_station(bicimad_stations,dataframe):
    #Joining data
    interest_points_bicimad = bicimad_stations.assign(key=0).merge(dataframe.assign(key=0), how='left', on = 'key')
    interest_points_bicimad.drop('key', axis=1, inplace=True)
    interest_points_bicimad.to_csv('../data/interest_points_bicimad.csv')

    #Calculating coordinades
    distance=interest_points_bicimad.apply(lambda x: distance_meters(x['coordinates_interest_points'],x['coordinates_bicimad']), axis=1)
    interest_points_bicimad["distance"]=distance

    #Deleting not needed columns
    interest_points_bicimad.drop(['coordinates_bicimad','coordinates_interest_points'], axis=1, inplace=True)

    #Sorting values
    interest_points_bicimad=interest_points_bicimad.sort_values(["Place of interest","Station location"])

    #Filtering by the minimun distanct
    nearest_BiciMAD_station=interest_points_bicimad[interest_points_bicimad['distance'] ==
                        interest_points_bicimad.groupby(['Place of interest','Place address'])['distance'].transform('min')]
    #Sorting the data
    nearest_BiciMAD_station=nearest_BiciMAD_station.sort_values(["Place of interest","Station location"])
    nearest_BiciMAD_station=nearest_BiciMAD_station.reset_index()
    column_names = ["Place of interest", "Type of place", "Place address","BiciMAD station","Station location","distance","dock_bikes","free_bases","Place Description"]
    nearest_BiciMAD_station = nearest_BiciMAD_station.reindex(columns=column_names)
    nearest_BiciMAD_station = nearest_BiciMAD_station.rename(columns={'dock_bikes': 'bikes availability','distance': 'distance(m)','free_bases': 'bases availability'})
    nearest_BiciMAD_station['distance(m)']=nearest_BiciMAD_station['distance(m)'].round(2)

    #Saving CSV
    nearest_BiciMAD_station.to_csv('../data/output/nearest_BiciMAD_station.csv')
    return nearest_BiciMAD_station


In [None]:
%time
get_near_station(bicimad_stations,interest_point_clean)

In [None]:
get_near_station(bicimad_stations,specific_interest_point_clean)