In [25]:
import requests
import pandas as pd
import numpy as np

In [26]:
# Request the TfL StopPoint listing for the "tube" mode.
url = "https://api.tfl.gov.uk/StopPoint/Mode/tube"
# requests applies no timeout by default, so a stalled connection would block
# the kernel indefinitely; bound the wait explicitly.
resp = requests.get(url, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error payload later on.
resp.raise_for_status()

In [27]:
# Decode the response body, then flatten the records under its "stopPoints"
# key into one DataFrame row per record.
data = resp.json()
stations = pd.json_normalize(
    data,
    record_path=["stopPoints"],
)

In [28]:
# Keep only the station name and its coordinates; every other column is dropped.
stations = stations.loc[:, ["commonName", "lat", "lon"]]

In [29]:
# Preview the first five station rows.
stations.head(n=5)

Unnamed: 0,commonName,lat,lon
0,Amersham Underground Station,51.674206,-0.607362
1,Chalfont & Latimer Underground Station,51.667915,-0.560616
2,Chalfont & Latimer Underground Station,51.668122,-0.560624
3,Chesham Underground Station,51.705227,-0.611113
4,Croxley Underground Station,51.647069,-0.441746


In [30]:
# Load the listings table; the first CSV column is used as the row index.
listings = pd.read_csv(
    "data/ml_ready_listings.csv",
    index_col=0,
)

In [31]:
# Preview the first five listing rows.
listings.head(n=5)

Unnamed: 0,latitude,longitude,minimum_nights,availability_365,has_review,bedroom_count,log_price,log_reviews_per_month,log_reviews_ltm,log_days_since_review,...,boro_Kensington and Chelsea,boro_Lambeth,boro_Other,boro_Southwark,boro_Tower Hamlets,boro_Wandsworth,boro_Westminster,room_Hotel room,room_Private room,room_Shared room
0,51.44306,-0.01948,3,293,1,4,5.697093,0.41211,2.564949,4.204693,...,0,0,1,0,0,0,0,0,0,0
1,51.44284,-0.01997,3,318,1,1,4.59512,0.198851,1.609438,5.459586,...,0,0,1,0,0,0,0,0,0,0
2,51.44359,-0.02275,3,302,1,2,5.003946,0.357674,1.94591,4.543295,...,0,0,1,0,0,0,0,0,0,0
3,51.44355,-0.02309,3,328,1,2,4.976734,0.262364,2.079442,4.744932,...,0,0,1,0,0,0,0,0,0,0
4,51.44333,-0.02307,3,255,1,2,5.062595,0.300105,1.609438,5.379897,...,0,0,1,0,0,0,0,0,0,0


In [37]:
# Convert station coordinates from degrees to radians, keeping the
# (lat, lon) column order, ready for the haversine BallTree built from them.
stations_rad = stations[["lat", "lon"]].pipe(np.radians)
stations_rad.head(n=5)

Unnamed: 0,lat,lon
0,0.901885,-0.0106
1,0.901775,-0.009785
2,0.901779,-0.009785
3,0.902426,-0.010666
4,0.901411,-0.00771


In [33]:
from sklearn.neighbors import BallTree

# Index the station coordinates (already in radians) so nearest-station
# lookups can use the haversine (great-circle) metric.
tree = BallTree(
    stations_rad,
    metric="haversine",
)


In [39]:
# Listing coordinates must be in radians too, in the same (lat, lon) order
# that the tree was built with.
listings_rad = listings[["latitude", "longitude"]].pipe(np.radians)

# For every listing, find the single closest station: `dists` holds the
# distances and `idxs` the matching row positions in the tree's input.
dists, idxs = tree.query(listings_rad, k=1, return_distance=True)

In [41]:
# Earth radius in kilometres: the haversine metric reports distances as
# angles in radians, so multiplying by this radius converts them to km.
EARTH_RADIUS_KM = 6371

# A k=1 query returns (n_listings, 1) arrays; collapse them to 1-D columns.
listings["dist_to_tube_km"] = dists.ravel() * EARTH_RADIUS_KM

# BallTree returns row *positions* in the array it was built from, not index
# labels, so the names must be looked up positionally with .iloc. Label-based
# .loc only gives the same answer while `stations` still has a default
# 0..n-1 index; after any filtering, de-duplication or sorting it would
# silently pick the wrong station or raise KeyError.
listings["nearest_station"] = stations["commonName"].iloc[idxs.ravel()].to_numpy()

In [45]:
# Preview the first five listing rows, now including the two new
# nearest-station columns.
listings.head(n=5)

Unnamed: 0,latitude,longitude,minimum_nights,availability_365,has_review,bedroom_count,log_price,log_reviews_per_month,log_reviews_ltm,log_days_since_review,...,boro_Other,boro_Southwark,boro_Tower Hamlets,boro_Wandsworth,boro_Westminster,room_Hotel room,room_Private room,room_Shared room,dist_to_tube_km,nearest_station
0,51.44306,-0.01948,3,293,1,4,5.697093,0.41211,2.564949,4.204693,...,1,0,0,0,0,0,0,0,6.443845,Canada Water Underground Station
1,51.44284,-0.01997,3,318,1,1,4.59512,0.198851,1.609438,5.459586,...,1,0,0,0,0,0,0,0,6.456216,Canada Water Underground Station
2,51.44359,-0.02275,3,302,1,2,5.003946,0.357674,1.94591,4.543295,...,1,0,0,0,0,0,0,0,6.31821,Canada Water Underground Station
3,51.44355,-0.02309,3,328,1,2,4.976734,0.262364,2.079442,4.744932,...,1,0,0,0,0,0,0,0,6.315627,Canada Water Underground Station
4,51.44333,-0.02307,3,255,1,2,5.062595,0.300105,1.609438,5.379897,...,1,0,0,0,0,0,0,0,6.339453,Canada Water Underground Station
