# Airbnb Capstone Project

## 1.Import all Libraries

In [3]:
### import all libraries and set settings 
import pandas as pd
import numpy as np
import requests
import json
import gzip
from py_functions import increase_bbox 

pd.set_option('display.max_columns', None) # show all columns  

## 2.Inside Airbnb pipeline

In [4]:
### Define path, .gz archive file name, country and city for url
path ='data/'
gz_file = "listings.csv.gz"
country = "united-kingdom"
state = "england"
city = "london"
url = f"http://data.insideairbnb.com/{country}/{state}/{city}/2023-03-14/data/{gz_file}"

In [5]:
### Create new directory for city
!mkdir {path}{city}

mkdir: data/london: File exists


In [6]:
### Download the .gz file
r = requests.get(url)
with open(path+city+'/'+gz_file, 'wb') as f:
    f.write(r.content)

In [7]:
### Unzip the .gz file and save the content as pd.DataFrame via read_csv
with gzip.open(path+city+'/'+gz_file) as f:
    listings = pd.read_csv(f)


In [8]:
### select only desired columns 
columns_keeper = (["id",
                   "listing_url",
                   "name",
                   "picture_url",
                   "host_id",
                   "host_response_rate",
                   "host_acceptance_rate",
                   "host_is_superhost",
                   "host_listings_count",
                   "host_total_listings_count",
                   "neighbourhood_cleansed",
                   "latitude",
                   "longitude",
                   "property_type",
                   "room_type",
                   "accommodates",
                   "bathrooms_text",
                   "bedrooms",
                   "beds",
                   "amenities",
                   "price",
                   "minimum_nights",
                   "maximum_nights",
                   "instant_bookable",
                   "number_of_reviews",
                   "number_of_reviews_ltm",
                   "number_of_reviews_l30d",
                   "first_review",
                   "last_review",
                   "review_scores_rating",
                   "review_scores_accuracy",
                   "review_scores_cleanliness",
                   "review_scores_checkin",
                   "review_scores_communication",
                   "review_scores_location",
                   "review_scores_value",
                   "reviews_per_month"]
                  )

In [9]:
### filter columns 
listings_short = listings[columns_keeper]

### 2.2.First Look - Airbnb Data

In [10]:
listings_short.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,picture_url,host_id,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,instant_bookable,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
0,714569379355913481,https://www.airbnb.com/rooms/714569379355913481,20230314070633,2023-03-14,Lovely private bedroom in Muswell Hill.,https://a0.muscache.com/pictures/miso/Hosting-...,39009854,,,,f,1.0,1.0,Haringey,,51.59728,-0.13933,Private room in condo,Private room,1,,1 shared bath,1.0,1.0,"[""Iron"", ""Hangers"", ""Hair dryer"", ""Outdoor din...",$100.00,1,365,f,0,0,0,,,,,,,,,,
1,808038970516277767,https://www.airbnb.com/rooms/808038970516277767,20230314070633,2023-03-14,Studio Flat Franklin London,https://a0.muscache.com/pictures/miso/Hosting-...,495977998,within an hour,100%,100%,f,14.0,31.0,Barnet,,51.636518,-0.177475,Entire rental unit,Entire home/apt,1,,1 bath,1.0,1.0,[],$65.00,180,365,t,0,0,0,,,,,,,,,,
2,822557738577472503,https://www.airbnb.com/rooms/822557738577472503,20230314070633,2023-03-14,PropertyPlug - 2Bed Flat in Edgware SmartTV WiFi,https://a0.muscache.com/pictures/d77957d5-695a...,325629338,within an hour,100%,91%,t,4.0,8.0,Harrow,,51.60818,-0.2774,Entire rental unit,Entire home/apt,4,,2 baths,2.0,2.0,"[""Dining table"", ""Washer"", ""Outdoor furniture""...",$132.00,2,28,t,0,0,0,,,,,,,,,,
3,3518856,https://www.airbnb.com/rooms/3518856,20230314070633,2023-03-14,Wimbledon Double Bedroom Ensuite,https://a0.muscache.com/pictures/23a18442-fc1d...,187811,,,100%,f,2.0,5.0,Merton,,51.42231,-0.18841,Private room in rental unit,Private room,1,,1 private bath,1.0,1.0,"[""Washer"", ""Iron"", ""Hangers"", ""Kitchen"", ""Smok...",$100.00,5,1125,f,4,0,0,2015-12-27,2016-07-11,3.67,3.0,4.33,4.67,5.0,3.67,3.67,0.05
4,4876550,https://www.airbnb.com/rooms/4876550,20230314070633,2023-03-14,Stunning Apartment 2 minutes walk to Tube Station,https://a0.muscache.com/pictures/miso/Hosting-...,25087384,within a few hours,75%,46%,f,1.0,1.0,Barnet,,51.602282,-0.193606,Entire condo,Entire home/apt,2,,1 bath,1.0,1.0,"[""First aid kit"", ""Washer"", ""Fire extinguisher...",$120.00,5,90,f,0,0,0,,,,,,,,,,


In [11]:
listings_short.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75241 entries, 0 to 75240
Data columns (total 42 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            75241 non-null  int64  
 1   listing_url                   75241 non-null  object 
 2   scrape_id                     75241 non-null  int64  
 3   last_scraped                  75241 non-null  object 
 4   name                          75210 non-null  object 
 5   picture_url                   75241 non-null  object 
 6   host_id                       75241 non-null  int64  
 7   host_response_time            46285 non-null  object 
 8   host_response_rate            46285 non-null  object 
 9   host_acceptance_rate          51028 non-null  object 
 10  host_is_superhost             75223 non-null  object 
 11  host_listings_count           75236 non-null  float64
 12  host_total_listings_count     75236 non-null  float64
 13  n

In [12]:
listings_short.describe()

Unnamed: 0,id,scrape_id,host_id,host_listings_count,host_total_listings_count,neighbourhood_group_cleansed,latitude,longitude,accommodates,bathrooms,bedrooms,beds,minimum_nights,maximum_nights,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
count,75241.0,75241.0,75241.0,75236.0,75236.0,0.0,75241.0,75241.0,75241.0,0.0,71768.0,74135.0,75241.0,75241.0,75241.0,75241.0,75241.0,56548.0,55595.0,55606.0,55564.0,55592.0,55565.0,55562.0,56548.0
mean,2.368628e+17,20230310000000.0,139076500.0,39.525958,71.3791,,51.509708,-0.128108,3.105793,,1.513153,1.772833,5.750748,7790.3,17.974668,5.736301,0.456467,4.588159,4.723349,4.623915,4.783393,4.801027,4.729358,4.607755,0.877064
std,3.425911e+17,12.96884,152962100.0,222.170789,420.039233,,0.048369,0.099341,1.936972,,0.885015,1.228013,24.240947,1914055.0,41.984021,12.991805,1.277612,0.779083,0.489328,0.550721,0.453835,0.448759,0.418873,0.521839,1.234003
min,13913.0,20230310000000.0,2594.0,1.0,1.0,,51.295937,-0.4978,0.0,,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
25%,19817400.0,20230310000000.0,19959230.0,1.0,1.0,,51.48354,-0.18939,2.0,,1.0,1.0,1.0,42.0,1.0,0.0,0.0,4.5,4.67,4.5,4.75,4.79,4.64,4.5,0.13
50%,39338750.0,20230310000000.0,67455190.0,2.0,2.0,,51.51384,-0.12628,2.0,,1.0,1.0,2.0,365.0,4.0,0.0,0.0,4.82,4.89,4.8,4.94,4.97,4.85,4.75,0.45
75%,6.562985e+17,20230310000000.0,224867000.0,5.0,8.0,,51.53945,-0.06846,4.0,,2.0,2.0,4.0,1125.0,17.0,6.0,0.0,5.0,5.0,5.0,5.0,5.0,5.0,4.97,1.09
max,8.463271e+17,20230310000000.0,505040000.0,2138.0,24047.0,,51.681142,0.28857,16.0,,22.0,38.0,1125.0,524855600.0,1328.0,564.0,68.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,51.05


## 2.3. Clean Airbnb 

### 2.3.1. Handling Missing Data 

In [13]:
listings_short.shape

(75241, 42)

In [14]:
listings_short.isnull().sum()

id                                  0
listing_url                         0
scrape_id                           0
last_scraped                        0
name                               31
picture_url                         0
host_id                             0
host_response_time              28956
host_response_rate              28956
host_acceptance_rate            24213
host_is_superhost                  18
host_listings_count                 5
host_total_listings_count           5
neighbourhood_cleansed              0
neighbourhood_group_cleansed    75241
latitude                            0
longitude                           0
property_type                       0
room_type                           0
accommodates                        0
bathrooms                       75241
bathrooms_text                    124
bedrooms                         3473
beds                             1106
amenities                           0
price                               0
minimum_nigh

**host_is_superhost**

In [15]:
# check the different values of "host_is_superhost"
listings_short["host_is_superhost"].value_counts(dropna=False)

f      64574
t      10649
NaN       18
Name: host_is_superhost, dtype: int64

In [16]:
# check how many listings the hosts with nan value for "host_is_superhost" have: 
listings_short[listings_short['host_is_superhost'].isna()]["host_total_listings_count"].value_counts()

5.0     4
2.0     3
6.0     2
10.0    2
7.0     2
4.0     2
26.0    2
1.0     1
Name: host_total_listings_count, dtype: int64

In [17]:
# we can fill values with "f" for false 
listings_short["host_is_superhost"] = listings_short["host_is_superhost"].fillna("f")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short["host_is_superhost"] = listings_short["host_is_superhost"].fillna("f")


In [18]:
# renaming Rows with NaN to "Unknown"
listings_short[["name", "host_response_time", "host_response_rate",
                "host_acceptance_rate"]] = listings_short[["name", "host_response_time", "host_response_rate",
                                                           "host_acceptance_rate"]].fillna("Unknown")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short[["name", "host_response_time", "host_response_rate",


**host_listings_count & host_total_listings_count**

In [19]:
# set the mode for host_listings_count & host_total_listings_count
listings_short["host_listings_count"].fillna(listings_short["host_listings_count"].mode()[0], inplace=True)

listings_short["host_total_listings_count"].fillna(listings_short["host_total_listings_count"].mode()[0], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short["host_listings_count"].fillna(listings_short["host_listings_count"].mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short["host_total_listings_count"].fillna(listings_short["host_total_listings_count"].mode()[0], inplace=True)


**neighbourhood_group_cleansed & bathrooms**

**bedrooms , beds & bathrooms_text**

In [21]:
# set the mode for above columns
listings_short["bathrooms_text"].fillna(listings_short["bathrooms_text"].mode()[0], inplace=True)

listings_short["bedrooms"].fillna(listings_short["bedrooms"].mode()[0], inplace=True)

listings_short["beds"].fillna(listings_short["beds"].mode()[0], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short["bathrooms_text"].fillna(listings_short["bathrooms_text"].mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short["bedrooms"].fillna(listings_short["bedrooms"].mode()[0], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short["beds"].fillna(listings_short["beds"].mode()[0], inplace=True)


In [22]:
#listings_short[(listings_short["bedrooms"].isna()) & (listings_short["beds"].isna())].count()

In [23]:
#listings_short[listings_short["bedrooms"].isna()][['property_type',"room_type"]].value_counts()

In [71]:
listings_short.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,picture_url,host_id,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,instant_bookable,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month,geometry,min_distance_subway_entrance,min_distance_entrance,min_distance_stop,min_distance_station,min_distance_tram_stop
0,714569379355913481,https://www.airbnb.com/rooms/714569379355913481,20230314070633,2023-03-14,Lovely private bedroom in Muswell Hill.,https://a0.muscache.com/pictures/miso/Hosting-...,39009854,Unknown,Unknown,Unknown,f,1.0,1.0,Haringey,51.59728,-0.13933,Private room in condo,Private room,1,1 shared bath,1.0,1.0,"[""Iron"", ""Hangers"", ""Hair dryer"", ""Outdoor din...",$100.00,1,365,f,0,0,0,,,,,,,,,,,POINT (-0.13933 51.59728),1504.797948,24908.390387,8990.334457,1293.474715,20080.648179
1,808038970516277767,https://www.airbnb.com/rooms/808038970516277767,20230314070633,2023-03-14,Studio Flat Franklin London,https://a0.muscache.com/pictures/miso/Hosting-...,495977998,within an hour,100%,100%,f,14.0,31.0,Barnet,51.636518,-0.177475,Entire rental unit,Entire home/apt,1,1 bath,1.0,1.0,[],$65.00,180,365,t,0,0,0,,,,,,,,,,,POINT (-0.17748 51.63652),2517.796038,29649.75949,13968.188953,655.797303,23993.972439
2,822557738577472503,https://www.airbnb.com/rooms/822557738577472503,20230314070633,2023-03-14,PropertyPlug - 2Bed Flat in Edgware SmartTV WiFi,https://a0.muscache.com/pictures/d77957d5-695a...,325629338,within an hour,100%,91%,t,4.0,8.0,Harrow,51.60818,-0.2774,Entire rental unit,Entire home/apt,4,2 baths,2.0,2.0,"[""Dining table"", ""Washer"", ""Outdoor furniture""...",$132.00,2,28,t,0,0,0,,,,,,,,,,,POINT (-0.27740 51.60818),2192.545111,28893.279595,9167.77822,632.542966,21354.166902
3,3518856,https://www.airbnb.com/rooms/3518856,20230314070633,2023-03-14,Wimbledon Double Bedroom Ensuite,https://a0.muscache.com/pictures/23a18442-fc1d...,187811,Unknown,Unknown,100%,f,2.0,5.0,Merton,51.42231,-0.18841,Private room in rental unit,Private room,1,1 private bath,1.0,1.0,"[""Washer"", ""Iron"", ""Hangers"", ""Kitchen"", ""Smok...",$100.00,5,1125,f,4,0,0,2015-12-27,2016-07-11,3.67,3.0,4.33,4.67,5.0,3.67,3.67,0.05,POINT (-0.18841 51.42231),802.858301,8443.787936,12267.316862,339.477485,1182.79792
4,4876550,https://www.airbnb.com/rooms/4876550,20230314070633,2023-03-14,Stunning Apartment 2 minutes walk to Tube Station,https://a0.muscache.com/pictures/miso/Hosting-...,25087384,within a few hours,75%,46%,f,1.0,1.0,Barnet,51.602282,-0.193606,Entire condo,Entire home/apt,2,1 bath,1.0,1.0,"[""First aid kit"", ""Washer"", ""Fire extinguisher...",$120.00,5,90,f,0,0,0,,,,,,,,,,,POINT (-0.19361 51.60228),2614.350465,26207.128973,11090.319136,183.752188,20125.705641


In [69]:
listings_short.duplicated().any()

False

In [65]:
listings_short["bathrooms_text"].value_counts()

1 bath               32853
1 shared bath        11817
2 baths               8867
1 private bath        6493
1.5 baths             5677
1.5 shared baths      3145
2.5 baths             2083
2 shared baths        1183
3 baths               1110
3.5 baths              566
2.5 shared baths       369
4 baths                217
3 shared baths         173
4.5 baths              120
0 shared baths          92
0 baths                 82
Half-bath               78
5 baths                 58
Shared half-bath        49
3.5 shared baths        42
Private half-bath       28
5.5 baths               27
6.5 baths               19
4.5 shared baths        15
4 shared baths          13
6 baths                 12
12 baths                 9
5 shared baths           7
7.5 baths                5
7 baths                  4
5.5 shared baths         4
23 baths                 3
9 baths                  3
8 baths                  3
12.5 baths               2
7 shared baths           2
8 shared baths           2
1

In [25]:
listings_short["amenities"].value_counts()

[]                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     568
["First aid kit", "Long term stays allowed", "Kitchen", "Refrigerator", "Elevator", "Lock on bedroom door", "Dedicated workspace", "Bed linens", "Heating", "Wifi", "Dishes and silverware", "Stove", "Microwave", "Essentials", "Oven"]                                                                                                                                                                                                                                                     

In [26]:
listings_short[listings_short["first_review"].isna()]["number_of_reviews"].value_counts()

0    18693
Name: number_of_reviews, dtype: int64

## 3.Overpass Pipeline

In [27]:
### Increase outside border of listings
london_bbox = increase_bbox(listings)

In [28]:
# Increasing the maxs by 0.01 and decreasing the mins by 0.01 
# will shift the outline's border by a bit more than 1km in each direction.

# See increase_bbox function in py_functions.py

In [29]:
# (northern hemisphere)
# latitude max = north
# latitude min = south
# longitude max = east
# longitude min = west

In [73]:
### Get OSM data for slightly bigger bbox
overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = f"""
[out:json];
(
    node["amenity"="bar"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="pub"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="restaurant"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="cafe"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="fast_food"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["railway"="subway_entrance"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["cuisine"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="bar"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="pub"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="restaurant"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="cafe"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="fast_food"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["railway"="subway_entrance"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["cuisine"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});

    );
    (._;>;);
out body;
"""
response = requests.get(overpass_url,
                        params={'data': overpass_query})
data = response.json()

osm = pd.json_normalize(data, record_path="elements")


In [79]:
### select only desired columns
osm_keepers = (["id",
                     "lat",
                     "lon",
                     "tags.name",
                     "tags.amenity",
                     "tags.cuisine",
                     "tags.diet:vegetarian",
                     "tags.diet:vegan",
                     "tags.railway"]
                     )

In [80]:
osm_short = osm[osm_keepers]

In [90]:
osm_short["lat_lon"] = osm_short["lat"].str + osm_short["lon"].str

AttributeError: Can only use .str accessor with string values!

In [89]:
osm_short[osm_short["lat_lon"].duplicated()]

Unnamed: 0,type,id,lat,lon,tags.addr:city,tags.addr:housename,tags.addr:housenumber,tags.addr:postcode,tags.addr:street,tags.amenity,tags.cuisine,tags.diet:gluten_free,tags.diet:meat,tags.diet:vegan,tags.diet:vegetarian,tags.food,tags.internet_access,tags.name,tags.opening_hours:covid19,tags.operator,tags.outdoor_seating,tags.payment:american_express,tags.payment:contactless,tags.payment:credit_cards,tags.payment:debit_cards,tags.payment:maestro,tags.payment:mastercard,tags.payment:visa,tags.smoking,tags.toilets,tags.toilets:access,tags.real_ale,tags.addr:country,tags.brand,tags.brand:wikidata,tags.brand:wikipedia,tags.indoor_seating,tags.website,tags.wikidata,tags.fhrs:id,tags.fhrs:local_authority_id,tags.source:addr,tags.wheelchair,tags.source,tags.upload_tag,tags.ale,tags.brewery,tags.check_date,tags.email,tags.opening_hours,tags.phone,tags.real_fire,tags.created_by,tags.note,tags.wheelchair:description,tags.dog,tags.entrance,tags.contact:email,tags.contact:phone,tags.wikipedia,tags.old_name,tags.contact:website,tags.opening_hours:food,tags.foot,tags.fhrs:authority,tags.fixme,tags.fhrs:inspectiondate,tags.fhrs:rating,tags.addr:suburb,tags.bicycle,tags.railway,tags.former_name,tags.previous_name,tags.level,tags.takeaway,tags.fhrs:confidence_management,tags.fhrs:hygiene,tags.fhrs:structural,tags.toilets:wheelchair,tags.addr:village,tags.survey:date,tags.alt_name,tags.contact:facebook,tags.contact:twitter,tags.historic,tags.contact:instagram,tags.drive_through,tags.official_name,tags.lgbtq,tags.opening_hours:kitchen,tags.website:menu,tags.source:name,tags.opening_hours:signed,tags.air_conditioning,tags.bar,tags.postal_code,tags.description,tags.beer_garden,tags.fixme:place,tags.facebook,tags.twitter,tags.internet_access:fee,tags.source:postcode,tags.source:addr:postcode,tags.floor,tags.start_date,tags.is_in,tags.addr:county,tags.source:old_name,tags.eat_in,tags.fhrs:name,tags.payment:cash,tags.check_date:wheelchair,tags.entrance_marker:subway,tags.addr:unit,tags.not:addr:housenumber,tags.not:addr:postcode,tags.source:not:addr,tags.source:not:addr:housenumber,tags.dontimport:fhrs:addrline1,tags.dontimport:fhrs:addrline2,tags.dontimport:fhrs:businesstype,tags.microbrewery,tags.comment,tags.ref,tags.ref:GB:tfl_uid,tags.addr:parentstreet,tags.not:addr:street,tags.addr:city:fa,tags.toilets:female,tags.toilets:male,tags.toilets:unisex,tags.wikimedia_commons,tags.shop,tags.real_cider,tags.fixme:addr:1,tags.accommodation,tags.comedy,tags.description:floor,tags.note:beer,tags.venue,tags.loc_name,tags.photo,tags.layer,tags.addr:interpolation,tags.roof_terrace,tags.opening_hours:url,tags.disused,tags.addr:place,tags.atm,tags.check_date:opening_hours,tags.delivery,tags.sport,tags.tourism,tags.closed,tags.addr:floor,tags.short_name,tags.wifi,tags.craft_keg,tags.warning,tags.reservation,tags.capacity,tags.currency:XLT,tags.payment:cash:XLT-BXTP,tags.payment:text:XLT-BXTP,tags.fryup,tags.image,tags.diet:halal,tags.name:signed,tags.survey_date,tags.garden,tags.addr:town,tags.old_fhrs:id,tags.gay,tags.access,tags.changing_table,tags.contact:whatsapp,tags.drink:coffee,tags.happycow:id,tags.internet_access:ssid,tags.branch,tags.amenity_1,tags.payment:bancomat,tags.payment:visa_debit,tags.payment:visa_electron,tags.url,tags.old_amenity,tags.internet_access:password,tags.station,tags.wine,tags.motorcycle:theme,tags.sky,tags.disused:amenity,tags.name:ja,tags.payment:lightning,tags.dontimport:fhrs:addrline4,tags.long_name,tags.source:addr:housenumber,tags.instagram,tags.deli,tags.drive_in,tags.name:en,tags.note:name:ko,tags.contact:untappd,tags.min_age,tags.not:brand:wikidata,tags.cost:coffee,tags.owner,tags.cocktails,tags.name:zh,tags.music_genre,tags.camera:mount,tags.camera:type,tags.man_made,tags.surveillance,tags.surveillance:type,tags.old_cuisine,tags.booth,tags.covered,tags.listed_status,tags.ref:vatin,tags.not:name,tags.source:internet_access,tags.seating,tags.diet:pescetarian,tags.seats,tags.changing_table:count,tags.changing_table:location,tags.restaurant,tags.diet:kosher,tags.operator:wikidata,tags.type,tags.name:ar,tags.football,tags.internet_access:free,tags.highchair,tags.disused:name,tags.floor:material,tags.level:ref,tags.demolished:amenity,tags.currency:XBT,tags.contact:fax,tags.barrier,tags.theme,tags.organic,tags.was:amenity,tags.alcohol,tags.FIXME,tags.live_music,tags.building,tags.old_fhrs:local_authority_id,tags.name:fa,tags.check_date:diet:vegetarian,tags.amenity:closed,tags.addr:full,tags.note:name,tags.addr:subdistrict,tags.cafe,tags.quiz,tags.mapillary,tags.was:atm,tags.was:name,tags.was:old_name,tags.was:source:name,tags.name:lit,tags.construction,tags.payment:coins,tags.diet:lacto_vegetarian,tags.diet:organic,tags.currency:GBP,tags.payment:lightning_contactless,tags.payment:onchain,tags.breakfast,tags.trendy,tags.addr:substreet,tags.erected_by,tags.inscription,tags.memorial,tags.openplaques:id,tags.fax,tags.addr:flat,tags.addr:flats,tags.old_name:zh,tags.bicycle:conditional,tags.franchise,tags.last_check,tags.number,tags.diet:dairy_free,tags.diet:lactose_free,tags.source:opening_hours,tags.outdoor_seating:comfort,tags.colour,tags.name:es,tags.name:gl,tags.name:-2013,tags.tourism:-2013,tags.disused:shop,tags.source:not:addr:postcode,tags.name:ru,tags.network,tags.building:levels,tags.highway,tags.lamp_mount,tags.music,tags.description:en,tags.payment:apple_pay,tags.payment:google_pay,tags.payment:nfc,tags.source:wheelchair,tags.diet:fish,tags.diet:healthy,tags.former_amenity,tags.construction:amenity,tags.was:fhrs:id,tags.product,tags.int_name,tags.material,tags.old:fhrs:id,tags.contact:pinterest,tags.drink:soft_drink,tags.drink:tea,tags.source:railway,tags.indoor,tags.dontimport:fhrs:addrline3,tags.payment:amex,tags.ele,tags.brand:website,tags.designation,tags.payment:android_pay,tags.display,tags.support,tags.second_hand,tags.drinking_water:refill,tags.drinking_water:refill:network,tags.source:cuisine,tags.line,tags.door,tags.board_type,tags.information,tags.office,tags.source_ref,tags.alt_postcode,tags.old_name:2007-2017,tags.old_name:2017-2020,tags.bollard,tags.fee,tags.note:addr,tags.surveillance:zone,tags.source:address,tags.status,tags.fixme:addr2,tags.craft,tags.defibrillator:location,tags.emergency,tags.orienteering,tags.changing_table:fee,tags.shop:units,tags.power_supply,tags.source:position,tags.historic:amenity,tags.ref:store,tags.pub,tags.not:fhrs:id,tags.source:geograph:image,tags.source_1,tags.alt_name:ru,tags.addr:district,tags.vegetarian,tags.petanque,tags.lgbtq:men,tags.lgbtq:trans,tags.lgbtq:women,tags.halal,tags.bouncers,tags.artwork_type,tags.subject:wikidata,tags.checkfirst:suggested:name,tags.drink:beer,tags.drink:wine,tags.scheme,tags.source:date,tags.opening_hours:note,tags.was:cuisine,tags.burrito,tags.note_1,tags.name:bg,tags.source:operator,tags.club,tags.automatic_door,tags.contact:snapchat,tags.contact:tiktok,tags.shisha,tags.payment:diners_club,tags.payment:jcb,tags.was:shop,tags.stars,tags.access:note,tags.memorial:type,tags.flickr,tags.max_level,tags.min_level,tags.micropub,tags.note:level,tags.payment:cards,tags.lunch,tags.craft_beer,tags.drink,tags.format,tags.name:gsw,tags.distillery,tags.drink:cider,tags.wheelchair:description:en,tags.leisure,tags.proposed:amenity,tags.name:nl,tags.service:bicycle:repair,tags.service:bicycle:retail,tags.workshop,tags.was:fhrs:local_authority_id,tags.name:it,tags.contact:tripadvisor,tags.drink:cola,tags.drink:milk,tags.cat,tags.diet:raw,tags.name:pt,tags.exit_only,tags.self_service,tags.was:brand,tags.currency:BCH,tags.payment:cryptocurrencies,tags.name:pl,tags.name:zh-Hant,tags.name:de,tags.exit,tags.drink:lemonade,tags.drink:water,tags.fixme:opening_hours,tags.drink:natural_wine,tags.local_name,tags.gift,tags.exit_to,tags.pop_up_shop,tags.brand:en,tags.brand:zh,tags.toilets:disposal,tags.unisex,tags.name:ro,tags.contact:youtube,tags.weather_protection,tags.was:leisure,tags.was:opening_hours,tags.was:sport,tags.old_shop,tags.drink:sparkling_wine,tags.addr:hamlet,tags.location,tags.camera:direction,tags.website:operator,tags.name:zh-Hans,tags.crossing,tags.building:part:levels,tags.source:geograph:id,tags.toilets:position,tags.bottle,tags.was:brand:wikidata,tags.was:brand:wikipedia,tags.payment:notes,tags.fast_food,tags.url:menu,tags.roof:shape,tags.bring_your_own_wine,tags.backrest,tags.coffee,tags.kids_area,tags.drink:cocktail,tags.drink:gin,tags.drink:rum,tags.drink:spirits,tags.drink:tequila,tags.drink:whisky,tags.nohousenumber,tags.contact:linkedin,tags.diet:ovo_vegetarian,tags.private,tags.building:colour,tags.building:material,tags.roof:levels,tags.name:fr,tags.artist_name,tags.building:part,tags.service_times,tags.phone:mobile,tags.access:conditional,tags.note:access,tags.name:cy,tags.natural,tags.menu:url,tags.name:ur,tags.diet:diabetes,tags.diet:egg_free,tags.diet:nut_free,tags.diet:soy_free,tags.source:addr:website,tags.source:housenumber,tags.post_office:type,tags.source:addr:flats,tags.last_checked,tags.diet:non-vegetarian,tags.artwork_subject,tags.subject,tags.bicycle_parking,tags.artist:wikidata,tags.locked,tags.vacant,tags.diet:seafood,tags.diet:seasonal,tags.public_transport,tags.memorial:conflict,tags.toilets:number,tags.posh,tags.zero_waste,tags.advertising,tags.animated,tags.direction,tags.land_property,tags.lit,tags.luminous,tags.message,tags.sides,tags.visibility,tags.origin,tags.check_date:diet:vegan,tags.full_name,tags.name:da,tags.motorcycle,tags.noname,tags.check_date:currency:XBT,tags.seasonal,tags.faces,tags.subject:wikipedia,tags.name:he,tags.male,tags.camra,tags.old:opening_hours,tags.operator:website,tags.website:booking,tags.street_vendor,tags.contact:foursquare,tags.female,nodes,tags.architect,tags.construction_date,tags.date_start,tags.roof:colour,tags.beer,tags.roof:material,tags.building:use,tags.amenity:disused,tags.opening_hours:takeaway,tags.old:contact:website,tags.roof:orientation,tags.store_ref,tags.massgis:ID,tags.verified,tags.noaddress,tags.alt_name_1,tags.craft_ale,tags.source:building,tags.lip_licking_flavour,tags.old_name:1,tags.outdoor_seating:weather_protection,tags.addr:state,tags.socket:bs1363,tags.heritage,tags.heritage:operator,tags.ref:GB:nhle,tags.building:architecture,tags.cats,tags.payment:electronic_purses,tags.landuse,tags.he:inscription_date,tags.heritage:website,tags.height,tags.fictional:amenity,tags.toilets_access,tags.source:outline,tags.name:be,tags.source:amenity,tags.addr:interval,tags.old_name:2006-2012,tags.old_name:2012-2019,tags.area,tags.disused:website,tags.disabled,tags.disabled:description,tags.phone2,tags.healthcare,tags.old_operator,tags.source:toilets,tags.note:housenumber,tags.value,tags.happy_hours,tags.note:alt_name,tags.source:real_cider,tags.amenity_2,tags.dogs,tags.quiznight,tags.old_brand,tags.old_brand:wikidata,tags.year_of_construction,tags.delivery:partner,tags.ice_cream,tags.disused:opening_hours,tags.diet:mediterranean,tags.name:old,tags.old_fhrs:confidence_management,tags.old_fhrs:hygiene,tags.old_fhrs:rating,tags.old_fhrs:structural,tags.old_opening_hours,tags.disused:fhrs:id,tags.disused:internet_access,tags.disused:internet_access:fee,tags.disused:phone,tags.disused:smoking,tags.ref:he,tags.microbrewery:note,tags.building:min_level,tags.award:michelin,tags.clothes,tags.sunday_roast,tags.whiskies,tags.openfire,tags.old_phone,tags.source:image,tags.drink:afri-cola,tags.alt_name2,tags.maxspeed,tags.note:wheelchair,tags.opening_date,tags.source:area,tags.toilets:handwashing,tags.source:postal_code,tags.oven,tags.gluten_free,tags.toilets:type,tags.internet_access:fee:note,tags.source:diet:vegan,tags.shop2,tags.HE_ref,tags.levels,tags.payment:app,tags.service:bicycle:second_hand,tags.mobile,tags.service,tags.building:name,tags.addr:locality,tags.fixme:housename,tags.money_transfer,tags.so,tags.yelp,tags.karaoke,tags.source:addr:housename,tags.rooms,tags.source:rooms,tags.amenity:2,tags.phone:signed,tags.addr:street_1,tags.building:levels:underground,tags.old:website,tags.historic:name,tags.historic:shop,tags.source:geometry,tags.old_addr:housenumber,tags.old_addr:street,tags.was:wikidata,tags.price,tags.ref:GB:uprn,tags.screen,tags.billiards:pool,tags.billiards:snooker,tags.opening_hours:drive_through,tags.was:office,tags.old_website,tags.television,tags.name:ko,tags.discount:camra_member,tags.disused:brand,tags.disused:brand:wikidata,tags.disused:brand:wikipedia,tags.old_craft,tags.abandoned:amenity,tags.abandoned:building,tags.pets_allowed,tags.wholesale,tags.roof:height,tags.old_ref:pol_id,tags.old_old_name,tags.floating,tags.diet:fruitarian,tags.stroller,tags.oneway,tags.terrace,tags.denomination,tags.religion,tags.note_2,tags.takeaway:covid19,tags.fireplace,tags.not:amenity,tags.ssid,tags.heritage:name,tags.golf,tags.house,tags.diet:chicken,tags.diet:dairy,tags.diet:omnivore,tags.amenity2,tags.internet_access:access,tags.internet_access:wlan:key,tags.name:zh_pinyin,tags.note:name:en,tags.note:name:zh,tags.website:en,tags.serving_system:buffet,tags.disused:short_name,tags.fixme:addr4,tags.not:website,tags.hotel,tags.diet:local,tags.construction:shop,tags.parking,lat_lon
1,node,451153,51.602031,-0.193503,,,,,,restaurant,,,,,,,,Central Restaurant,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
2,node,451154,51.599579,-0.196028,,,319,,Regents Park Road,pub,,,,,,yes,,The Catcher in the Rye,,,,,,,,,,,,,,3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
3,node,451271,51.614104,-0.176556,London,,749,,High Road,pub,,,,,,,,The Tally Ho,,,yes,,,,,,,,,,,,GB,Wetherspoon,Q6109362,en:Wetherspoons,yes,http://www.jdwetherspoon.co.uk/home/pubs/the-t...,Q24298976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
4,node,12242503,51.592016,0.027962,,,,,,pub,,,,,,,,Railway Bell,,,,,,,,,,,,yes,customers,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
5,node,12243302,51.575987,0.028219,London,,,E11 2RL,High Street,pub,,,,,,yes,,The George,,,,,,,,,,,,,,yes,,Wetherspoon,Q6109362,en:Wetherspoons,,,Q88916722,443368,155360,FHRS Open Data,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87026,way,1159232519,,,Ilford,,8,IG5 0LQ,Claybury Broadway,fast_food,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"[10780326257, 10780326261, 10780326262, 107803...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
87027,way,1159232523,,,Ilford,,12,IG5 0LQ,Claybury Broadway,restaurant,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"[10780326275, 10780326274, 10780326276, 107803...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
87028,way,1159232524,,,Ilford,,13,IG5 0LQ,Claybury Broadway,fast_food,fish_and_chips,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,yes,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"[10780326279, 10780326280, 10780326281, 107803...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...
87029,way,1161055856,,,,,,,,cafe,,,,,,,,Pier Point Cafe,,,no,,,,,,,,,,,,,,,,no,,,1573636,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,only,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,kiosk,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"[10799058331, 10799058332, 10799058333, 107990...",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0 51.406233\n1 51.408528\n2 ...


In [83]:
osm_short.head()

Unnamed: 0,id,lat,lon,tags.name,tags.amenity,tags.cuisine,tags.diet:vegetarian,tags.diet:vegan,tags.railway
0,451152,51.60084,-0.194608,King of Prussia,pub,pizza;burger,yes,yes,
1,451153,51.602031,-0.193503,Central Restaurant,restaurant,,,,
2,451154,51.599579,-0.196028,The Catcher in the Rye,pub,,,,
3,451271,51.614104,-0.176556,The Tally Ho,pub,,,,
4,12242503,51.592016,0.027962,Railway Bell,pub,,,,


In [31]:
### same for WAY
overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = f"""
[out:json];
(
    way["amenity"="bar"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="pub"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="restaurant"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="cafe"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="fast_food"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["railway"="tram_stop"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["railway"="subway_entrance"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["public_transport"="station"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    );
    (._;>;);
out body;
"""
response = requests.get(overpass_url,
                        params={'data': overpass_query})
data = response.json()

data_norm_way = pd.json_normalize(data, record_path="elements")



In [57]:
data_norm.head()

Unnamed: 0,type,id,lat,lon,tags.addr:city,tags.addr:housename,tags.addr:housenumber,tags.addr:postcode,tags.addr:street,tags.amenity,tags.cuisine,tags.diet:gluten_free,tags.diet:meat,tags.diet:vegan,tags.diet:vegetarian,tags.food,tags.internet_access,tags.name,tags.opening_hours:covid19,tags.operator,tags.outdoor_seating,tags.payment:american_express,tags.payment:contactless,tags.payment:credit_cards,tags.payment:debit_cards,tags.payment:maestro,tags.payment:mastercard,tags.payment:visa,tags.smoking,tags.toilets,tags.toilets:access,tags.real_ale,tags.addr:country,tags.brand,tags.brand:wikidata,tags.brand:wikipedia,tags.indoor_seating,tags.website,tags.wikidata,tags.naptan:AtcoCode,tags.network,tags.public_transport,tags.railway,tags.ref:crs,tags.wheelchair,tags.wikipedia,tags.fare_zone,tags.line,tags.name:ru,tags.source_ref,tags.station,tags.subway,tags.wheelchair:description,tags.old_name,tags.toilets:wheelchair,tags.fhrs:id,tags.fhrs:local_authority_id,tags.source:addr,tags.source,tags.upload_tag,tags.alt_name,tags.air_conditioning,tags.baby_feeding,tags.level,tags.train,tags.platforms,tags.ale,tags.brewery,tags.check_date,tags.email,tags.opening_hours,tags.phone,tags.real_fire,tags.light_rail,tags.public_transport:version,tags.tram,tags.source:name,tags.start_date,tags.wikimedia_commons,tags.created_by,tags.note,tags.network:subway,tags.network:train,tags.source:network,tags.ref:stanox,tags.ref:tiploc,tags.dog,tags.check_date:wheelchair,tags.contact:email,tags.contact:phone,tags.contact:website,tags.opening_hours:food,tags.electrified,tags.name:fr,tags.foot,tags.fhrs:authority,tags.fixme,tags.fhrs:inspectiondate,tags.fhrs:rating,tags.addr:suburb,tags.loc_name,tags.bicycle,tags.former_name,tags.name:da,tags.previous_name,tags.takeaway,tags.fhrs:confidence_management,tags.fhrs:hygiene,tags.fhrs:structural,tags.tracks,tags.postal_code,tags.addr:village,tags.survey:date,tags.contact:facebook,tags.contact:twitter,tags.historic,tags.contact:instagram,tags.bus,tags.drive_through,tags.official_name,tags.lgbtq,tags.opening_hours:kitchen,tags.website:menu,tags.opening_hours:signed,tags.bar,tags.description,tags.beer_garden,tags.fixme:place,tags.facebook,tags.twitter,tags.internet_access:fee,tags.source:postcode,tags.source:addr:postcode,tags.floor,tags.is_in,tags.addr:county,tags.source:old_name,tags.eat_in,tags.fhrs:name,tags.payment:cash,tags.name:he,tags.entrance,tags.entrance_marker:subway,tags.addr:unit,tags.not:addr:housenumber,tags.not:addr:postcode,tags.source:not:addr,tags.source:not:addr:housenumber,tags.dontimport:fhrs:addrline1,tags.dontimport:fhrs:addrline2,tags.dontimport:fhrs:businesstype,tags.microbrewery,tags.comment,tags.ref,tags.ref:GB:tfl_uid,tags.name:ar,tags.name:cs,tags.name:de,tags.name:en,tags.name:fa,tags.name:fi,tags.name:gan,tags.name:hu,tags.name:it,tags.name:ja,tags.name:lmo,tags.name:nl,tags.name:no,tags.name:pl,tags.name:uk,tags.name:ur,tags.name:yi,tags.name:yue,tags.name:zh,tags.addr:parentstreet,tags.not:addr:street,tags.addr:city:fa,tags.toilets:female,tags.toilets:male,tags.toilets:unisex,tags.real_cider,tags.fixme:addr:1,tags.layer,tags.accommodation,tags.comedy,tags.description:floor,tags.note:beer,tags.venue,tags.photo,tags.addr:interpolation,tags.roof_terrace,tags.opening_hours:url,tags.disused,tags.addr:place,tags.atm,tags.check_date:opening_hours,tags.delivery,tags.sport,tags.tourism,tags.closed,tags.addr:floor,tags.short_name,tags.wifi,tags.craft_keg,tags.warning,tags.reservation,tags.capacity,tags.currency:XLT,tags.payment:cash:XLT-BXTP,tags.payment:text:XLT-BXTP,tags.fryup,tags.image,tags.diet:halal,tags.name:signed,tags.survey_date,tags.garden,tags.addr:town,tags.old_fhrs:id,tags.gay,tags.access,tags.changing_table,tags.contact:whatsapp,tags.drink:coffee,tags.happycow:id,tags.internet_access:ssid,tags.ferry,tags.branch,tags.amenity_1,tags.payment:bancomat,tags.payment:visa_debit,tags.payment:visa_electron,tags.url,tags.old_amenity,tags.internet_access:password,tags.owner,tags.wine,tags.motorcycle:theme,tags.sky,tags.dontimport:fhrs:addrline4,tags.instagram,tags.source:addr:housenumber,tags.deli,tags.drive_in,tags.shop,tags.note:name:ko,tags.contact:untappd,tags.min_age,tags.not:brand:wikidata,tags.cost:coffee,tags.cocktails,tags.music_genre,tags.old_cuisine,tags.booth,tags.covered,tags.disused:amenity,tags.listed_status,tags.man_made,tags.ref:vatin,tags.not:name,tags.source:internet_access,tags.seating,tags.diet:pescetarian,tags.seats,tags.changing_table:count,tags.changing_table:location,tags.diet:kosher,tags.operator:wikidata,tags.type,tags.football,tags.internet_access:free,tags.highchair,tags.disused:name,tags.floor:material,tags.level:ref,tags.currency:XBT,tags.contact:fax,tags.theme,tags.organic,tags.naptan:Bearing,tags.naptan:CommonName,tags.naptan:Indicator,tags.naptan:verified,tags.was:amenity,tags.usage,tags.alcohol,tags.FIXME,tags.building,tags.old_fhrs:local_authority_id,tags.check_date:diet:vegetarian,tags.payment:lightning,tags.operator:subway,tags.operator:train,tags.addr:full,tags.note:name,tags.addr:subdistrict,tags.quiz,tags.mapillary,tags.was:atm,tags.was:name,tags.was:old_name,tags.was:source:name,tags.name:lit,tags.construction,tags.payment:coins,tags.diet:lacto_vegetarian,tags.diet:organic,tags.currency:GBP,tags.payment:lightning_contactless,tags.payment:onchain,tags.breakfast,tags.trendy,tags.addr:substreet,tags.fax,tags.addr:flat,tags.addr:flats,tags.old_name:zh,tags.bicycle:conditional,tags.franchise,tags.cafe,tags.last_check,tags.number,tags.diet:dairy_free,tags.diet:lactose_free,tags.source:opening_hours,tags.outdoor_seating:comfort,tags.name:es,tags.name:gl,tags.name:-2013,tags.tourism:-2013,tags.disused:shop,tags.source:not:addr:postcode,tags.building:levels,tags.music,tags.description:en,tags.source:wheelchair,tags.diet:fish,tags.diet:healthy,tags.former_amenity,tags.construction:amenity,tags.was:fhrs:id,tags.product,tags.int_name,tags.old:fhrs:id,tags.contact:pinterest,tags.drink:soft_drink,tags.drink:tea,tags.source:railway,tags.indoor,tags.dontimport:fhrs:addrline3,tags.payment:amex,tags.ele,tags.brand:website,tags.barrier,tags.source_ref:network,tags.designation,tags.payment:android_pay,tags.second_hand,tags.drinking_water:refill,tags.drinking_water:refill:network,tags.source:cuisine,tags.office,tags.alt_postcode,tags.old_name:2007-2017,tags.old_name:2017-2020,tags.note:addr,tags.source:address,tags.status,tags.fixme:addr2,tags.craft,tags.changing_table:fee,tags.shop:units,tags.power_supply,tags.source:position,tags.historic:amenity,tags.restaurant,tags.ref:store,tags.pub,tags.not:fhrs:id,tags.source:geograph:image,tags.source_1,tags.alt_name:ru,tags.addr:district,tags.vegetarian,tags.petanque,tags.lgbtq:men,tags.lgbtq:trans,tags.lgbtq:women,tags.halal,tags.bouncers,tags.checkfirst:suggested:name,tags.drink:beer,tags.drink:wine,tags.source:date,tags.opening_hours:note,tags.was:cuisine,tags.burrito,tags.highway,tags.traffic_signals:direction,tags.note_1,tags.name:bg,tags.long_name,tags.source:operator,tags.club,tags.source:ref,tags.automatic_door,tags.door,tags.contact:snapchat,tags.contact:tiktok,tags.shisha,tags.payment:apple_pay,tags.payment:diners_club,tags.payment:google_pay,tags.payment:jcb,tags.was:shop,tags.stars,tags.railway:ref:DB,tags.name:et,tags.uic_ref,tags.website:departures,tags.access:note,tags.flickr,tags.max_level,tags.min_level,tags.micropub,tags.phonetic:en,tags.live_music,tags.airside,tags.note:level,tags.payment:cards,tags.lunch,tags.craft_beer,tags.drink,tags.name:gsw,tags.distillery,tags.drink:cider,tags.wheelchair:description:en,tags.service:bicycle:repair,tags.service:bicycle:retail,tags.workshop,tags.was:fhrs:local_authority_id,tags.contact:tripadvisor,tags.drink:cola,tags.drink:milk,tags.cat,tags.diet:raw,tags.name:pt,tags.exit_only,tags.self_service,tags.was:brand,tags.currency:BCH,tags.payment:cryptocurrencies,tags.name:zh-Hant,tags.exit,tags.note:wheelchair,tags.drink:lemonade,tags.drink:water,tags.fixme:opening_hours,tags.drink:natural_wine,tags.local_name,tags.exit_to,tags.pop_up_shop,tags.brand:en,tags.brand:zh,tags.name:ro,tags.contact:youtube,tags.weather_protection,tags.old_shop,tags.drink:sparkling_wine,tags.frequency,tags.gauge,tags.name:lt,tags.tfl_travelzone,tags.voltage,tags.addr:hamlet,tags.location,tags.website:operator,tags.name:zh-Hans,tags.building:part:levels,tags.source:geograph:id,tags.toilets:disposal,tags.toilets:position,tags.was:brand:wikidata,tags.was:brand:wikipedia,tags.payment:notes,tags.internet_access:access,tags.internet_access:operator,tags.noaddress,tags.colour,tags.source:building,tags.name:el,tags.website:overground,tags.website:tube,tags.levels,tags.opening_date,tags.ref:STANOX,tags.ref:nalco,tags.fast_food,tags.url:menu,tags.source:alt_name,tags.roof:shape,tags.bring_your_own_wine,tags.network:wikidata,tags.coffee,tags.kids_area,tags.drink:cocktail,tags.drink:gin,tags.drink:rum,tags.drink:spirits,tags.drink:tequila,tags.drink:whisky,tags.name:pa,tags.name:pnb,tags.wheelchair:note,tags.landuse,tags.nohousenumber,tags.shelter,tags.contact:linkedin,tags.diet:ovo_vegetarian,tags.private,tags.building:colour,tags.building:material,tags.roof:levels,tags.leisure,tags.building:part,tags.service_times,tags.phone:mobile,tags.access:conditional,tags.note:access,tags.name:cy,tags.menu:url,tags.diet:diabetes,tags.diet:egg_free,tags.diet:nut_free,tags.diet:soy_free,tags.source:addr:website,tags.source:housenumber,tags.post_office:type,tags.last_checked,tags.diet:non-vegetarian,tags.vacant,tags.diet:seafood,tags.diet:seasonal,tags.toilets:number,tags.posh,tags.zero_waste,tags.check_date:diet:vegan,tags.full_name,tags.motorcycle,tags.noname,tags.check_date:currency:XBT,tags.seasonal,tags.male,tags.camra,tags.operator:website,tags.website:booking,tags.street_vendor,tags.contact:foursquare,tags.lit
0,node,451152,51.60084,-0.194608,London,King of Prussia,363.0,N3 1DH,Regents Park Road,pub,pizza;burger,yes,yes,yes,yes,yes,wlan,King of Prussia,open,Mitchells & Butlers,yes,yes,yes,yes,yes,yes,yes,yes,outside,yes,customers,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,node,451153,51.602031,-0.193503,,,,,,restaurant,,,,,,,,Central Restaurant,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,node,451154,51.599579,-0.196028,,,319.0,,Regents Park Road,pub,,,,,,yes,,The Catcher in the Rye,,,,,,,,,,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,node,451271,51.614104,-0.176556,London,,749.0,,High Road,pub,,,,,,,,The Tally Ho,,,yes,,,,,,,,,,,,GB,Wetherspoon,Q6109362,en:Wetherspoons,yes,http://www.jdwetherspoon.co.uk/home/pubs/the-t...,Q24298976,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,node,638645,51.372908,-0.414344,,,,,,,,,,,,,,Walton-on-Thames,,South Western Railway,,,,,,,,,,,,,,,,,,,Q2183741,9100WONT,National Rail,station,station,WAL,yes,en:Walton-on-Thames railway station,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
data_norm.columns.replace(".")

In [63]:
data_norm["lon"].value_counts(dropna=False)

-0.074670    2
-0.162390    2
-0.197462    2
-0.103991    2
-0.224031    2
            ..
-0.133973    1
-0.109033    1
-0.111255    1
-0.124223    1
-0.106666    1
Name: lon, Length: 16485, dtype: int64

In [55]:
data_norm_way["tags.opening_hours"].value_counts(dropna=False)

NaN                                                                                            69404
24/7                                                                                              25
Mo-Su 12:00-23:00                                                                                 22
Mo-Sa 12:00-23:00; Su 12:00-22:30                                                                 17
Mo-Sa 11:00-23:00; Su 12:00-22:30                                                                 14
                                                                                               ...  
Mo-Sa 12:00-14:30, 17:00-21:30                                                                     1
Mo-Th 12:00-14:00, 17:30-23:00; Fr 12:00-14:30, 17:30-00:00; Sa 12:00-00:00; Su 12:00-21:00        1
Mo-Sa 16:00-03:00                                                                                  1
Mo-Sa 17:30-23:00; Tu off; Su 17:00-22:30                                                  

In [33]:
### filter columns 
streetmap_short = data_norm[data_norm_keepers]

In [34]:
### filter columns 
streetmap_short_way = data_norm_way[data_norm_keepers]

In [35]:
### drop rows with no lat/lon values 
streetmap_short_way = streetmap_short_way.dropna(subset=['lat'])

In [36]:
streetmap = pd.concat([streetmap_short_way, streetmap_short], axis=0)

### 3.1. First Look - Open Street Maps-Data

In [61]:
streetmap_short[streetmap_short["tags.amenity"] == "restaurant"].shape

(5344, 10)

In [58]:
streetmap_short[streetmap_short["tags.amenity"] == "restaurant"]["tags.cuisine"].value_counts(dropna=False)

NaN                      1619
indian                    480
italian                   448
pizza                     273
chinese                   233
                         ... 
italian;chinese             1
sichuan;chinese             1
spanish;mediterranean       1
scottish                    1
uyghur                      1
Name: tags.cuisine, Length: 358, dtype: int64

In [53]:
streetmap_short["tags.cuisine"].value_counts(dropna=False)

NaN                      8554
coffee_shop               849
pizza                     613
indian                    601
italian                   485
                         ... 
coffee_shop;danish          1
tapas;spanish;seafood       1
English breakfast           1
ramen;japanese              1
lebanese;kebab              1
Name: tags.cuisine, Length: 640, dtype: int64

In [37]:
streetmap_short.head()

Unnamed: 0,id,lat,lon,tags.name,tags.amenity,tags.cuisine,tags.railway,tags.public_transport,tags.diet:vegetarian,tags.diet:vegan
0,451152,51.60084,-0.194608,King of Prussia,pub,pizza;burger,,,yes,yes
1,451153,51.602031,-0.193503,Central Restaurant,restaurant,,,,,
2,451154,51.599579,-0.196028,The Catcher in the Rye,pub,,,,,
3,451271,51.614104,-0.176556,The Tally Ho,pub,,,,,
4,638645,51.372908,-0.414344,Walton-on-Thames,,,station,station,,


In [38]:
streetmap_short["tags.amenity"].unique()

array(['pub', 'restaurant', nan, 'cafe', 'bar', 'bus_station',
       'fast_food', 'ferry_terminal'], dtype=object)

In [39]:
streetmap_short.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16540 entries, 0 to 16539
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     16540 non-null  int64  
 1   lat                    16540 non-null  float64
 2   lon                    16540 non-null  float64
 3   tags.name              15925 non-null  object 
 4   tags.amenity           15260 non-null  object 
 5   tags.cuisine           7986 non-null   object 
 6   tags.railway           1280 non-null   object 
 7   tags.public_transport  764 non-null    object 
 8   tags.diet:vegetarian   753 non-null    object 
 9   tags.diet:vegan        541 non-null    object 
dtypes: float64(2), int64(1), object(7)
memory usage: 1.3+ MB


In [40]:
streetmap_short.describe()

Unnamed: 0,id,lat,lon
count,16540.0,16540.0,16540.0
mean,4684916000.0,51.500684,-0.131178
std,3258168000.0,0.068268,0.130715
min,451152.0,51.286089,-0.507334
25%,1650741000.0,51.469531,-0.190261
50%,4603416000.0,51.51009,-0.125446
75%,7147501000.0,51.535076,-0.072672
max,10819590000.0,51.691127,0.298538


## 4. Web scraping test

In [None]:
from bs4 import BeautifulSoup

In [None]:
url = "http://insideairbnb.com/get-the-data/"
page = requests.get(url)

In [None]:
soup = BeautifulSoup(page.content, "html.parser")

In [None]:
print(soup)

## 5.Area-Calc-Test

In [41]:
streetmap.head()

Unnamed: 0,id,lat,lon,tags.name,tags.amenity,tags.cuisine,tags.railway,tags.public_transport,tags.diet:vegetarian,tags.diet:vegan
0,15262026,51.518156,-0.169879,,,,,,,
1,20576176,51.32522,-0.006881,,,,,,,
2,20576312,51.30953,-0.054178,,,,,,,
3,21141374,51.52227,-0.163755,,,,,,,
4,21310453,51.486288,-0.121714,,,,,,,


In [42]:
# Import libraries and set test subsets of the data
from sklearn.neighbors import BallTree
list_test = listings_short[["id", "name", "latitude", "longitude"]]
street_test = streetmap[["id", "lat", "lon", "tags.amenity", "tags.railway"]]

In [43]:
#rename the column name so that they are equal 
streetmap = streetmap.rename(columns={"lat": "latitude", "lon": "longitude"})


In [44]:
# Calculate the needed radius when converted to unit sphere.
distance_in_meter = 500
earth_radius_in_meter = 6_371_000

radius = distance_in_meter / earth_radius_in_meter

In [342]:
# Convert the latitude and longitude columns to radians
list_test = list_test.copy()
list_test.loc[:, 'lat_rad'] = np.radians(list_test['latitude'])
list_test.loc[:, 'lon_rad'] = np.radians(list_test['longitude'])
street_test = street_test.copy()
street_test.loc[:, 'lat_rad'] = np.radians(street_test['latitude'])
street_test.loc[:, 'lon_rad'] = np.radians(street_test['longitude'])

# Create a BallTree object with the latitude and longitude columns
tree = BallTree(street_test[['lat_rad', 'lon_rad']],
                leaf_size=15, metric='haversine')

# Find the indices of all neighbors within a radius of 500 meters
# for each row in list_test
indices = tree.query_radius(
    list_test[['lat_rad', 'lon_rad']], r=radius, count_only=False)

# Calculate the number of neighbors for each amenity type
amenity_types = street_test['tags.amenity'].unique()
amenity_counts = np.zeros((list_test.shape[0], amenity_types.shape[0]))
for i, amenity in enumerate(amenity_types):
    street_indices = street_test[street_test['tags.amenity'] == amenity].index
    intersection_counts = np.array(
        [np.intersect1d(street_indices, idx).size for idx in indices])
    amenity_counts[:, i] = intersection_counts

# Add the new columns to list_test
list_test = pd.concat([list_test, pd.DataFrame(amenity_counts, columns=[
                      f'num_neighbors_{amenity}' for amenity in amenity_types])], axis=1)

# Calculate the number of neighbors for each railway type
railway_types = street_test['tags.railway'].unique()
railway_counts = np.zeros((list_test.shape[0], railway_types.shape[0]))
for i, railway in enumerate(railway_types):
    street_indices = street_test[street_test['tags.railway'] == railway].index
    intersection_counts = np.array(
        [np.intersect1d(street_indices, idx).size for idx in indices])
    railway_counts[:, i] = intersection_counts

# Add the new columns to list_test
list_test = pd.concat([list_test, pd.DataFrame(railway_counts, columns=[
                      f'num_neighbors_{railway}' for railway in railway_types])], axis=1)


# Remove the temporary columns
list_test.drop(columns=['lat_rad', 'lon_rad'], inplace=True)
street_test.drop(columns=['lat_rad', 'lon_rad'], inplace=True)


In [343]:
list_test.describe()

Unnamed: 0,id,latitude,longitude,num_neighbors_nan,num_neighbors_pub,num_neighbors_fast_food,num_neighbors_clock,num_neighbors_atm,num_neighbors_toilets,num_neighbors_ferry_terminal,num_neighbors_restaurant,num_neighbors_dentist,num_neighbors_watering_place,num_neighbors_drinking_water,num_neighbors_bench,num_neighbors_parking_entrance,num_neighbors_bicycle_parking,num_neighbors_cafe,num_neighbors_bar,num_neighbors_bus_station,num_neighbors_nan.1,num_neighbors_pub.1,num_neighbors_fast_food.1,num_neighbors_clock.1,num_neighbors_atm.1,num_neighbors_toilets.1,num_neighbors_ferry_terminal.1,num_neighbors_restaurant.1,num_neighbors_dentist.1,num_neighbors_watering_place.1,num_neighbors_drinking_water.1,num_neighbors_bench.1,num_neighbors_parking_entrance.1,num_neighbors_bicycle_parking.1,num_neighbors_cafe.1,num_neighbors_bar.1,num_neighbors_bus_station.1,num_neighbors_nan.2,num_neighbors_subway_entrance,num_neighbors_entrance,num_neighbors_stop,num_neighbors_station,num_neighbors_tram_stop
count,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0
mean,2.368628e+17,51.509708,-0.128108,0.0,4.409511,10.803378,0.013809,0.020946,0.004187,0.069603,16.642974,0.001063,0.000173,0.002778,0.002831,0.001489,0.003203,12.25211,2.122952,0.039207,0.0,4.409511,10.803378,0.013809,0.020946,0.004187,0.069603,16.642974,0.001063,0.000173,0.002778,0.002831,0.001489,0.003203,12.25211,2.122952,0.039207,0.0,1.448878,0.001688,0.003588,2.308223,0.249917
std,3.425911e+17,0.048369,0.099341,0.0,9.416939,26.866415,0.116698,0.173963,0.06559,0.415069,42.29629,0.03259,0.013143,0.052631,0.053131,0.054543,0.056505,30.387748,4.906068,0.238124,0.0,9.416939,26.866415,0.116698,0.173963,0.06559,0.415069,42.29629,0.03259,0.013143,0.052631,0.053131,0.054543,0.056505,30.387748,4.906068,0.238124,0.0,3.823028,0.056099,0.059797,6.168676,1.346805
min,13913.0,51.295937,-0.4978,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,19817400.0,51.48354,-0.18939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,39338750.0,51.51384,-0.12628,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,6.562985e+17,51.53945,-0.06846,0.0,4.0,8.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,0.0,0.0,4.0,8.0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
max,8.463271e+17,51.681142,0.28857,0.0,77.0,247.0,1.0,2.0,2.0,5.0,383.0,1.0,1.0,1.0,1.0,2.0,1.0,287.0,47.0,2.0,0.0,77.0,247.0,1.0,2.0,2.0,5.0,383.0,1.0,1.0,1.0,1.0,2.0,1.0,287.0,47.0,2.0,0.0,43.0,2.0,1.0,52.0,14.0


## 6.Nearest Station-Test

In [421]:
streetmap["tags.railway"].unique()

array([nan, 'subway_entrance', 'entrance', 'stop', 'station', 'tram_stop'],
      dtype=object)

In [45]:
streetmap.dropna(subset=["tags.railway"], inplace=True)


In [49]:
from haversine import haversine, Unit

# define a function to calculate distance between two points
def calc_distance(lat1, lon1, lat2, lon2):
    return haversine((lat1, lon1), (lat2, lon2), unit=Unit.METERS)

# get all unique values in "tags.railway"
railway_tags = streetmap["tags.railway"].unique()

# loop through each row in listings_short and calculate the minimum distance
# for each value in streetmap["tags.railway"]
for tag in railway_tags:
    distances = []
    for _, row in listings_short.iterrows():
        min_distance = None
        for _, sm_row in streetmap[streetmap["tags.railway"] == tag].iterrows():
            distance = calc_distance(row["latitude"], row["longitude"], sm_row["latitude"], sm_row["longitude"])
            if min_distance is None or distance < min_distance:
                min_distance = distance
        distances.append(min_distance)

    # add the calculated minimum distances as a new column in listings_short
    col_name = "min_distance_{}".format(tag)
    listings_short[col_name] = distances


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short[col_name] = distances
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short[col_name] = distances
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  listings_short[col_name] = distances
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,

In [None]:
# from haversine import haversine, Unit

# # define a function to calculate distance between two points
# def calc_distance(lat1, lon1, lat2, lon2):
#     return haversine((lat1, lon1), (lat2, lon2), unit=Unit.METERS)

# # loop through each row in listings_short and calculate the minimum distance
# # for "subway_entrance" tag
# distances = []
# for _, row in listings_short.iterrows():
#     min_distance = None
#     for _, sm_row in streetmap[streetmap["tags.railway"] == "subway_entrance"].iterrows():
#         distance = calc_distance(row["latitude"], row["longitude"], sm_row["latitude"], sm_row["longitude"])
#         if min_distance is None or distance < min_distance:
#             min_distance = distance
#     distances.append(min_distance)

# # add the calculated minimum distances as a new column in listings_short
# listings_short["min_distance_subway_entrance"] = distances


In [50]:
listings_short.describe()

Unnamed: 0,id,scrape_id,host_id,host_listings_count,host_total_listings_count,latitude,longitude,accommodates,bedrooms,beds,minimum_nights,maximum_nights,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month,min_distance_subway_entrance,min_distance_entrance,min_distance_stop,min_distance_station,min_distance_tram_stop
count,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,56548.0,55595.0,55606.0,55564.0,55592.0,55565.0,55562.0,56548.0,75241.0,75241.0,75241.0,75241.0,75241.0
mean,2.368628e+17,20230310000000.0,139076500.0,39.523398,71.374424,51.509708,-0.128108,3.105793,1.489467,1.761473,5.750748,7790.3,17.974668,5.736301,0.456467,4.588159,4.723349,4.623915,4.783393,4.801027,4.729358,4.607755,0.877064,1175.880056,16688.645228,6880.722254,498.407503,11457.687812
std,3.425911e+17,12.96884,152962100.0,222.163629,420.025668,0.048369,0.099341,1.936972,0.871029,1.222497,24.240947,1914055.0,41.984021,12.991805,1.277612,0.779083,0.489328,0.550721,0.453835,0.448759,0.418873,0.521839,1.234003,1546.349009,5240.929974,3926.887173,350.291556,4912.939491
min,13913.0,20230310000000.0,2594.0,1.0,1.0,51.295937,-0.4978,0.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01,1.077511,78.976398,20.607206,1.150966,32.403778
25%,19817400.0,20230310000000.0,19959230.0,1.0,1.0,51.48354,-0.18939,2.0,1.0,1.0,1.0,42.0,1.0,0.0,0.0,4.5,4.67,4.5,4.75,4.79,4.64,4.5,0.13,345.896172,13687.440042,4022.241663,266.743858,8236.041146
50%,39338750.0,20230310000000.0,67455190.0,2.0,2.0,51.51384,-0.12628,2.0,1.0,1.0,2.0,365.0,4.0,0.0,0.0,4.82,4.89,4.8,4.94,4.97,4.85,4.75,0.45,649.363801,16742.68294,6321.069869,425.348579,11379.540076
75%,6.562985e+17,20230310000000.0,224867000.0,5.0,8.0,51.53945,-0.06846,4.0,2.0,2.0,4.0,1125.0,17.0,6.0,0.0,5.0,5.0,5.0,5.0,5.0,5.0,4.97,1.09,1280.920791,19581.228836,8836.758199,641.052645,14414.214378
max,8.463271e+17,20230310000000.0,505040000.0,2138.0,24047.0,51.681142,0.28857,16.0,22.0,38.0,1125.0,524855600.0,1328.0,564.0,68.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,51.05,20340.499863,39554.20209,27737.123484,7394.015515,30478.709223


Price correlation

In [132]:
list_test["price"] = listings_short["price"]

In [133]:
list_test.price = list_test.price.str[1:-3]
list_test.price = list_test.price.str.replace(",", "")
list_test.price = list_test.price.astype('int64')

In [134]:
list_test.corrwith(list_test['price'])

  list_test.corrwith(list_test['price'])


id                                0.064004
latitude                          0.005344
longitude                        -0.034336
num_neighbors_nan                      NaN
num_neighbors_pub                 0.044490
num_neighbors_fast_food           0.046168
num_neighbors_clock               0.004750
num_neighbors_atm                -0.004762
num_neighbors_toilets            -0.001710
num_neighbors_restaurant          0.046288
num_neighbors_dentist            -0.005625
num_neighbors_watering_place     -0.002393
num_neighbors_drinking_water     -0.004003
num_neighbors_bench               0.080874
num_neighbors_parking_entrance   -0.005989
num_neighbors_bicycle_parking    -0.009015
num_neighbors_cafe                0.044919
num_neighbors_bar                 0.048390
price                             1.000000
num_neighbors_nan                      NaN
num_neighbors_pub                 0.013740
num_neighbors_fast_food           0.017038
num_neighbors_clock               0.000094
num_neighbo