# Airbnb Capstone Project

## 1.Import all Libraries

In [1]:
### import all libraries and set settings 
import pandas as pd
import numpy as np
import requests
import json
import gzip
from py_functions import increase_bbox 

pd.set_option('display.max_columns', None) # show all columns  

## 2.Inside Airbnb pipeline

In [2]:
### Define path, .gz archive file name, country and city for url
path ='data/'
gz_file = "listings.csv.gz"
country = "united-kingdom"
state = "england"
city = "london"
url = f"http://data.insideairbnb.com/{country}/{state}/{city}/2023-03-14/data/{gz_file}"

In [3]:
### Create new directory for city
!mkdir {path}{city}

mkdir: data/london: File exists


In [4]:
### Download the .gz file
r = requests.get(url)
with open(path+city+'/'+gz_file, 'wb') as f:
    f.write(r.content)

In [5]:
### Unzip the .gz file and save the content as pd.DataFrame via read_csv
with gzip.open(path+city+'/'+gz_file) as f:
    listings = pd.read_csv(f)


In [6]:
### select only desired columns 
columns_keeper = (["id",
                   "listing_url",
                   "scrape_id",
                   "last_scraped",
                   "name",
                   "picture_url",
                   "host_id",
                   "host_response_time",
                   "host_response_rate",
                   "host_acceptance_rate",
                   "host_is_superhost",
                   "host_listings_count",
                   "host_total_listings_count",
                   "neighbourhood_cleansed",
                   "neighbourhood_group_cleansed",
                   "latitude",
                   "longitude",
                   "property_type",
                   "room_type",
                   "accommodates",
                   "bathrooms",
                   "bathrooms_text",
                   "bedrooms",
                   "beds",
                   "amenities",
                   "price",
                   "minimum_nights",
                   "maximum_nights",
                   "instant_bookable",
                   "number_of_reviews",
                   "number_of_reviews_ltm",
                   "number_of_reviews_l30d",
                   "first_review",
                   "last_review",
                   "review_scores_rating",
                   "review_scores_accuracy",
                   "review_scores_cleanliness",
                   "review_scores_checkin",
                   "review_scores_communication",
                   "review_scores_location",
                   "review_scores_value",
                   "reviews_per_month"]
                  )

In [7]:
### filter columns 
listings_short = listings[columns_keeper]

### 2.2.First Look - Airbnb Data

In [8]:
listings_short.head()

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,picture_url,host_id,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,instant_bookable,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
0,714569379355913481,https://www.airbnb.com/rooms/714569379355913481,20230314070633,2023-03-14,Lovely private bedroom in Muswell Hill.,https://a0.muscache.com/pictures/miso/Hosting-...,39009854,,,,f,1.0,1.0,Haringey,,51.59728,-0.13933,Private room in condo,Private room,1,,1 shared bath,1.0,1.0,"[""Iron"", ""Hangers"", ""Hair dryer"", ""Outdoor din...",$100.00,1,365,f,0,0,0,,,,,,,,,,
1,808038970516277767,https://www.airbnb.com/rooms/808038970516277767,20230314070633,2023-03-14,Studio Flat Franklin London,https://a0.muscache.com/pictures/miso/Hosting-...,495977998,within an hour,100%,100%,f,14.0,31.0,Barnet,,51.636518,-0.177475,Entire rental unit,Entire home/apt,1,,1 bath,1.0,1.0,[],$65.00,180,365,t,0,0,0,,,,,,,,,,
2,822557738577472503,https://www.airbnb.com/rooms/822557738577472503,20230314070633,2023-03-14,PropertyPlug - 2Bed Flat in Edgware SmartTV WiFi,https://a0.muscache.com/pictures/d77957d5-695a...,325629338,within an hour,100%,91%,t,4.0,8.0,Harrow,,51.60818,-0.2774,Entire rental unit,Entire home/apt,4,,2 baths,2.0,2.0,"[""Dining table"", ""Washer"", ""Outdoor furniture""...",$132.00,2,28,t,0,0,0,,,,,,,,,,
3,3518856,https://www.airbnb.com/rooms/3518856,20230314070633,2023-03-14,Wimbledon Double Bedroom Ensuite,https://a0.muscache.com/pictures/23a18442-fc1d...,187811,,,100%,f,2.0,5.0,Merton,,51.42231,-0.18841,Private room in rental unit,Private room,1,,1 private bath,1.0,1.0,"[""Washer"", ""Iron"", ""Hangers"", ""Kitchen"", ""Smok...",$100.00,5,1125,f,4,0,0,2015-12-27,2016-07-11,3.67,3.0,4.33,4.67,5.0,3.67,3.67,0.05
4,4876550,https://www.airbnb.com/rooms/4876550,20230314070633,2023-03-14,Stunning Apartment 2 minutes walk to Tube Station,https://a0.muscache.com/pictures/miso/Hosting-...,25087384,within a few hours,75%,46%,f,1.0,1.0,Barnet,,51.602282,-0.193606,Entire condo,Entire home/apt,2,,1 bath,1.0,1.0,"[""First aid kit"", ""Washer"", ""Fire extinguisher...",$120.00,5,90,f,0,0,0,,,,,,,,,,


In [9]:
listings_short.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75241 entries, 0 to 75240
Data columns (total 42 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   id                            75241 non-null  int64  
 1   listing_url                   75241 non-null  object 
 2   scrape_id                     75241 non-null  int64  
 3   last_scraped                  75241 non-null  object 
 4   name                          75210 non-null  object 
 5   picture_url                   75241 non-null  object 
 6   host_id                       75241 non-null  int64  
 7   host_response_time            46285 non-null  object 
 8   host_response_rate            46285 non-null  object 
 9   host_acceptance_rate          51028 non-null  object 
 10  host_is_superhost             75223 non-null  object 
 11  host_listings_count           75236 non-null  float64
 12  host_total_listings_count     75236 non-null  float64
 13  n

In [10]:
listings_short.describe()

Unnamed: 0,id,scrape_id,host_id,host_listings_count,host_total_listings_count,neighbourhood_group_cleansed,latitude,longitude,accommodates,bathrooms,bedrooms,beds,minimum_nights,maximum_nights,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
count,75241.0,75241.0,75241.0,75236.0,75236.0,0.0,75241.0,75241.0,75241.0,0.0,71768.0,74135.0,75241.0,75241.0,75241.0,75241.0,75241.0,56548.0,55595.0,55606.0,55564.0,55592.0,55565.0,55562.0,56548.0
mean,2.368628e+17,20230310000000.0,139076500.0,39.525958,71.3791,,51.509708,-0.128108,3.105793,,1.513153,1.772833,5.750748,7790.3,17.974668,5.736301,0.456467,4.588159,4.723349,4.623915,4.783393,4.801027,4.729358,4.607755,0.877064
std,3.425911e+17,12.96884,152962100.0,222.170789,420.039233,,0.048369,0.099341,1.936972,,0.885015,1.228013,24.240947,1914055.0,41.984021,12.991805,1.277612,0.779083,0.489328,0.550721,0.453835,0.448759,0.418873,0.521839,1.234003
min,13913.0,20230310000000.0,2594.0,1.0,1.0,,51.295937,-0.4978,0.0,,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01
25%,19817400.0,20230310000000.0,19959230.0,1.0,1.0,,51.48354,-0.18939,2.0,,1.0,1.0,1.0,42.0,1.0,0.0,0.0,4.5,4.67,4.5,4.75,4.79,4.64,4.5,0.13
50%,39338750.0,20230310000000.0,67455190.0,2.0,2.0,,51.51384,-0.12628,2.0,,1.0,1.0,2.0,365.0,4.0,0.0,0.0,4.82,4.89,4.8,4.94,4.97,4.85,4.75,0.45
75%,6.562985e+17,20230310000000.0,224867000.0,5.0,8.0,,51.53945,-0.06846,4.0,,2.0,2.0,4.0,1125.0,17.0,6.0,0.0,5.0,5.0,5.0,5.0,5.0,5.0,4.97,1.09
max,8.463271e+17,20230310000000.0,505040000.0,2138.0,24047.0,,51.681142,0.28857,16.0,,22.0,38.0,1125.0,524855600.0,1328.0,564.0,68.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,51.05


## 2.3. Clean Airbnb 

In [120]:
listings_short.head(2)

Unnamed: 0,id,listing_url,scrape_id,last_scraped,name,picture_url,host_id,host_response_time,host_response_rate,host_acceptance_rate,host_is_superhost,host_listings_count,host_total_listings_count,neighbourhood_cleansed,neighbourhood_group_cleansed,latitude,longitude,property_type,room_type,accommodates,bathrooms,bathrooms_text,bedrooms,beds,amenities,price,minimum_nights,maximum_nights,instant_bookable,number_of_reviews,number_of_reviews_ltm,number_of_reviews_l30d,first_review,last_review,review_scores_rating,review_scores_accuracy,review_scores_cleanliness,review_scores_checkin,review_scores_communication,review_scores_location,review_scores_value,reviews_per_month
0,714569379355913481,https://www.airbnb.com/rooms/714569379355913481,20230314070633,2023-03-14,Lovely private bedroom in Muswell Hill.,https://a0.muscache.com/pictures/miso/Hosting-...,39009854,,,,f,1.0,1.0,Haringey,,51.59728,-0.13933,Private room in condo,Private room,1,,1 shared bath,1.0,1.0,"[""Iron"", ""Hangers"", ""Hair dryer"", ""Outdoor din...",$100.00,1,365,f,0,0,0,,,,,,,,,,
1,808038970516277767,https://www.airbnb.com/rooms/808038970516277767,20230314070633,2023-03-14,Studio Flat Franklin London,https://a0.muscache.com/pictures/miso/Hosting-...,495977998,within an hour,100%,100%,f,14.0,31.0,Barnet,,51.636518,-0.177475,Entire rental unit,Entire home/apt,1,,1 bath,1.0,1.0,[],$65.00,180,365,t,0,0,0,,,,,,,,,,


In [123]:
listings_short.shape

(75241, 42)

In [121]:
listings_short.isnull().sum()

id                                  0
listing_url                         0
scrape_id                           0
last_scraped                        0
name                               31
picture_url                         0
host_id                             0
host_response_time              28956
host_response_rate              28956
host_acceptance_rate            24213
host_is_superhost                  18
host_listings_count                 5
host_total_listings_count           5
neighbourhood_cleansed              0
neighbourhood_group_cleansed    75241
latitude                            0
longitude                           0
property_type                       0
room_type                           0
accommodates                        0
bathrooms                       75241
bathrooms_text                    124
bedrooms                         3473
beds                             1106
amenities                           0
price                               0
minimum_nigh

## 3.Overpass Pipeline

In [11]:
### Increase outside border of listings
london_bbox = increase_bbox(listings)

In [None]:
# Increasing the maxs by 0.01 and decreasing the mins by 0.01 
# will shift the outline's border by a bit more than 1km in each direction.

# See increase_bbox function in py_functions.py

In [None]:
# (northern hemisphere)
# latitude max = north
# latitude min = south
# longitude max = east
# longitude min = west

In [82]:
### Get OSM data for slightly bigger bbox
### Example for nodes with an entry for "cuisine" - NODE
overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = f"""
[out:json];
(
    node["amenity"="bar"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="pub"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="restaurant"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="cafe"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    node["amenity"="fast_food"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});

    );
out body;
"""
response = requests.get(overpass_url,
                        params={'data': overpass_query})
data = response.json()

data_norm = pd.json_normalize(data, record_path="elements")


In [94]:
### same for WAY
overpass_url = "http://overpass-api.de/api/interpreter"
overpass_query = f"""
[out:json];
(
    way["amenity"="bar"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="pub"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="restaurant"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="cafe"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});
    way["amenity"="fast_food"]({london_bbox["south_shifted"]},{london_bbox["west_shifted"]},{london_bbox["north_shifted"]},{london_bbox["east_shifted"]});

    );
    (._;>;);
out body;
"""
response = requests.get(overpass_url,
                        params={'data': overpass_query})
data = response.json()

data_norm_way = pd.json_normalize(data, record_path="elements")



In [83]:
### select only desired columns
data_norm_keepers = (["id",
                     "lat",
                     "lon",
                     "tags.name",
                     "tags.amenity",
                     "tags.cuisine",
                     "tags.diet:vegetarian",
                     "tags.diet:vegan"]
                     )

In [84]:
### filter columns 
streetmap_short = data_norm[data_norm_keepers]

In [98]:
streetmap_short_way = data_norm_way[data_norm_keepers]

In [112]:
streetmap_short_way = streetmap_short_way.dropna(subset=['lat'])

In [114]:
streetmap = pd.concat([streetmap_short_way, streetmap_short], axis=0)

### 3.1. First Look - Open Street Maps-Data

In [85]:
streetmap_short.head()

Unnamed: 0,id,lat,lon,tags.name,tags.amenity,tags.cuisine,tags.diet:vegetarian,tags.diet:vegan
0,451152,51.60084,-0.194608,King of Prussia,pub,pizza;burger,yes,yes
1,451153,51.602031,-0.193503,Central Restaurant,restaurant,,,
2,451154,51.599579,-0.196028,The Catcher in the Rye,pub,,,
3,451271,51.614104,-0.176556,The Tally Ho,pub,,,
4,12242503,51.592016,0.027962,Railway Bell,pub,,,


In [86]:
streetmap_short["tags.amenity"].unique()

array(['pub', 'restaurant', 'cafe', 'bar', 'fast_food'], dtype=object)

In [87]:
streetmap_short.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15227 entries, 0 to 15226
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    15227 non-null  int64  
 1   lat                   15227 non-null  float64
 2   lon                   15227 non-null  float64
 3   tags.name             14866 non-null  object 
 4   tags.amenity          15227 non-null  object 
 5   tags.cuisine          7982 non-null   object 
 6   tags.diet:vegetarian  752 non-null    object 
 7   tags.diet:vegan       540 non-null    object 
dtypes: float64(2), int64(1), object(5)
memory usage: 951.8+ KB


In [88]:
streetmap_short.describe()

Unnamed: 0,id,lat,lon
count,15227.0,15227.0,15227.0
mean,4712532000.0,51.500401,-0.130841
std,3310201000.0,0.068128,0.129899
min,451152.0,51.286089,-0.507334
25%,1581578000.0,51.469155,-0.189072
50%,4573260000.0,51.509937,-0.125368
75%,7263036000.0,51.534281,-0.073234
max,10818980000.0,51.691127,0.298538


## 4. Web scraping test

In [None]:
from bs4 import BeautifulSoup

In [None]:
url = "http://insideairbnb.com/get-the-data/"
page = requests.get(url)

In [None]:
soup = BeautifulSoup(page.content, "html.parser")

In [None]:
print(soup)

## Area-Calc Test

In [115]:
# Import libraries and set test subsets of the data
from sklearn.neighbors import BallTree
list_test = listings_short[["id", "name", "latitude", "longitude"]]
street_test = streetmap[["id", "lat", "lon", "tags.amenity"]]

In [129]:
# Calculate the needed radius when converted to unit sphere.
distance_in_meter = 50
earth_radius_in_meter = 6_371_000

radius = distance_in_meter / earth_radius_in_meter

In [130]:
# Convert the latitude and longitude columns to radians
list_test = list_test.copy()
list_test.loc[:, 'lat_rad'] = np.radians(list_test['latitude'])
list_test.loc[:, 'lon_rad'] = np.radians(list_test['longitude'])
street_test = street_test.copy()
street_test.loc[:, 'lat_rad'] = np.radians(street_test['lat'])
street_test.loc[:, 'lon_rad'] = np.radians(street_test['lon'])

# Create a BallTree object with the latitude and longitude columns
tree = BallTree(street_test[['lat_rad', 'lon_rad']],
                leaf_size=15, metric='haversine')

# Find the indices of all neighbors within a radius of 500 meters
# for each row in list_test
indices = tree.query_radius(
    list_test[['lat_rad', 'lon_rad']], r=radius, count_only=False)

# Calculate the number of neighbors for each amenity type
amenity_types = street_test['tags.amenity'].unique()
amenity_counts = np.zeros((list_test.shape[0], amenity_types.shape[0]))
for i, amenity in enumerate(amenity_types):
    street_indices = street_test[street_test['tags.amenity'] == amenity].index
    intersection_counts = np.array(
        [np.intersect1d(street_indices, idx).size for idx in indices])
    amenity_counts[:, i] = intersection_counts

# Add the new columns to list_test
list_test = pd.concat([list_test, pd.DataFrame(amenity_counts, columns=[
                      f'num_neighbors_{amenity}' for amenity in amenity_types])], axis=1)

# Remove the temporary columns
list_test.drop(columns=['lat_rad', 'lon_rad'], inplace=True)
street_test.drop(columns=['lat_rad', 'lon_rad'], inplace=True)


In [131]:
list_test.describe()

Unnamed: 0,id,latitude,longitude,num_neighbors_nan,num_neighbors_pub,num_neighbors_fast_food,num_neighbors_clock,num_neighbors_atm,num_neighbors_toilets,num_neighbors_restaurant,num_neighbors_dentist,num_neighbors_watering_place,num_neighbors_drinking_water,num_neighbors_bench,num_neighbors_parking_entrance,num_neighbors_bicycle_parking,num_neighbors_cafe,num_neighbors_bar,price,num_neighbors_nan.1,num_neighbors_pub.1,num_neighbors_fast_food.1,num_neighbors_clock.1,num_neighbors_atm.1,num_neighbors_toilets.1,num_neighbors_restaurant.1,num_neighbors_dentist.1,num_neighbors_watering_place.1,num_neighbors_drinking_water.1,num_neighbors_bench.1,num_neighbors_parking_entrance.1,num_neighbors_bicycle_parking.1,num_neighbors_cafe.1,num_neighbors_bar.1
count,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0,75241.0
mean,2.368628e+17,51.509708,-0.128108,0.0,4.555841,11.124334,0.013809,0.020946,0.004187,17.079478,0.001063,0.000173,0.002778,0.002831,0.001489,0.003203,12.861193,2.246647,175.734453,0.0,0.054505,0.143167,5.3e-05,0.000306,0.0,0.224864,1.3e-05,0.0,0.0,0.0,2.7e-05,2.7e-05,0.16705,0.027352
std,3.425911e+17,0.048369,0.099341,0.0,10.147787,27.105368,0.116698,0.173963,0.06559,44.052041,0.03259,0.013143,0.052631,0.053131,0.054543,0.056505,33.059435,5.719008,487.362025,0.0,0.33976,0.846937,0.007291,0.021566,0.0,1.354402,0.003646,0.0,0.0,0.0,0.007291,0.005156,1.005509,0.22614
min,13913.0,51.295937,-0.4978,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,19817400.0,51.48354,-0.18939,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,60.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,39338750.0,51.51384,-0.12628,0.0,1.0,1.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,6.562985e+17,51.53945,-0.06846,0.0,4.0,8.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,180.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,8.463271e+17,51.681142,0.28857,0.0,86.0,237.0,1.0,2.0,2.0,399.0,1.0,1.0,1.0,1.0,2.0,1.0,308.0,50.0,70111.0,0.0,9.0,18.0,1.0,2.0,0.0,34.0,1.0,0.0,0.0,0.0,2.0,1.0,25.0,7.0


In [118]:
list_test.head()

Unnamed: 0,id,name,latitude,longitude,num_neighbors_nan,num_neighbors_pub,num_neighbors_fast_food,num_neighbors_clock,num_neighbors_atm,num_neighbors_toilets,num_neighbors_restaurant,num_neighbors_dentist,num_neighbors_watering_place,num_neighbors_drinking_water,num_neighbors_bench,num_neighbors_parking_entrance,num_neighbors_bicycle_parking,num_neighbors_cafe,num_neighbors_bar
0,714569379355913481,Lovely private bedroom in Muswell Hill.,51.59728,-0.13933,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,808038970516277767,Studio Flat Franklin London,51.636518,-0.177475,0.0,0.0,0.0,0.0,0.0,0.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0
2,822557738577472503,PropertyPlug - 2Bed Flat in Edgware SmartTV WiFi,51.60818,-0.2774,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3518856,Wimbledon Double Bedroom Ensuite,51.42231,-0.18841,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4876550,Stunning Apartment 2 minutes walk to Tube Station,51.602282,-0.193606,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [119]:
list_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 75241 entries, 0 to 75240
Data columns (total 19 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   id                              75241 non-null  int64  
 1   name                            75210 non-null  object 
 2   latitude                        75241 non-null  float64
 3   longitude                       75241 non-null  float64
 4   num_neighbors_nan               75241 non-null  float64
 5   num_neighbors_pub               75241 non-null  float64
 6   num_neighbors_fast_food         75241 non-null  float64
 7   num_neighbors_clock             75241 non-null  float64
 8   num_neighbors_atm               75241 non-null  float64
 9   num_neighbors_toilets           75241 non-null  float64
 10  num_neighbors_restaurant        75241 non-null  float64
 11  num_neighbors_dentist           75241 non-null  float64
 12  num_neighbors_watering_place    

In [132]:
list_test["price"] = listings_short["price"]

In [133]:
list_test.price = list_test.price.str[1:-3]
list_test.price = list_test.price.str.replace(",", "")
list_test.price = list_test.price.astype('int64')

In [134]:
list_test.corrwith(list_test['price'])

  list_test.corrwith(list_test['price'])


id                                0.064004
latitude                          0.005344
longitude                        -0.034336
num_neighbors_nan                      NaN
num_neighbors_pub                 0.044490
num_neighbors_fast_food           0.046168
num_neighbors_clock               0.004750
num_neighbors_atm                -0.004762
num_neighbors_toilets            -0.001710
num_neighbors_restaurant          0.046288
num_neighbors_dentist            -0.005625
num_neighbors_watering_place     -0.002393
num_neighbors_drinking_water     -0.004003
num_neighbors_bench               0.080874
num_neighbors_parking_entrance   -0.005989
num_neighbors_bicycle_parking    -0.009015
num_neighbors_cafe                0.044919
num_neighbors_bar                 0.048390
price                             1.000000
num_neighbors_nan                      NaN
num_neighbors_pub                 0.013740
num_neighbors_fast_food           0.017038
num_neighbors_clock               0.000094
num_neighbo