# Problem Statement

1. Top Areas with the highest number of listing (mapping of listing pointers with their neighbourhood polygons)
2. Thematic Map of the neighbourhood - broken down by single room, entire home.
3. Top 10 hosts by their total number of listings.
4. Personal Metric.




In [1]:
import pandas as pd 
import numpy as np 

In [2]:
listings = pd.read_csv("data/listings.csv")
listings.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2265,Zen-East in the Heart of Austin (monthly rental),2466,Paddy,,78702,30.2775,-97.71398,Entire home/apt,200,7,24,2019-03-16,0.17,3,131
1,5245,"Eco friendly, Colorful, Clean, Cozy monthly share",2466,Paddy,,78702,30.27577,-97.71379,Private room,114,30,9,2018-03-14,0.06,3,0
2,5456,"Walk to 6th, Rainey St and Convention Ctr",8028,Sylvia,,78702,30.26112,-97.73448,Entire home/apt,104,2,543,2021-02-01,3.74,1,354
3,5636,"Soco, Barton Springs Private Guest Cottage",7148,Peggy,,78704,30.2463,-97.76361,Entire home/apt,46,30,21,2020-11-06,0.15,2,67
4,5769,NW Austin Room,8186,Elizabeth,,78729,30.45697,-97.78422,Private room,39,1,259,2021-02-10,1.96,1,79


In [4]:
host_names = list(listings['host_name'])
host_ids = list(listings['host_id'])
unique_host_ids = np.unique(host_ids)
print(f"There are {len(unique_host_ids)} unique host IDs")
room_types = np.unique(list(listings['room_type']))
print(f"Room types - {room_types}")
print(f"Total samples : {len(host_names)}")


There are 6773 unique host IDs
Room types - ['Entire home/apt' 'Hotel room' 'Private room' 'Shared room']
Total samples : 10272


In [26]:
neighbourhoods = list(listings['neighbourhood'])
unique_neighbourhoods = np.unique(neighbourhoods)
listings_by_neighbourhood = {n:0 for n in unique_neighbourhoods}
listings_count = list(listings["calculated_host_listings_count"])

for i, lst in enumerate(listings_count):
    listings_by_neighbourhood[neighbourhoods[i]]+=lst 

sorted_neighbourhood_tuples = sorted(listings_by_neighbourhood.items(), key = lambda x : x[1])
print("Top 10 Neighbourhoods by Listings - \n")
print("Neighbourhood  | Listings")
print("~"*len("Neighbourhood  | Listings"))
for x, y in (sorted_neighbourhood_tuples[-10:][::-1]):
    print(x, "         |", y)


Top 10 Neighbourhoods by Listings - 

Neighbourhood  | Listings
~~~~~~~~~~~~~~~~~~~~~~~~~
78745          | 26733
78751          | 23334
78758          | 22029
78735          | 20357
78702          | 19429
78701          | 15938
78704          | 13740
78729          | 10191
78754          | 6326
78717          | 6299


In [6]:
import matplotlib.pyplot as plt
geoframe = gpd.read_file("data/neighbourhoods.geojson")
geoframe.head()

Unnamed: 0,neighbourhood,neighbourhood_group,geometry
0,78739,,"MULTIPOLYGON (((-97.89002 30.20941, -97.88929 ..."
1,78754,,"MULTIPOLYGON (((-97.63658 30.40266, -97.63754 ..."
2,78732,,"MULTIPOLYGON (((-97.87303 30.43806, -97.87335 ..."
3,78737,,"MULTIPOLYGON (((-98.01876 30.24167, -98.01617 ..."
4,78756,,"MULTIPOLYGON (((-97.72792 30.32906, -97.72760 ..."


## #2 Thematic Map

In [12]:
# Adding required columns to the geoframe.
len(list(geoframe['neighbourhood']))

44

## #3 Top 10 Hosts

In [8]:
listings_count = list(listings["calculated_host_listings_count"])
host_listings = {id:0 for id in unique_host_ids}
for i in range(len(listings_count)):
    host_listings[host_ids[i]]+=listings_count[i]

sorted_tuples = sorted(host_listings.items(), key=lambda x : x[1])
print(sorted_tuples[-10:])

[(124060715, 1444), (368944610, 1600), (210733801, 1849), (194953121, 2025), (346827202, 2304), (118565935, 3136), (8167447, 6561), (109638288, 6724), (104309976, 17161), (359036978, 128164)]


In [9]:
print("The top 10 hosts by listings are : ")
id10, lst10, name10 = [], [], []
for x in sorted_tuples[-10:][::-1]:
    id, lst = x
    id10.append(id); lst10.append(lst)
    name = host_names[host_ids.index(id)]
    name10.append(name)
top10_df = pd.DataFrame(data = {"Host_ID":id10, "Name": name10,  "Listings":lst10})
top10_df.head(10)

The top 10 hosts by listings are : 


Unnamed: 0,Host_ID,Name,Listings
0,359036978,Kia,128164
1,104309976,WanderJaunt,17161
2,109638288,TurnKey Vacation Rentals,6724
3,8167447,Martin,6561
4,118565935,Renters Club,3136
5,346827202,Madalynn,2304
6,194953121,Christian,2025
7,210733801,Above,1849
8,368944610,Kia,1600
9,124060715,Evolve Vacation Rental,1444


## #4 Personal Metric
