In [16]:
import pandas as pd
from pathlib import Path
import hvplot.pandas

In [2]:
# Load the raw Airbnb listings export into a DataFrame.
airbnb_df = pd.read_csv(filepath_or_buffer=Path('listings.csv'))

In [3]:
# List every column name, one per line, to decide which ones to drop.
# Iterating a DataFrame yields its column labels.
for column_name in airbnb_df:
    print(column_name)

id
listing_url
scrape_id
last_scraped
name
description
neighborhood_overview
picture_url
host_id
host_url
host_name
host_since
host_location
host_about
host_response_time
host_response_rate
host_acceptance_rate
host_is_superhost
host_thumbnail_url
host_picture_url
host_neighbourhood
host_listings_count
host_total_listings_count
host_verifications
host_has_profile_pic
host_identity_verified
neighbourhood
neighbourhood_cleansed
neighbourhood_group_cleansed
latitude
longitude
property_type
room_type
accommodates
bathrooms
bathrooms_text
bedrooms
beds
amenities
price
minimum_nights
maximum_nights
minimum_minimum_nights
maximum_minimum_nights
minimum_maximum_nights
maximum_maximum_nights
minimum_nights_avg_ntm
maximum_nights_avg_ntm
calendar_updated
has_availability
availability_30
availability_60
availability_90
availability_365
calendar_last_scraped
number_of_reviews
number_of_reviews_ltm
number_of_reviews_l30d
first_review
last_review
review_scores_rating
review_scores_accuracy
review_sc

In [4]:
# Build a narrower DataFrame holding only the columns used in this analysis.
clean_airbnb_df = airbnb_df.loc[
    :,
    [
        'id',
        'host_neighbourhood',
        'neighbourhood_cleansed',
        'latitude',
        'longitude',
        'room_type',
        'accommodates',
        'bedrooms',
        'bathrooms_text',
        'price',
        'review_scores_rating',
    ],
]
clean_airbnb_df.head()

Unnamed: 0,id,host_neighbourhood,neighbourhood_cleansed,latitude,longitude,room_type,accommodates,bedrooms,bathrooms_text,price,review_scores_rating
0,5456.0,East Downtown,78702,30.26057,-97.73441,Entire home/apt,3,1.0,1 bath,$111.00,4.83
1,5769.0,SW Williamson Co.,78729,30.45697,-97.78422,Private room,2,1.0,1 shared bath,$39.00,4.9
2,6413.0,Travis Heights,78704,30.24885,-97.73587,Entire home/apt,2,,1 bath,$66.00,4.97
3,6448.0,Zilker,78704,30.26034,-97.76487,Entire home/apt,3,1.0,1 bath,$149.00,4.97
4,341382.0,East Downtown,78702,30.26543,-97.71338,Entire home/apt,10,4.0,2 baths,$750.00,4.72


In [5]:
# Count listings per room type; only entire homes/apartments are kept later.
clean_airbnb_df.value_counts(subset='room_type')

room_type
Entire home/apt    14123
Private room        2778
Shared room          123
Hotel room            47
dtype: int64

In [6]:
# Drop every room type that is not an entire home/apartment.
values = ['Private room', 'Shared room', 'Hotel room']

# `~mask` is the idiomatic negation of a boolean Series (instead of `== False`).
# `.copy()` makes the filtered result an independent DataFrame, so the
# column assignments in later cells do not raise SettingWithCopyWarning.
clean_airbnb_df = clean_airbnb_df[~clean_airbnb_df['room_type'].isin(values)].copy()
clean_airbnb_df.head()

Unnamed: 0,id,host_neighbourhood,neighbourhood_cleansed,latitude,longitude,room_type,accommodates,bedrooms,bathrooms_text,price,review_scores_rating
0,5456.0,East Downtown,78702,30.26057,-97.73441,Entire home/apt,3,1.0,1 bath,$111.00,4.83
2,6413.0,Travis Heights,78704,30.24885,-97.73587,Entire home/apt,2,,1 bath,$66.00,4.97
3,6448.0,Zilker,78704,30.26034,-97.76487,Entire home/apt,3,1.0,1 bath,$149.00,4.97
4,341382.0,East Downtown,78702,30.26543,-97.71338,Entire home/apt,10,4.0,2 baths,$750.00,4.72
5,8502.0,East Riverside,78741,30.23466,-97.73682,Entire home/apt,2,1.0,1 bath,$44.00,4.53


In [7]:
# Count listings per guest capacity to judge whether filtering on it is worthwhile.
clean_airbnb_df.value_counts(subset='accommodates')

accommodates
4     3453
6     2399
2     2374
8     1240
3     1064
5      969
10     712
7      464
16     417
12     360
9      194
14     173
11      97
1       84
13      63
15      60
dtype: int64

In [8]:
# Strip the currency formatting ("$" and thousands separators) from `price`
# and convert the column to float.
#
# - The pattern is an explicit regex character class with `regex=True`, so the
#   result does not depend on the pandas version's default for `regex`
#   ("$" on its own is a regex end-of-string anchor).
# - `assign` returns a new DataFrame with the column replaced, which guarantees
#   the float dtype; `df.loc[:, 'price'] = ...` sets values in place on newer
#   pandas and can leave the column as `object`. It also avoids writing into a
#   filtered frame (SettingWithCopyWarning).
clean_airbnb_df = clean_airbnb_df.assign(
    price=clean_airbnb_df['price']
    .str.replace(r'[$,]', '', regex=True)
    .astype('float')
)

  


In [9]:
# Inspect the column dtypes to confirm `price` was converted to a float.
clean_airbnb_df.dtypes

id                        float64
host_neighbourhood         object
neighbourhood_cleansed      int64
latitude                  float64
longitude                 float64
room_type                  object
accommodates                int64
bedrooms                  float64
bathrooms_text             object
price                     float64
review_scores_rating      float64
dtype: object

In [10]:
# Give the columns used in the aggregations and plots presentation-friendly names.
clean_airbnb_df = clean_airbnb_df.rename(
    columns={
        'neighbourhood_cleansed': 'Zip Code',
        'accommodates': 'Accommodates',
        'bedrooms': 'Bedrooms',
        'price': 'Avg Price',
        'review_scores_rating': 'Avg Rating',
    }
)

clean_airbnb_df

Unnamed: 0,id,host_neighbourhood,Zip Code,latitude,longitude,room_type,Accommodates,Bedrooms,bathrooms_text,Avg Price,Avg Rating
0,5.456000e+03,East Downtown,78702,30.260570,-97.734410,Entire home/apt,3,1.0,1 bath,111.0,4.83
2,6.413000e+03,Travis Heights,78704,30.248850,-97.735870,Entire home/apt,2,,1 bath,66.0,4.97
3,6.448000e+03,Zilker,78704,30.260340,-97.764870,Entire home/apt,3,1.0,1 bath,149.0,4.97
4,3.413820e+05,East Downtown,78702,30.265430,-97.713380,Entire home/apt,10,4.0,2 baths,750.0,4.72
5,8.502000e+03,East Riverside,78741,30.234660,-97.736820,Entire home/apt,2,1.0,1 bath,44.0,4.53
...,...,...,...,...,...,...,...,...,...,...,...
17061,5.202809e+07,,78641,30.438200,-97.893440,Entire home/apt,16,7.0,4.5 baths,1042.0,5.00
17066,5.397320e+17,,78640,30.011284,-97.839622,Entire home/apt,8,3.0,2.5 baths,163.0,4.69
17067,5.869100e+17,,78626,30.612930,-97.574600,Entire home/apt,10,4.0,3 baths,270.0,4.84
17069,4.181946e+07,,78676,29.955320,-98.161201,Entire home/apt,2,1.0,1 bath,254.0,4.96


In [18]:
# Average every numeric column per zip code.
# `numeric_only=True` is required on pandas >= 2.0, where `mean()` no longer
# silently drops the string columns (host_neighbourhood, room_type,
# bathrooms_text) and raises a TypeError instead.
avg_price = clean_airbnb_df.groupby('Zip Code').mean(numeric_only=True)
avg_price

Unnamed: 0_level_0,id,latitude,longitude,Accommodates,Bedrooms,Avg Price,Avg Rating
Zip Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
76530,1.706011e+17,30.756911,-97.506575,9.000000,5.571429,502.714286,4.828571
76574,8.671036e+16,30.554429,-97.417301,5.642857,2.571429,151.285714,4.784615
78602,1.010234e+17,30.123666,-97.308228,5.750000,2.103093,219.340000,4.897778
78605,4.396018e+07,30.740370,-98.024428,3.400000,1.666667,105.400000,4.832000
78610,1.642750e+17,30.082712,-97.838870,6.341463,2.354430,208.341463,4.801897
...,...,...,...,...,...,...,...
78754,7.834579e+16,30.349887,-97.651665,5.546667,2.364865,199.573333,4.742766
78756,7.860179e+16,30.321080,-97.737616,4.433071,1.784483,237.000000,4.808900
78757,7.215021e+16,30.347589,-97.730093,4.486034,1.994186,199.625698,4.777987
78758,1.339052e+17,30.396229,-97.712616,4.194514,1.719895,172.164589,4.647607


In [21]:
# Bar chart of the average nightly price for each zip code
# (zip code labels rotated 90 degrees so they stay legible).
avg_price.hvplot.bar(
    x='Zip Code',
    y='Avg Price',
    title='Average Price by Zip Code',
    rot=90,
)

From this graph, you can see that the zip codes with the highest average nightly price are:

78732 at $1,798, 

78730 at $1,239, 

78733 at $1,184, 

and 78746 at $1,045.

In [11]:
# Average bedrooms, price and rating for each (zip code, guest capacity) pair.
# The three columns of interest are selected *before* aggregating, so:
#   - the string columns are never passed to `mean()` (which raises a
#     TypeError on pandas >= 2.0), and
#   - there is no need to compute and then drop the meaningless averages of
#     id / latitude / longitude.
avg_price_by_neighbourhood_accommodates = (
    clean_airbnb_df
    .groupby(['Zip Code', 'Accommodates'])[['Bedrooms', 'Avg Price', 'Avg Rating']]
    .mean()
)

avg_price_by_neighbourhood_accommodates

Unnamed: 0_level_0,Unnamed: 1_level_0,Bedrooms,Avg Price,Avg Rating
Zip Code,Accommodates,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
76530,2,1.000000,90.0,5.000000
76530,5,2.000000,182.0,5.000000
76530,7,3.000000,234.0,4.860000
76530,8,2.000000,249.0,4.740000
76530,9,3.000000,293.0,5.000000
...,...,...,...,...
78759,7,3.000000,583.0,4.640000
78759,8,3.466667,362.2,4.869091
78759,9,4.000000,614.0,5.000000
78759,10,4.500000,395.0,5.000000


In [17]:
# Average nightly price per guest capacity, with the zip code chosen via the
# `groupby` selector.
avg_price_by_neighbourhood_accommodates.hvplot.bar(
    x='Accommodates',
    y='Avg Price',
    groupby='Zip Code',
    xlabel='Accommodates',
    title='Average Price of Accommodation Number by Zip Code',
)

The pattern varies by zip code, but in general the listings with the highest average nightly price are the ones that accommodate the most guests.

In [13]:
# Average bedroom count per guest capacity, with the zip code chosen via the
# `groupby` selector.
avg_price_by_neighbourhood_accommodates.hvplot.bar(
    x='Accommodates',
    y='Bedrooms',
    groupby='Zip Code',
    xlabel='Accommodates',
    title='Average Bedrooms of Accommodation Number by Zip Code',
)

In [22]:
# Average review rating per guest capacity, with the zip code chosen via the
# `groupby` selector.
avg_price_by_neighbourhood_accommodates.hvplot.bar(
    x='Accommodates',
    y='Avg Rating',
    groupby='Zip Code',
    xlabel='Accommodates',
    title='Average Rating of Accommodation Number by Zip Code',
)

In [29]:
# Plot each zip code at its mean listing coordinates on an OpenStreetMap
# tile layer, coloured by average nightly price.
avg_price.hvplot.points(
    x='longitude',
    y='latitude',
    geo=True,
    color='Avg Price',
    tiles='OSM',
    frame_width=700,
    frame_height=500,
)