Analyzing Availability by Zip Code in Austin, Texas


In [25]:
# Import required libraries for analysis
import pandas as pd
from pathlib import Path
import hvplot.pandas

Load the data file required for the visualizations

In [27]:
# Columns present in the listings CSV that this analysis never uses
columns_to_drop = [
    'minimum_nights',
    'number_of_reviews',
    'neighbourhood_group',
    'host_name',
    'host_id',
    'name',
    'calculated_host_listings_count',
    'number_of_reviews_ltm',
    'license',
    'last_review',
    'id',
]

# Load the Airbnb listings and discard the unused columns in a single,
# non-mutating chain so the cell yields the same frame every time it is run
airbnb_df = (
    pd.read_csv(Path('resources/austin_listings.csv'))
    .drop(columns=columns_to_drop)
)

In [28]:
# Keep only the listings whose room type is 'Entire home/apt'
entire_home_df = airbnb_df.loc[airbnb_df['room_type'] == 'Entire home/apt']

# Average every numeric column per zip code (the 'neighbourhood' column).
# numeric_only=True is required because the frame still carries the string
# column 'room_type': pandas 2.x raises a TypeError when asked to average it,
# and older releases only dropped it implicitly with a FutureWarning.
home_zip_df = entire_home_df.groupby('neighbourhood').mean(numeric_only=True)
home_zip_df

Unnamed: 0_level_0,latitude,longitude,price,reviews_per_month,availability_365
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
76530,30.756911,-97.506575,502.714286,2.577143,241.714286
76574,30.554429,-97.417301,151.285714,2.248462,263.571429
78602,30.123666,-97.308228,219.340000,1.988333,261.900000
78605,30.740370,-98.024428,105.400000,3.320000,307.000000
78610,30.082712,-97.838870,208.341463,2.130345,245.341463
...,...,...,...,...,...
78754,30.349887,-97.651665,199.573333,1.864468,214.866667
78756,30.321080,-97.737616,237.000000,1.814000,183.748031
78757,30.347589,-97.730093,199.625698,1.334631,131.614525
78758,30.396229,-97.712616,172.164589,1.949444,209.453865


In [29]:
# Map one point per zip code at its mean listing coordinates, coloured by the
# zip code's mean availability_365, on top of OSM base-map tiles
availability_map_opts = dict(
    x='longitude',
    y='latitude',
    geo=True,
    color='availability_365',
    tiles='OSM',
    title='Annual Average Availability by Zip Code',
    hover_cols='neighbourhood',
    xlabel='',
    ylabel='',
    width=700,
    height=500,
    s=50,
)
home_zip_df.hvplot.points(**availability_map_opts)



In [30]:
# Order the zip codes from highest to lowest average availability: the first
# rows are then the least popular zip codes and the last rows the most popular
home_zip_df = home_zip_df.sort_values(by='availability_365', ascending=False)

# Show both ends of the ranking
display(home_zip_df.head(), home_zip_df.tail())

Unnamed: 0_level_0,latitude,longitude,price,reviews_per_month,availability_365
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
78655,29.74905,-97.780981,145.393162,1.052941,315.581197
78605,30.74037,-98.024428,105.4,3.32,307.0
78656,29.862377,-97.856747,473.26087,1.921818,291.217391
78636,30.264814,-98.394493,231.58871,2.000924,279.387097
78611,30.721852,-98.399604,452.285714,1.281728,275.267857


Unnamed: 0_level_0,latitude,longitude,price,reviews_per_month,availability_365
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
78749,30.215793,-97.850432,292.544304,1.759219,114.455696
78739,30.182573,-97.887144,358.52,1.229412,107.08
78726,30.418496,-97.851203,175.285714,2.07,106.428571
78722,30.288238,-97.71604,237.818182,1.911942,90.587879
78742,30.233328,-97.671309,314.0,1.055714,69.444444


In [21]:
# Build the frame for the five most popular zip codes, i.e. the five with the
# lowest average availability_365 (this notebook's proxy for popularity).
# The rows are derived from the data rather than from hand-copied zip codes,
# so the selection stays correct if the listings file changes and does not
# depend on an earlier cell having already sorted home_zip_df.
popular_neighborhoods_df = (
    home_zip_df
    .sort_values('availability_365', ascending=False)
    .tail(5)  # lowest availability, still listed in descending order
    # Drop the columns we will not use
    .drop(columns=['latitude', 'longitude', 'reviews_per_month'])
)
popular_neighborhoods_df

Unnamed: 0_level_0,price,availability_365
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1
78749,292.544304,114.455696
78739,358.52,107.08
78726,175.285714,106.428571
78722,237.818182,90.587879
78742,314.0,69.444444


In [22]:
# Bar chart comparing the five most popular zip codes. No `y` is passed, so
# the chart is built from the frame's remaining columns (price and
# availability_365), one group of bars per zip code.
popular_bar_opts = dict(
    x='neighbourhood',
    xlabel='Austin Zip Codes',
    width=700,
    height=500,
    rot=65,
    title='Top Five Most Popular Neighborhoods to Rent in Austin',
)
popular_neighborhoods_df.hvplot.bar(**popular_bar_opts)

In [32]:
# Build the frame for the five least popular zip codes, i.e. the five with the
# highest average availability_365 (this notebook's proxy for popularity).
# The rows are derived from the data rather than from hand-copied zip codes,
# so the selection stays correct if the listings file changes and does not
# depend on an earlier cell having already sorted home_zip_df.
unpop_neighborhood_df = (
    home_zip_df
    .sort_values('availability_365', ascending=False)
    .head(5)  # highest availability first
    # Drop unnecessary columns for the analysis
    .drop(columns=['latitude', 'longitude', 'reviews_per_month'])
)
unpop_neighborhood_df

Unnamed: 0_level_0,price,availability_365
neighbourhood,Unnamed: 1_level_1,Unnamed: 2_level_1
78655,145.393162,315.581197
78605,105.4,307.0
78656,473.26087,291.217391
78636,231.58871,279.387097
78611,452.285714,275.267857


In [33]:
# Bar chart comparing the five least popular zip codes. No `y` is passed, so
# the chart is built from the frame's remaining columns (price and
# availability_365), one group of bars per zip code.
unpopular_bar_opts = dict(
    x='neighbourhood',
    xlabel='Austin Zip Codes',
    width=700,
    height=500,
    rot=65,
    title='Top Five Least Popular Neighborhoods to Rent in Austin',
)
unpop_neighborhood_df.hvplot.bar(**unpopular_bar_opts)