In [None]:
# Importing the requisite packages
import os

import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import pysal
import splot

In [None]:
### Setting the file path
# Placeholder: replace with the folder that holds the downloaded data.
# Later cells build the listings and borough file locations from this value.
data_path = r"Your File Path"

In [None]:
# Reading in the data - REMINDER: You'll need to download this data yourself from InsideAirbnb
# The path is assembled with os.path.join so it works on any operating system and
# whether or not data_path ends with a separator. The previous non-raw literal
# contained the invalid escape sequence "\l" and a Windows-only backslash.
listings_path = os.path.join(data_path, 'NY Airbnb June 2020', 'listings.csv.gz')
listings = pd.read_csv(listings_path, compression='gzip')

## Exploratory Data Analysis

In [None]:
# Show the column labels of the raw listings table before subsetting it below
print(listings.columns)

In [None]:
# Subsetting the data

# Keep only the identifier, property/neighbourhood descriptors, price and coordinates.
# .copy() makes listings_sub an independent frame, so the cleaning cell further down
# can modify it without pandas raising SettingWithCopyWarning.
listings_sub = listings[
    [
        'id',
        'property_type',
        'neighbourhood_cleansed',
        'neighbourhood_group_cleansed',
        'beds',
        'bathrooms',
        'price',
        'latitude',
        'longitude',
    ]
].copy()

listings_sub.head()

In [None]:
# Print the dtype and non-null count of each selected column
listings_sub.info()

In [None]:
# Cleaning the data
# The frame is rebuilt rather than mutated in place so this cell can be re-run safely:
#   * errors='ignore' stops drop() from raising once 'beds'/'bathrooms' are already gone
#   * .assign() avoids writing into a slice of `listings` (SettingWithCopyWarning)
# The price column has any "$" and "," characters removed before it is cast to float.
listings_sub = (
    listings_sub
    .drop(columns=['beds', 'bathrooms'], errors='ignore')
    .assign(price=lambda frame: frame["price"].replace("[$,]", "", regex=True).astype(float))
)

In [None]:
# Display the cleaned listings table as the cell output
listings_sub

In [None]:
# Summary statistics of the cleaned price column
listings_sub['price'].describe()

In [None]:
%matplotlib inline
plt.hist(listings_sub['price'], bins=25, density=True, alpha=0.6)
plt.show()

In [None]:
import seaborn as sns

# sns.distplot is deprecated and has been removed from recent seaborn releases.
# histplot with stat="density" and a KDE overlay is its documented replacement;
# cut=3 extends the KDE curve past the data range the way distplot did.
sns.histplot(listings_sub['price'], kde=True, stat="density", kde_kws={"cut": 3})
plt.show()

## Exploratory Spatial Data Analysis

In [None]:
# Turn the listings table into a GeoDataFrame: one point per listing built from
# its longitude/latitude pair, tagged with CRS 4326
listings_sub_gpd = gpd.GeoDataFrame(
    listings_sub,
    geometry=gpd.points_from_xy(
        listings_sub['longitude'],
        listings_sub['latitude'],
        crs=4326,
    ),
)

In [None]:
import geoplot.crs as gcrs
import geoplot as gplt

# Web-map basemap in Web Mercator, sized from the listings layer
ax = gplt.webmap(
    listings_sub_gpd,
    projection=gcrs.WebMercator(),
)
# Draw every listing as a point on that same axes
gplt.pointplot(
    listings_sub_gpd,
    ax=ax,
)

In [None]:
# Reading in the New York Census Tracts and reprojecting them to EPSG:4326
NY_tracts_path = "https://www2.census.gov/geo/tiger/TIGER2021/TRACT/tl_2021_36_tract.zip"
NY_Tracts = gpd.read_file(NY_tracts_path)
NY_Tracts = NY_Tracts.to_crs(4326)

# Selecting the New York CBSA boundary (GEOID 35620) from the national CBSA layer
cbsa_path = 'https://www2.census.gov/geo/tiger/TIGER2021/CBSA/tl_2021_us_cbsa.zip'
cbsas = gpd.read_file(cbsa_path)
# Reproject the boundary to the tracts' CRS so that spatial predicates between the
# two layers compare coordinates in the same reference system.
NY_cbsa = cbsas[cbsas['GEOID']=='35620'].to_crs(NY_Tracts.crs)
# Fail early if the GEOID filter did not isolate exactly one CBSA polygon
assert len(NY_cbsa) == 1, "expected exactly one CBSA with GEOID 35620"

In [None]:
# Borough Boundaries
# os.path.join keeps the shapefile path portable: the previous literal used
# backslash separators, which only resolve on Windows, and relied on data_path
# ending with a separator.
boroughs_path = os.path.join(data_path, "NYC Boroughs", "nybb_22a", "nybb.shp")
Boroughs = gpd.read_file(boroughs_path)
# Reproject to EPSG:4326 to match the listings layer
Boroughs = Boroughs.to_crs(4326)
Boroughs.plot()

In [None]:
# Heatmap of Airbnb locations

# Shaded kernel-density surface of the listing points in Web Mercator,
# clipped to the borough geometries
ax = gplt.kdeplot(
    listings_sub_gpd,
    shade=True,
    cmap='Reds',
    clip=Boroughs.geometry,
    projection=gcrs.WebMercator(),
)

# Borough outlines drawn on the same axes (zorder=1) for geographic context
gplt.polyplot(
    Boroughs,
    ax=ax,
    zorder=1,
)

In [None]:
# NY_Tracts and NY_cbsa were already loaded from census.gov in an earlier cell,
# so they are reused here instead of downloading both archives a second time.

# Subsetting the census tracts to those that intersect the New York CBSA.
# The boundary is taken by position rather than by the hard-coded row label 620,
# which depends on the row order of the CBSA file. It is reprojected to the
# tracts' CRS so both layers are compared in the same coordinates.
NY_cbsa_geometry = NY_cbsa.to_crs(NY_Tracts.crs).geometry.iloc[0]
mask = NY_Tracts.intersects(NY_cbsa_geometry)
NY_Tracts_subset = NY_Tracts.loc[mask]

# Aggregating the airbnb locations to the NY census tracts
# `predicate` replaces the deprecated `op` keyword of geopandas.sjoin.
# how='left' keeps tracts that contain no listing; their price is NaN.
NY_Tracts_sj = gpd.sjoin(NY_Tracts_subset, listings_sub_gpd, how='left', predicate='contains')
NY_Tracts_sj = NY_Tracts_sj[['GEOID','price','geometry']]
# One row per tract, with the mean price of the listings it contains
NY_Tracts_Agg = NY_Tracts_sj.dissolve(by='GEOID', aggfunc='mean')

NY_Tracts_Agg

In [None]:
# Static choropleth of the mean listing price per census tract
gplt.choropleth(
    NY_Tracts_Agg,
    hue="price",
    cmap="Greens",
    figsize=(60, 30),
    legend=True,
)

In [None]:
import geoviews
geoviews.extension("bokeh")

# Interactive tract polygons carrying price and GEOID as value dimensions
choropleth = geoviews.Polygons(
    data=NY_Tracts_Agg,
    vdims=["price", "GEOID"],
)

# Styling: hover tool, green colour map and a colour bar underneath the plot
choropleth.opts(
    height=600,
    width=900,
    title="NYC Airbnb Price",
    tools=["hover"],
    cmap="Greens",
    colorbar=True,
    colorbar_position="bottom",
)

In [None]:
import statistics

# Mean and sample standard deviation of the tract-level prices
# (tracts without a price are excluded from both statistics)
tract_prices = NY_Tracts_Agg['price'].dropna()
mean_price = statistics.mean(tract_prices)
stdev = statistics.stdev(tract_prices)

print("The mean Airbnb price is: % s " % (round(mean_price, 2)))
print("The standard deviation of Airbnb prices is: % s " % (round(stdev, 2)))

# Keep only tracts priced below mean + 1 standard deviation. The filter is
# one-sided (cheap tracts are kept), and tracts with a missing price fail the
# comparison and are therefore dropped as well.
price_cap = mean_price + stdev
NY_Tracts_Agg_filtered = NY_Tracts_Agg[NY_Tracts_Agg['price'] < price_cap]

# Same interactive choropleth as before, drawn from the filtered tracts
choropleth = geoviews.Polygons(
    data=NY_Tracts_Agg_filtered,
    vdims=["price", "GEOID"],
)

choropleth.opts(
    height=600,
    width=900,
    title="NYC Airbnb Price",
    tools=["hover"],
    cmap="Greens",
    colorbar=True,
    colorbar_position="bottom",
)