# New York City Airbnb Open Data
Let's use this 2019 dataset, available on Kaggle:

👉 kaggle.com/dgomonov/new-york-city-airbnb-open-data

In [None]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Load the listings CSV into a DataFrame; the path is relative to the current working directory.
listings_df = pd.read_csv('../airbnb/AB_NYC_2019.csv')

In [None]:
# Display the DataFrame as the cell output.
listings_df

## Quick look

In [None]:
# Dimensions of the table as (number of rows, number of columns).
listings_df.shape

In [None]:
# Column labels of the table.
listings_df.columns

In [None]:
# Data type pandas assigned to each column.
listings_df.dtypes

In [None]:
# First five rows (the default for head()).
listings_df.head()

In [None]:
# Last five rows (the default for tail()).
listings_df.tail()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
listings_df.describe()

## Cleaning Data

In [None]:
# Number of missing values in each column (isna is the same check as isnull).
listings_df.isna().sum()

In [None]:
# Columns this notebook removes before the analysis.
columns_to_drop = ['id', 'host_name', 'last_review']

# Drop them by column label, modifying listings_df in place.
listings_df.drop(columns=columns_to_drop, inplace=True)

listings_df

In [None]:
# Replace missing values in 'reviews_per_month' with 0, modifying the frame in place.
listings_df.fillna({'reviews_per_month': 0}, inplace=True)

In [None]:
# Display the DataFrame again to inspect the result of the cleaning steps.
listings_df

In [None]:
# One histogram per numeric column, 100 bins each, on a 20x15-inch figure.
listings_df.hist(bins = 100, figsize= (20,15))

## Columns & Rows

In [None]:
# Indexing with a single column label returns that column on its own.
listings_df['name'] # => Series

In [None]:
# Indexing with a list of column labels returns a table restricted to those columns.
listings_df[['name', 'neighbourhood_group', 'price']] # => DataFrame

In [None]:
# Rows at positions 5 through 9 (the end of the slice is exclusive).
listings_df.iloc[5:10]

In [None]:
# Narrow to three columns first, then take the rows at positions 5 through 9.
listings_df[['name', 'neighbourhood_group', 'price']].iloc[5:10]

In [None]:
# Element-wise comparison: a boolean Series that is True where the price is below 100.
listings_df['price'] < 100

In [None]:
# Build the boolean mask first, then keep only the rows where it is True.
under_100_mask = listings_df['price'] < 100
listings_df[under_100_mask]

## Exploratory Data Analysis

In [None]:
# 🤔 What are the 10 most reviewed listings?

# nlargest returns the rows with the highest values in the given column, largest first.
listings_df.nlargest(n=10, columns='number_of_reviews')

In [None]:
# 🤔 What are the NY neighbourhood groups with listings?

# Distinct values of the column, in order of first appearance.
listings_df['neighbourhood_group'].unique()

In [None]:
# 🤔 Follow-up: How many listings per neighbourhood group?

# value_counts tallies the rows per distinct value, most frequent first.
listings_df['neighbourhood_group'].value_counts()

In [None]:
# 🤔 What are the Top 10 neighbourhoods with Airbnb listings?

# value_counts sorts by frequency (descending), so head(10) keeps the ten most common.
listings_df['neighbourhood'].value_counts().head(10)

In [None]:
# Ten most common neighbourhoods, drawn as a bar chart.
top_neighbourhoods = listings_df['neighbourhood'].value_counts().head(10)
top_neighbourhoods.plot.bar()

In [None]:
# Bar chart of the number of listings in each neighbourhood group.
sns.countplot(data=listings_df, x='neighbourhood_group')

In [None]:
# Group names sorted by listing count (value_counts is descending); used to order the bars.
order = listings_df['neighbourhood_group'].value_counts().index

sns.countplot(data=listings_df, x='neighbourhood_group', order=order)

In [None]:
# 🤔 What's the influence of the neighbourhood group on room type?

# FYI, here are the room types:

listings_df['room_type'].unique()

In [None]:
# Listing counts per neighbourhood group, with a separate bar for each room type.
sns.countplot(data=listings_df, x="neighbourhood_group", hue="room_type")

In [None]:
# 🤔 What is the distribution of listing prices?

# We can use a histogram with a kernel density estimate (KDE) drawn on top.
# sns.distplot is deprecated since seaborn 0.11 and scheduled for removal;
# histplot with kde=True and stat='density' is the documented replacement.
sns.histplot(listings_df['price'], kde=True, stat='density', kde_kws={'cut': 3})

In [None]:
# How many listings are more expensive than $500/night? Could we remove those outliers from the visualization? What technique can we use?

# Keep only the listings priced at 500 or less, dropping the outliers from the plot.
affordable_df = listings_df[listings_df['price'] <= 500]
# histplot + KDE replaces sns.distplot, which is deprecated since seaborn 0.11.
sns.histplot(affordable_df['price'], kde=True, stat='density', kde_kws={'cut': 3})

# Note: name those curves -- the human instinct to price a listing a bit cheaper.

In [None]:
# Mean price over all listings.
listings_df['price'].mean()

In [None]:
# Mean price over the filtered listings in affordable_df.
affordable_df['price'].mean()

In [None]:
# 🤔 What is the distribution of listing prices based on the Neighbourhood Group?

# We can use a Violinplot, drawn on an explicitly created 14x10-inch figure:

violin_fig, violin_ax = plt.subplots(figsize=(14, 10))
sns.violinplot(data=affordable_df, x="neighbourhood_group", y="price", ax=violin_ax)

In [None]:

# Scatter every listing by its coordinates. Optional tweak noted by the author: alpha = 0.1
affordable_df.plot.scatter(x='longitude', y='latitude', figsize=(14, 10))

In [None]:
# 🤔 Can we plot the listings on a map?

# Back to matplotlib, we can use a scatterplot where each point is coloured by its price:

affordable_df.plot.scatter(
    x='longitude',
    y='latitude',
    c='price',
    cmap='inferno',
    colorbar=True,
    alpha=0.8,
    figsize=(14, 10),
)

In [None]:
# Bonus: Adding NYC map underneath!

# Imported here because only this cell downloads a file.
import io
import urllib.request

background_url = 'https://raw.githubusercontent.com/lewagon/data-images/master/workshops/Neighbourhoods_New_York_City_Map.png'

# plt.imread no longer accepts a URL (deprecated in Matplotlib 3.4, an error in
# later releases), so download the bytes and hand imread a seekable in-memory file.
with urllib.request.urlopen(background_url) as response:
    background_image = plt.imread(io.BytesIO(response.read()), format='png')

# extent is [left, right, bottom, top] in data coordinates, i.e. longitude then
# latitude bounds here, so the image lines up with the scatter axes below.
plt.imshow(background_image, zorder=0, extent=[-74.258, -73.7, 40.49, 40.92])

# Draw the scatter on the same axes as the image; zorder=1 keeps the points on top.
ax = plt.gca()
affordable_df.plot(
  ax=ax,
  zorder=1,
  kind='scatter',
  x='longitude',
  y='latitude',
  c='price',
  cmap='inferno',
  colorbar=True,
  alpha=0.8,
  figsize=(14,10))

In [None]:
import folium

# Base map that the heat map layer is added to further down.
# The keyword is `zoom_start`; the previous spelling `zoom_starts` is not a Map
# parameter, so the map has been rendering at folium's default zoom (10).
# That level is kept here explicitly -- lower it if a wider view is wanted.
# NOTE(review): `map` shadows the builtin; the name is kept because later cells use it.
map = folium.Map(location=[40.42, -74.00], zoom_start=10)
map

In [None]:
# (latitude, longitude) pairs as a NumPy array, one row per row of affordable_df.
# NOTE(review): despite the name, these look like per-listing coordinates, not districts.
districts = affordable_df[['latitude', 'longitude']].to_numpy()
districts

In [None]:
from folium import plugins
from folium.plugins import HeatMap

# Overlay a heat map of the coordinate pairs on the base map.
# `max_val` is no longer passed: folium deprecated it (the largest intensity is
# calculated automatically) and supplying it only triggers a warning.
HeatMap(districts, radius=15, min_opacity=0.4).add_to(map)

map