## DATA ANALYTICS

In [None]:
import io
import urllib.request

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

## Loading a csv into a DataFrame

In [None]:
# Path is relative to the directory the notebook is run from.
listings_csv = 'data/AB_NYC_2019.csv'
listing_df = pd.read_csv(listings_csv)

## Quick look at the data

In [None]:
# Size of the table as a (rows, columns) tuple.
listing_df.shape

In [None]:
# Column labels available in the loaded CSV.
listing_df.columns

In [None]:
# Data type pandas inferred for each column while parsing the CSV.
listing_df.dtypes

In [None]:
# Preview the first five rows.
listing_df.head(n=5)

## Cleaning Data

### How many nulls

In [None]:
# Count of missing values per column (isna is the same check as isnull).
listing_df.isna().sum()

### Drop irrelevant columns

In [None]:
# Columns removed before the analysis in the rest of the notebook.
columns_to_drop = ['id', 'host_name', 'last_review']
# Reassign rather than mutate in place, and tolerate already-missing columns,
# so re-running this cell does not raise a KeyError once the columns are gone.
listing_df = listing_df.drop(columns=columns_to_drop, errors='ignore')

### Replace NaN with 0

In [None]:
# Only reviews_per_month is filled; NaNs in every other column are left as-is.
listing_df = listing_df.fillna(value={'reviews_per_month': 0})

## Filtering

### Columns (by name)

In [None]:
# A single column label in brackets returns that column as a Series.
listing_df['name']

In [None]:
# A list of labels in brackets returns a DataFrame with just those columns.
listing_df[['name', 'neighbourhood_group', 'price']]

### Rows (by index)

In [None]:
# Rows at positions 5 through 9 (the end of the slice is exclusive).
listing_df.iloc[5:10]

## Boolean indexing

In [None]:
# Comparing a column to a scalar yields a boolean Series: True where price < 100.
listing_df['price'] < 100

In [None]:
# Build the boolean mask first, then use it to keep only the matching rows.
under_100 = listing_df['price'] < 100
listing_df[under_100]

## Exploratory Data Analysis

Let's start asking our dataset some questions!

### 🤔 What are the 10 most reviewed listings?

In [None]:
# The ten rows with the highest number_of_reviews, largest first.
listing_df.nlargest(n=10, columns='number_of_reviews')

### 🤔 What are the NY neighbourhood groups with listings?

In [None]:
# Distinct neighbourhood_group values, in order of first appearance.
listing_df['neighbourhood_group'].unique()

### 🤔 Follow-up: How many listings per neighbourhood group?

In [None]:
# Number of rows per neighbourhood_group, sorted from most to fewest.
listing_df['neighbourhood_group'].value_counts()

### 🤔 What are the Top 10 neighbourhoods with Airbnb listings?

In [None]:
# value_counts() sorts descending, so the first ten entries are the top ten.
(
    listing_df['neighbourhood']
    .value_counts()
    .head(10)
)

In [None]:
# Same top-ten counts as above, drawn as a bar chart.
(
    listing_df['neighbourhood']
    .value_counts()
    .head(10)
    .plot.bar()
)

In [None]:
# One bar per neighbourhood group; bar height is the number of listings.
sns.countplot(x='neighbourhood_group', data=listing_df)

In [None]:
# The index of value_counts() lists the groups from most to fewest listings;
# passing it as `order` sorts the bars the same way.
order = (
    listing_df['neighbourhood_group']
    .value_counts()
    .index
)
sns.countplot(x='neighbourhood_group', order=order, data=listing_df)

### 🤔 What's the influence of neighbourhood group on room type?

In [None]:
# Distinct room_type values present in the data.
listing_df['room_type'].unique()

In [None]:
# Listings per neighbourhood group, split into one bar per room type.
sns.countplot(
    x="neighbourhood_group",
    hue="room_type",
    data=listing_df,
)

### 🤔 What is the distribution of flat prices?

In [None]:
# sns.distplot is deprecated and scheduled for removal; histplot with a KDE
# overlay on a density scale is its documented replacement (seaborn >= 0.11).
# cut=3 extends the KDE curve past the data range the way distplot did.
sns.histplot(listing_df['price'], kde=True, stat='density', kde_kws={'cut': 3})

In [None]:
# Keep listings priced at 500 or less so the long tail of expensive listings
# does not flatten the plot.
affordable_df = listing_df[listing_df['price'] <= 500]
# sns.distplot is deprecated and scheduled for removal; histplot with a KDE
# overlay on a density scale is its documented replacement (seaborn >= 0.11).
sns.histplot(affordable_df['price'], kde=True, stat='density', kde_kws={'cut': 3})

In [None]:
# Mean price over every listing.
listing_df['price'].mean()

In [None]:
# Mean price over the listings kept in affordable_df only.
affordable_df['price'].mean()

### 🤔 What is the distribution of flat prices based on the Neighbourhood Group?

In [None]:
# One violin per neighbourhood group showing how price is distributed within it.
sns.violinplot(
    x="neighbourhood_group",
    y="price",
    data=affordable_df,
)

### 🤔 Can we plot the listings on a map?

In [None]:
# Plot each listing at its (longitude, latitude) and colour it by price.
affordable_df.plot.scatter(
    x='longitude',
    y='latitude',
    c='price',
    cmap='inferno',
    colorbar=True,
    alpha=0.8,
    figsize=(12, 8),
)

In [None]:
# Background image of New York City's neighbourhoods.
nyc_map_url = 'https://upload.wikimedia.org/wikipedia/commons/e/ec/Neighbourhoods_New_York_City_Map.PNG'

# `urllib.request` must be imported explicitly (done in the imports cell):
# a bare `import urllib` does not guarantee the submodule is loaded.
# The `with` block closes the HTTP response, and buffering the bytes hands
# the image reader an in-memory, seekable file object.
with urllib.request.urlopen(nyc_map_url) as response:
    nyc_map = plt.imread(io.BytesIO(response.read()), format='png')

# Create the figure and axes explicitly instead of relying on pyplot's
# implicit "current axes".
fig, ax = plt.subplots(figsize=(12, 8))

# extent is [left, right, bottom, top]: it pins the image to these
# longitude/latitude bounds so the scatter points line up with the map.
ax.imshow(nyc_map, zorder=0, extent=[-74.258, -73.7, 40.49, 40.92])

# Draw the listings above the map (zorder=1), coloured by price.
affordable_df.plot(
    ax=ax,
    zorder=1,
    kind='scatter',
    x='longitude',
    y='latitude',
    c='price',
    cmap='inferno',
    colorbar=True,
    alpha=0.8)