
# Contents

01. Importing libraries
02. Importing data
03. Data wrangling and cleaning
04. Setting up a classification variable to display on the Choropleth Map
05. Creating the Choropleth Map
06. Saving and exporting the Choropleth Map

### 01. Importing libraries

In [1]:
#Import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os
import folium

In [2]:
# Allow matplotlib visuals
%matplotlib inline

### 02. Importing data

In [3]:
# Import main folder
path = r'C:\Users\amwil\OneDrive\Career Foundry Project Work\07-2021 ABNB NYC Data Analysis'

In [4]:
# Import json file for NYC Boroughs
nyc_boroughs = r'C:\Users\amwil\OneDrive\Career Foundry Project Work\07-2021 ABNB NYC Data Analysis\02 Data\Original Data\NYC Boroughs.json'

In [5]:
# Import AB_NYC_2019_cleaned.csv
df = pd.read_csv(os.path.join(path,'02 Data', 'Prepared Data', 'AB_NYC_2019_Visually_Explored.csv'), index_col = False)

### 03. Data wrangling and cleaning

In [6]:
#Check beginning of df
df.head()

Unnamed: 0,Listing id,Host id,Borough,Neighborhood,Latitude,Longitude,Room type,Price,Minimum nights for rent,Number of reviews,Last review,Reviews per month,Number of listings per host,Availability per year,Price category
0,2539,2787,Brooklyn,Kensington,40.64749,-73.97237,Private room,149.0,1.0,9,2018-10-19,0.21,6,365,Middle price
1,2595,2845,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225.0,1.0,45,2019-05-21,0.38,2,355,High price
2,3647,4632,Manhattan,Harlem,40.80902,-73.9419,Private room,150.0,3.0,0,2019-06-23,1.373221,1,365,Middle price
3,3831,4869,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89.0,1.0,270,2019-07-05,4.64,1,194,Low price
4,5022,7192,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80.0,10.0,9,2018-11-19,0.1,1,0,Low price


In [7]:
# Check shape of df
df.shape

(48895, 15)

In [8]:
# Rename columns (Borough, Price) to match geoJSON
df.rename(columns = {'Borough' : 'borough'}, inplace = True)
df.rename(columns = {'Price' : 'price'}, inplace = True)

In [9]:
# Recheck beginning of df
df.head()

Unnamed: 0,Listing id,Host id,borough,Neighborhood,Latitude,Longitude,Room type,price,Minimum nights for rent,Number of reviews,Last review,Reviews per month,Number of listings per host,Availability per year,Price category
0,2539,2787,Brooklyn,Kensington,40.64749,-73.97237,Private room,149.0,1.0,9,2018-10-19,0.21,6,365,Middle price
1,2595,2845,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225.0,1.0,45,2019-05-21,0.38,2,355,High price
2,3647,4632,Manhattan,Harlem,40.80902,-73.9419,Private room,150.0,3.0,0,2019-06-23,1.373221,1,365,Middle price
3,3831,4869,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89.0,1.0,270,2019-07-05,4.64,1,194,Low price
4,5022,7192,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80.0,10.0,9,2018-11-19,0.1,1,0,Low price


### 04. Setting up a classification variable to display on the Choropleth Map

In [10]:
# Check Price category frequency
df['Price category'].value_counts(dropna = False)

Low price       21877
Middle price    18996
High price       8022
Name: Price category, dtype: int64

In [11]:
# Select 'Low price' as a layer as it has the most values
hist_indicator =  'Low price' 

# Create a mask for the map
mask1 = df['Price category'].str.contains(hist_indicator) 

# Apply the mask
stage = df[mask1]

# View results
stage.head() 

Unnamed: 0,Listing id,Host id,borough,Neighborhood,Latitude,Longitude,Room type,price,Minimum nights for rent,Number of reviews,Last review,Reviews per month,Number of listings per host,Availability per year,Price category
3,3831,4869,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89.0,1.0,270,2019-07-05,4.64,1,194,Low price
4,5022,7192,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80.0,10.0,9,2018-11-19,0.1,1,0,Low price
6,5121,7356,Brooklyn,Bedford-Stuyvesant,40.68688,-73.95596,Private room,60.0,2.0,49,2017-10-05,0.4,1,0,Low price
7,5178,8967,Manhattan,Hell's Kitchen,40.76489,-73.98493,Private room,79.0,2.0,430,2019-06-24,3.47,1,220,Low price
8,5203,7490,Manhattan,Upper West Side,40.80178,-73.96723,Private room,79.0,2.0,118,2017-07-21,0.99,1,0,Low price


In [12]:
# Create a data frame with the boroughs and the values for price

data_to_plot = stage[['borough','price']]
data_to_plot.head()

Unnamed: 0,borough,price
3,Brooklyn,89.0
4,Manhattan,80.0
6,Brooklyn,60.0
7,Manhattan,79.0
8,Manhattan,79.0


### 05. Creating the Choropleth Map

In [13]:
# Setup a folium map 
m = folium.Map([40.730610, -73.935242], zoom_start=10)

# Create choropleth using folium
folium.Choropleth(
             geo_data = nyc_boroughs, 
             data = data_to_plot,
             columns = ['borough', 'price'],
             key_on = 'feature.properties.borough', 
             fill_color = 'YlOrRd', 
             fill_opacity=0.7, 
             line_opacity=0.2,
             legend_name = "price",
             highlight=True
).add_to(m)

m

#### Q9. Discuss the results and what they mean in a markdown section. 
#### * Does the analysis answer any of your existing research questions? Partially, this analysis gives me more information about the distribution of low price rentals in each borough.  
#### * Does the analysis lead you to any new research questions? Perhaps, what is the distribution of the other price categories among the different boroughs? Having created the choropleth map, it would be easy to run the code for the others to find out.

### 06. Saving and exporting the Choropleth Map

In [14]:
# Save the choropleth map
m.save(r'C:\Users\amwil\OneDrive\Career Foundry Project Work\07-2021 ABNB NYC Data Analysis\04 Analysis\Visualizations\m.html', index= False)