## Analysing Airbnb listings in Barcelona - Visualizing geographically distributed data

In [1]:
# import all necessary libraries for this project

import pandas as pd
import matplotlib.pyplot as plt
import geopandas as gpd
import contextily as ctx
import numpy as np
import json as js

In [None]:
# Load the Barcelona neighbourhood ("barris") boundaries; every row carries a polygon geometry.
barris = gpd.read_file(filename="Data/barris.geojson")

In [None]:
# Quick look at the raw neighbourhood polygons to see what we are working with.
barris.plot(edgecolor="b", alpha=0.5, figsize=(10, 10))

In [None]:
# Read listings 24 into a DataFrame (hence the name df24).
df24 = pd.read_csv("Data/listings_24.csv")

# gdf is the GeoDataFrame holding one point geometry per row of listings 24.
# The longitude/latitude columns are assumed to be WGS84 degrees (EPSG:4326) --
# TODO confirm against the data source. Declaring the CRS explicitly matters:
# without it the GeoDataFrame has no CRS and cannot be reprojected with to_crs().
gdf = gpd.GeoDataFrame(
    df24,
    geometry=gpd.points_from_xy(df24.longitude, df24.latitude),
    crs="EPSG:4326",
)

In [None]:
# Draw the neighbourhood polygons first, then overlay the listing points on the same axes.
axe = barris.plot(edgecolor="b", alpha=0.5, figsize=(10, 10))
# figsize is not repeated here: it has no effect once an existing axes is supplied.
ax1 = gdf.plot(ax=axe, edgecolor="r", markersize=2.2, alpha=0.5)

To make the map look nicer, we add a basemap 

In [None]:
# Reproject the neighbourhoods to EPSG:3857 so their polygons match the basemap tiles.
barris = barris.to_crs(epsg=3857)

# Load the district boundaries from the geojson file.
dists = gpd.read_file("Data/districts.geojson")

# As a small experiment, single out the Eixample district (reprojected the same
# way) so that its edge can be drawn in red.
dists1 = dists.loc[dists["NOM"] == "Eixample"].to_crs(epsg=3857)

# Neighbourhood polygons with a basemap underneath.
axe = barris.plot(edgecolor="b", alpha=0.5, figsize=(10, 10))
ctx.add_basemap(axe)

# Overlay Eixample on the axes created by the barris plot.
dists1.plot(ax=axe, edgecolor="r", alpha=0.6)


#### We have four listings (Listings 24, 30, 33 and 45).
For each one, we will calculate the following for the Airbnb "Entire home/apt" room type:

- the total number of listings for each neighborhood
- the density of the listings for each neighborhood
- the median price of the listings for each neighborhood

**All NaN "cells" are replaced with zero.**

#### Area of the barris (neighborhoods)

In [None]:
# Name and area of every neighbourhood, ordered alphabetically by name.
# reset_index() keeps the previous row labels in an extra "index" column.
name_and_area = barris[["NOM", "AREA"]]
area = name_and_area.sort_values(by="NOM").reset_index()
area

### Listings 24

In [None]:
# Load listings 24 and keep only the rows whose room type is "Entire home/apt".
df24 = pd.read_csv("Data/listings_24.csv")
new_df24 = df24.loc[df24["room_type"] == "Entire home/apt"]

In [None]:
# Total number of "Entire home/apt" listings per neighbourhood (column count_col).
list24 = (
    new_df24.groupby(["neighbourhood"])
    .agg(count_col=("neighbourhood", "count"))
    .reset_index()
)

In [None]:
#Density
# Look up each neighbourhood's listing count by name, so counts and areas are
# paired by neighbourhood rather than by row position (row positions drift apart
# as soon as one neighbourhood has no listings). The result is indexed like
# barris, so it can be assigned directly onto a copy of barris.
counts_24 = list24.set_index("neighbourhood")["count_col"]
# Neighbourhoods without any listing get a density of 0 instead of NaN.
density_24 = (barris["NOM"].map(counts_24) / barris["AREA"]).fillna(0)
density_24

In [None]:
#Median
# numeric_only=True restricts the aggregation to numeric columns; without it,
# recent pandas versions raise on the text columns of the listings table.
# NOTE(review): this assumes "price" is stored as a number -- confirm.
median_24 = new_df24.groupby(["neighbourhood"]).median(numeric_only=True).reset_index()

_24median = pd.DataFrame(median_24[["neighbourhood", "price"]])
_24median = _24median.rename(columns={'price': 'price_24'})

# fillna returns a new frame, so keep the result instead of only displaying it.
_24median = _24median.fillna(0)
_24median


### Listings 30

In [None]:
# Load listings 30 and keep only the rows whose room type is "Entire home/apt".
df30 = pd.read_csv("Data/listings_30.csv")
new_df30 = df30.loc[df30["room_type"] == "Entire home/apt"]

In [None]:
#Total Number
# One row per neighbourhood with its number of listings in count_col.
list30 = (
    new_df30.groupby(["neighbourhood"])
    .agg(count_col=("neighbourhood", "count"))
    .reset_index()
)

In [None]:
#Density
# Look up each neighbourhood's listing count by name, so counts and areas are
# paired by neighbourhood rather than by row position (row positions drift apart
# as soon as one neighbourhood has no listings). The result is indexed like
# barris, so it can be assigned directly onto a copy of barris.
counts_30 = list30.set_index("neighbourhood")["count_col"]
# Neighbourhoods without any listing get a density of 0 instead of NaN.
density_30 = (barris["NOM"].map(counts_30) / barris["AREA"]).fillna(0)
density_30

In [None]:
#Median
# numeric_only=True restricts the aggregation to numeric columns; without it,
# recent pandas versions raise on the text columns of the listings table.
# NOTE(review): this assumes "price" is stored as a number -- confirm.
median30 = new_df30.groupby(["neighbourhood"]).median(numeric_only=True).reset_index()
_30median = pd.DataFrame(median30[["neighbourhood", "price"]])
_30median = _30median.rename(columns={'price': 'price_30'})
# fillna returns a new frame, so keep the result instead of only displaying it.
_30median = _30median.fillna(0)
_30median

### Listings 33

In [None]:
#Total Number
df33 = pd.read_csv("Data/listings_33.csv")
# Filter df33 itself: filtering df30 here would make every listings-33 figure
# a copy of the listings-30 results.
new_df33 = df33[df33['room_type'] == "Entire home/apt"]

# One row per neighbourhood with its number of listings in count_col.
list33 = new_df33.groupby(["neighbourhood"]).agg(count_col=pd.NamedAgg(column="neighbourhood", aggfunc="count")).reset_index()
list33.fillna(0)

In [None]:
#Density
# Look up each neighbourhood's listing count by name, so counts and areas are
# paired by neighbourhood rather than by row position (row positions drift apart
# as soon as one neighbourhood has no listings). The result is indexed like
# barris, so it can be assigned directly onto a copy of barris.
counts_33 = list33.set_index("neighbourhood")["count_col"]
# Neighbourhoods without any listing get a density of 0 instead of NaN.
density_33 = (barris["NOM"].map(counts_33) / barris["AREA"]).fillna(0)
density_33

In [None]:
#Median
# numeric_only=True restricts the aggregation to numeric columns; without it,
# recent pandas versions raise on the text columns of the listings table.
# NOTE(review): this assumes "price" is stored as a number -- confirm.
median33 = new_df33.groupby(["neighbourhood"]).median(numeric_only=True).reset_index()
_33median = pd.DataFrame(median33[["neighbourhood", "price"]])
_33median = _33median.rename(columns={'price': 'price_33'})
# fillna returns a new frame, so keep the result instead of only displaying it.
_33median = _33median.fillna(0)
_33median

### Listings 45

In [None]:
# Load listings 45 and keep only the rows whose room type is "Entire home/apt".
df45 = pd.read_csv("Data/listings_45.csv")
new_df45 = df45.loc[df45["room_type"] == "Entire home/apt"]

#Total Number
# One row per neighbourhood with its number of listings in count_col.
list45 = (
    new_df45.groupby(["neighbourhood"])
    .agg(count_col=("neighbourhood", "count"))
    .reset_index()
)
list45

In [None]:
#Density
# Look up each neighbourhood's listing count by name, so counts and areas are
# paired by neighbourhood rather than by row position (row positions drift apart
# as soon as one neighbourhood has no listings). The result is indexed like
# barris, so it can be assigned directly onto a copy of barris.
counts_45 = list45.set_index("neighbourhood")["count_col"]
# Neighbourhoods without any listing get a density of 0 instead of NaN.
density_45 = (barris["NOM"].map(counts_45) / barris["AREA"]).fillna(0)
density_45

In [None]:
#Median
# numeric_only=True restricts the aggregation to numeric columns; without it,
# recent pandas versions raise on the text columns of the listings table.
# NOTE(review): this assumes "price" is stored as a number -- confirm.
median45 = new_df45.groupby(["neighbourhood"]).median(numeric_only=True).reset_index()
_45median = pd.DataFrame(median45[["neighbourhood", "price"]])
_45median = _45median.rename(columns={'price': 'price_45'})
# fillna returns a new frame, so keep the result instead of only displaying it.
_45median = _45median.fillna(0)
_45median

### Now we add the densities and prices of the listings we just calculated to the barris dataset

In [None]:
# Work on an independent copy so the original barris GeoDataFrame stays untouched.
Neighbourhood2 = barris.copy(deep=True)

In [None]:
# Attach one density column per listings file to the copied GeoDataFrame.
density_columns = {
    "density_24": density_24,
    "density_30": density_30,
    "density_33": density_33,
    "density_45": density_45,
}
for column_name, density_values in density_columns.items():
    Neighbourhood2[column_name] = density_values


**Note: a `key_0` column is created whenever the merge keys are passed as Series (e.g. `left_on=Neighbourhood2["NOM"]`) rather than as column names, so any such column has to be dropped after the merge.**

In [None]:
# Join the listings-24 median price onto the neighbourhoods by name.
# The keys are given as column labels: passing Series as keys is what makes
# pandas add a helper "key_0" column to the merge result.
Neighbourhood2 = Neighbourhood2.merge(
    _24median, how="left", left_on="NOM", right_on="neighbourhood"
)
# The neighbourhood name is already available in NOM.
Neighbourhood2 = Neighbourhood2.drop(columns=["neighbourhood"])

In [None]:
# Join the listings-30 median price onto the neighbourhoods by name.
# The keys are given as column labels: passing Series as keys is what makes
# pandas add a helper "key_0" column to the merge result.
Neighbourhood2 = Neighbourhood2.merge(
    _30median, how="left", left_on="NOM", right_on="neighbourhood"
)
# The neighbourhood name is already available in NOM.
Neighbourhood2 = Neighbourhood2.drop(columns=["neighbourhood"])

In [None]:
# Join the listings-33 median price onto the neighbourhoods by name.
# The keys are given as column labels: passing Series as keys is what makes
# pandas add a helper "key_0" column to the merge result.
Neighbourhood2 = Neighbourhood2.merge(
    _33median, how="left", left_on="NOM", right_on="neighbourhood"
)
# The neighbourhood name is already available in NOM.
Neighbourhood2 = Neighbourhood2.drop(columns=["neighbourhood"])

# Listings 45 needs the same join: without it there is no price_45 column for
# the choropleth maps that plot it.
Neighbourhood2 = Neighbourhood2.merge(
    _45median, how="left", left_on="NOM", right_on="neighbourhood"
)
Neighbourhood2 = Neighbourhood2.drop(columns=["neighbourhood"])


In [None]:
# Replace missing densities and prices with zero, for every listings file at once.
zero_defaults = {
    f"{measure}_{listing}": 0
    for measure in ("density", "price")
    for listing in (24, 30, 33, 45)
}
Neighbourhood2.fillna(value=zero_defaults, inplace=True)

### Now we will visualize the price range or density of the neighborhoods (remember we are visualizing those with room type: "Entire home/apt") to see how expensive they are in comparison. We will use a choropleth

In [None]:
# Choropleth of the listings-33 median price, with the colour scale clamped to 20-120.
axe = Neighbourhood2.plot(
    column="price_33",
    cmap="Reds",
    vmin=20,
    vmax=120,
    alpha=0.5,
    legend=True,
    figsize=(10, 10),
)
# Put a basemap underneath the neighbourhood polygons.
ctx.add_basemap(axe)

### Choropleth maps for all four price columns (listings)

In [None]:
# One panel per listings file, all sharing the same colour scale (20-120).
figure, axes = plt.subplots(nrows=1, ncols=4, figsize=(25, 20))

for panel, listing in zip(axes, ("24", "30", "33", "45")):
    Neighbourhood2.plot(
        ax=panel,
        column=f"price_{listing}",
        cmap="Reds",
        vmin=20,
        vmax=120,
        alpha=0.5,
    )
    panel.set_title(f"Price for listings {listing}")

figure.tight_layout()