In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import geopandas as gpd
import seaborn as sns

In [2]:
# Load the cleaned Airbnb listings table from the local data folder.
listings = pd.read_csv('./madrid_data/listings_cleaned.csv')

# Show the first rows, then summary statistics of the numeric columns.
display(listings.head())
display(listings.describe())

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
0,21853,Bright and airy room,83531,Abdel,Latina,Cármenes,40.40381,-3.7413,Private room,31.0,4,33,2018-07-15,0.29,2,233,0
1,204570,Lovely 110m2 home - Best location!,1004721,Alex,Centro,Cortes,40.4152,-3.69668,Entire home/apt,180.0,6,103,2023-07-12,0.68,1,10,2
2,24805,Gran Via Studio Madrid,346366726,Alvaro,Centro,Universidad,40.42183,-3.70529,Entire home/apt,92.0,5,35,2024-02-28,0.31,1,91,10
3,205199,Tafari Gran Via,1008659,Nuria,Centro,Universidad,40.42116,-3.70384,Entire home/apt,65.0,7,333,2024-02-25,2.18,4,334,26
4,209373,Attic in the Heart of Madrid-WIFI,1031664,José,Centro,Embajadores,40.41126,-3.70347,Private room,32.0,1,77,2023-07-02,0.52,1,174,18


Unnamed: 0,id,host_id,latitude,longitude,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm
count,26020.0,26020.0,26020.0,26020.0,26020.0,26020.0,26020.0,26020.0,26020.0,26020.0,26020.0
mean,4.70331e+17,225522100.0,40.42144,-3.693995,137.574294,7.902152,43.681015,1.64992,26.053036,160.137394,13.720638
std,4.587651e+17,188655900.0,0.023565,0.027799,278.72146,35.344443,82.903573,1.6813,60.019859,127.143249,21.118614
min,21853.0,17453.0,40.331397,-3.832059,8.0,1.0,0.0,0.01,1.0,0.0,0.0
25%,33281120.0,42295210.0,40.4093,-3.70748,72.0,1.0,1.0,0.57,1.0,25.0,0.0
50%,6.016226e+17,183697200.0,40.42013,-3.701057,120.0,2.0,10.0,1.17,3.0,159.0,4.0
75%,9.281254e+17,414973100.0,40.431479,-3.685421,137.574294,3.0,47.0,2.12,14.0,271.0,20.0
max,1.117381e+18,567968900.0,40.53553,-3.545904,21000.0,1125.0,1027.0,28.73,300.0,365.0,288.0


In [3]:
# Load the neighbourhood / neighbourhood_group lookup table.
neighbourhoods = pd.read_csv('./madrid_data/neighbourhoods.csv')

# Preview the first rows, then summarise the object-dtype columns
# (count, unique values, most frequent value and its frequency).
display(neighbourhoods.head())
display(neighbourhoods.describe(include='O'))

Unnamed: 0,neighbourhood_group,neighbourhood
0,Arganzuela,Acacias
1,Arganzuela,Atocha
2,Arganzuela,Chopera
3,Arganzuela,Delicias
4,Arganzuela,Imperial


Unnamed: 0,neighbourhood_group,neighbourhood
count,128,128
unique,21,128
top,Ciudad Lineal,Acacias
freq,9,1


In [4]:
# Number of listings per neighbourhood, most common first.
# Naming the index and the value column explicitly gives the same
# two-column frame without overwriting `.columns` positionally.
listings_count = (
    listings['neighbourhood']
    .value_counts()
    .rename_axis('neighbourhood')
    .reset_index(name='listing_count')
)
listings_count

Unnamed: 0,neighbourhood,listing_count
0,Embajadores,3051
1,Universidad,2324
2,Palacio,1904
3,Sol,1364
4,Justicia,1285
...,...,...
122,Aeropuerto,9
123,Palomas,7
124,Fuentelareina,5
125,El Pardo,4


In [5]:
# Load the neighbourhood boundary polygons.
neighbourhood_geojson = gpd.read_file('./madrid_data/neighbourhoods.geojson')

# Attach each listing's neighbourhood polygon. The join is an inner join
# (the pandas default, now spelled out), so listings whose
# neighbourhood / neighbourhood_group pair has no boundary are dropped.
# `validate` makes the merge fail loudly if the boundary file contains
# duplicate keys, which would otherwise silently duplicate listings and
# skew any per-neighbourhood aggregate computed from the result.
loc_madrid = listings.merge(
    neighbourhood_geojson,
    on=['neighbourhood', 'neighbourhood_group'],
    how='inner',
    validate='many_to_one',
)

# The merge starts from the plain `listings` DataFrame, so wrap the result
# back into a GeoDataFrame, naming the geometry column and CRS explicitly
# instead of relying on them being inferred.
madrid_gdf = gpd.GeoDataFrame(
    loc_madrid,
    geometry='geometry',
    crs=neighbourhood_geojson.crs,
)
madrid_gdf.head()


Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,geometry
0,21853,Bright and airy room,83531,Abdel,Latina,Cármenes,40.40381,-3.7413,Private room,31.0,4,33,2018-07-15,0.29,2,233,0,"MULTIPOLYGON (((-3.72265 40.40583, -3.72274 40..."
1,204570,Lovely 110m2 home - Best location!,1004721,Alex,Centro,Cortes,40.4152,-3.69668,Entire home/apt,180.0,6,103,2023-07-12,0.68,1,10,2,"MULTIPOLYGON (((-3.69805 40.41928, -3.69654 40..."
2,24805,Gran Via Studio Madrid,346366726,Alvaro,Centro,Universidad,40.42183,-3.70529,Entire home/apt,92.0,5,35,2024-02-28,0.31,1,91,10,"MULTIPOLYGON (((-3.70117 40.42133, -3.70164 40..."
3,205199,Tafari Gran Via,1008659,Nuria,Centro,Universidad,40.42116,-3.70384,Entire home/apt,65.0,7,333,2024-02-25,2.18,4,334,26,"MULTIPOLYGON (((-3.70117 40.42133, -3.70164 40..."
4,209373,Attic in the Heart of Madrid-WIFI,1031664,José,Centro,Embajadores,40.41126,-3.70347,Private room,32.0,1,77,2023-07-02,0.52,1,174,18,"MULTIPOLYGON (((-3.70393 40.41431, -3.70286 40..."


## Average price per location

In [6]:
# Mean listing price per neighbourhood. `geometry` is included in the
# grouping keys so the polygon is carried through to the summary table.
avg_price_loc = (
    madrid_gdf
    .groupby(['neighbourhood', 'neighbourhood_group', 'geometry'])['price']
    .mean()
    .reset_index()
)
display(avg_price_loc.round(2).sort_values(by='price', ascending=False))

# Choropleth of the averages. Rows are matched to GeoJSON features through
# the `neighbourhood` property of each feature (featureidkey).
# The visible area is controlled by `center` and `zoom`; `update_geos` is
# not used because it only configures `geo` subplots, which a Mapbox
# figure does not have.
fig = px.choropleth_mapbox(
    avg_price_loc,
    geojson=neighbourhood_geojson,
    locations='neighbourhood',
    color='price',
    featureidkey="properties.neighbourhood",
    mapbox_style='carto-positron',
    center={"lat": 40.4168, "lon": -3.7038},
    title='Average Airbnb Price by Neighbourhood in Madrid',
    zoom=10,
    opacity=0.6,
    labels={'price': 'Average Price'},
)
fig.show()

# Save a static copy of the map next to the notebook.
fig.write_image('airbnb_avg_price_map.png')

Unnamed: 0,neighbourhood,neighbourhood_group,geometry,price
55,Fuentelareina,Fuencarral - El Pardo,"MULTIPOLYGON (((-3.73751 40.48956, -3.73716 40...",360.80
84,Palomeras Bajas,Puente de Vallecas,"MULTIPOLYGON (((-3.65309 40.38362, -3.65297 40...",310.06
32,Castillejos,Tetuán,"MULTIPOLYGON (((-3.68997 40.45902, -3.69022 40...",309.51
11,Amposta,San Blas - Canillejas,"MULTIPOLYGON (((-3.61822 40.42678, -3.62081 40...",250.16
99,Recoletos,Salamanca,"MULTIPOLYGON (((-3.69219 40.41951, -3.69309 40...",230.69
...,...,...,...,...
12,Apostol Santiago,Hortaleza,"MULTIPOLYGON (((-3.65658 40.47278, -3.65774 40...",65.07
3,Aeropuerto,Barajas,"MULTIPOLYGON (((-3.57795 40.51037, -3.57588 40...",62.11
10,Ambroz,Vicálvaro,"MULTIPOLYGON (((-3.60636 40.41254, -3.59014 40...",61.81
0,Abrantes,Carabanchel,"MULTIPOLYGON (((-3.72182 40.38524, -3.72114 40...",61.73


In [7]:
# Fixed colour per room type so the legend is stable across re-runs.
color_palette = {
    'Private room': '#008B8B',
    'Entire home/apt': '#8B008B',
    'Shared room': '#00CED1',
    'Hotel room': '#E9967A',
}

# One marker per listing at its own latitude/longitude, coloured by room
# type. Markers use a uniform size: no `size` column is passed, so there is
# nothing for `size_max` to scale and it is omitted.
fig = px.scatter_mapbox(
    madrid_gdf,
    lat='latitude',
    lon='longitude',
    color='room_type',
    color_discrete_map=color_palette,
    hover_name='neighbourhood',
    # Hide the raw coordinates from the hover tooltip.
    hover_data={'latitude': False, 'longitude': False},
    zoom=10,
    opacity=0.5,
    mapbox_style='carto-positron',
    center={'lat': 40.4168, 'lon': -3.7038},
    title='Distribution of Airbnb Property Types in Madrid',
    labels={'room_type': 'Property Type'},
)

fig.show()

# Save a static copy of the map next to the notebook.
fig.write_image('airbnb_property_distribution_map.png')