# Maps for exploratory data analysis

In [5]:
import pandas as pd
import geopandas as gpd
import plotly.express as px
import plotly.io as pio

In [6]:
def clean_neighbourhood(frame):
    """Replace the neighbourhood names in ``frame`` with their lookup values.

    The name -> value lookup is read from ``data/neighbourhood_csv.csv``
    (columns ``name`` and ``value``). Missing names are first replaced by
    the string ``"None"`` and then looked up like any other name; every
    name that the lookup does not contain ends up as ``0``.

    Note: the ``neighbourhood_cleansed`` column of ``frame`` is overwritten
    in place, and the very same object is returned.
    """
    column = 'neighbourhood_cleansed'

    # Give missing entries an explicit placeholder name before the lookup.
    frame[column] = frame[column].fillna("None")

    lookup = pd.read_csv('data/neighbourhood_csv.csv').set_index('name')['value']

    # Names unknown to the lookup map to NaN, which is then coded as 0.
    frame[column] = frame[column].map(lookup).fillna(0)
    return frame


# Lookup from the integer codes stored in the attractions' 'city' column to
# city names (codes are assigned in alphabetical order of the city name).
cities_dict = dict(enumerate(["Athens", "Berlin", "Madrid", "Paris", "Rome", "Vienna"]))

# Tourist attractions, with the city code swapped for its name.
attractions_df = pd.read_csv('data/tourist_spots.csv').assign(
    city=lambda spots: spots['city'].map(cities_dict)
)

# Listings used for the maps, with neighbourhood names replaced by their
# lookup values.
eda_df = clean_neighbourhood(pd.read_csv('data/eda.csv'))

In [7]:
# Athens: choropleth of the mean listing price per neighbourhood, with the
# city's tourist attractions overlaid as black markers.
df = eda_df[eda_df['city'] == 'Athens']

# Load the neighbourhood polygons and reproject them to EPSG:4326
# (WGS84 longitude/latitude), the same coordinates as the attraction markers.
geojson_data = gpd.read_file('data/Athens/athens.geojson')
geojson_data = geojson_data.to_crs(epsg=4326)

# Aggregate the price by neighbourhood
df_agg = df.groupby('neighbourhood_cleansed')['price'].mean().reset_index()

# Ensure the join key has the same data type on both sides, then attach the
# prices. The left join keeps every polygon; neighbourhoods without listings
# get a NaN price.
geojson_data['neighbourhood_cleansed'] = geojson_data['neighbourhood_cleansed'].astype(int)
merged_data = geojson_data.merge(df_agg, on='neighbourhood_cleansed', how='left')

# Create a choropleth map. The final title is passed here directly instead of
# a placeholder that would be overwritten straight away.
fig_athens = px.choropleth(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,  # The row index doubles as the feature id
    color='price',  # Column with the price data
    color_continuous_scale=px.colors.sequential.Plasma_r,
    title="Airbnb prices (in EUR) by neighborhood in Athens",
)

# Zoom to the plotted locations and hide the base map
fig_athens.update_geos(fitbounds="locations", visible=False)
fig_athens.update_layout(
    title=dict(
        x=0.5,  # Center the title (0 is left, 1 is right)
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Times New Roman"),
    # The continuous price scale is drawn as a colorbar, not as a legend entry,
    # so its title belongs to the colour axis.
    coloraxis_colorbar=dict(title=dict(text="Price (in EUR)")),
    # The legend only lists the attraction markers; x=0 puts it at the left
    # edge, away from the colorbar.
    legend=dict(x=0),
    width=800,
    height=600,
    geo=dict(showcoastlines=True, coastlinecolor="Black"),
)

# Overlay the tourist attractions of this city
tourist_df = attractions_df[attractions_df['city'] == 'Athens']
fig_athens.add_scattergeo(
    lat=tourist_df['lat'],
    lon=tourist_df['lng'],
    mode='markers',
    marker=dict(color='black', size=4, symbol='circle'),
    name='Tourist Attractions'
)
# Show the map
fig_athens.show()

In [8]:
# Berlin: choropleth of the mean listing price per neighbourhood, with the
# city's tourist attractions overlaid as black markers.
df = eda_df[eda_df['city'] == 'Berlin']

# Load the neighbourhood polygons and reproject them to EPSG:4326
# (WGS84 longitude/latitude), the same coordinates as the attraction markers.
geojson_data = gpd.read_file('data/Berlin/berlin.geojson')
geojson_data = geojson_data.to_crs(epsg=4326)

# Aggregate the price by neighbourhood
df_agg = df.groupby('neighbourhood_cleansed')['price'].mean().reset_index()

# Ensure the join key has the same data type on both sides, then attach the
# prices. The left join keeps every polygon; neighbourhoods without listings
# get a NaN price.
geojson_data['neighbourhood_cleansed'] = geojson_data['neighbourhood_cleansed'].astype(int)
merged_data = geojson_data.merge(df_agg, on='neighbourhood_cleansed', how='left')

# Create a choropleth map. The final title is passed here directly instead of
# a placeholder that would be overwritten straight away.
fig_berlin = px.choropleth(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,  # The row index doubles as the feature id
    color='price',  # Column with the price data
    color_continuous_scale=px.colors.sequential.Plasma_r,
    title="Airbnb prices (in EUR) by neighborhood in Berlin",
)

# Zoom to the plotted locations and hide the base map
fig_berlin.update_geos(fitbounds="locations", visible=False)
fig_berlin.update_layout(
    title=dict(
        x=0.5,  # Center the title (0 is left, 1 is right)
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Times New Roman"),
    # The continuous price scale is drawn as a colorbar, not as a legend entry,
    # so its title belongs to the colour axis.
    coloraxis_colorbar=dict(title=dict(text="Price (in EUR)")),
    # The legend only lists the attraction markers; x=0 puts it at the left
    # edge, away from the colorbar.
    legend=dict(x=0),
    width=800,
    height=600,
    geo=dict(showcoastlines=True, coastlinecolor="Black"),
)

# Overlay the tourist attractions of this city
tourist_df = attractions_df[attractions_df['city'] == 'Berlin']
fig_berlin.add_scattergeo(
    lat=tourist_df['lat'],
    lon=tourist_df['lng'],
    mode='markers',
    marker=dict(color='black', size=4, symbol='circle'),
    name='Tourist Attractions'
)
# Show the map
fig_berlin.show()

In [9]:
# Madrid: choropleth of the mean listing price per neighbourhood, with the
# city's tourist attractions overlaid as black markers.
df = eda_df[eda_df['city'] == 'Madrid']

# Load the neighbourhood polygons and reproject them to EPSG:4326
# (WGS84 longitude/latitude), the same coordinates as the attraction markers.
geojson_data = gpd.read_file('data/Madrid/madrid.geojson')
geojson_data = geojson_data.to_crs(epsg=4326)

# Aggregate the price by neighbourhood
df_agg = df.groupby('neighbourhood_cleansed')['price'].mean().reset_index()

# Ensure the join key has the same data type on both sides, then attach the
# prices. The left join keeps every polygon; neighbourhoods without listings
# get a NaN price.
geojson_data['neighbourhood_cleansed'] = geojson_data['neighbourhood_cleansed'].astype(int)
merged_data = geojson_data.merge(df_agg, on='neighbourhood_cleansed', how='left')

# Create a choropleth map. The final title is passed here directly instead of
# a placeholder that would be overwritten straight away.
fig_madrid = px.choropleth(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,  # The row index doubles as the feature id
    color='price',  # Column with the price data
    color_continuous_scale=px.colors.sequential.Plasma_r,
    title="Airbnb prices (in EUR) by neighborhood in Madrid",
)

# Zoom to the plotted locations and hide the base map
fig_madrid.update_geos(fitbounds="locations", visible=False)
fig_madrid.update_layout(
    title=dict(
        x=0.5,  # Center the title (0 is left, 1 is right)
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Times New Roman"),
    # The continuous price scale is drawn as a colorbar, not as a legend entry,
    # so its title belongs to the colour axis.
    coloraxis_colorbar=dict(title=dict(text="Price (in EUR)")),
    # The legend only lists the attraction markers; x=0 puts it at the left
    # edge, away from the colorbar.
    legend=dict(x=0),
    width=800,
    height=600,
    geo=dict(showcoastlines=True, coastlinecolor="Black"),
)

# Overlay the tourist attractions of this city
tourist_df = attractions_df[attractions_df['city'] == 'Madrid']
fig_madrid.add_scattergeo(
    lat=tourist_df['lat'],
    lon=tourist_df['lng'],
    mode='markers',
    marker=dict(color='black', size=4, symbol='circle'),
    name='Tourist Attractions'
)
# Show the map
fig_madrid.show()

In [10]:
# Paris: choropleth of the mean listing price per neighbourhood, with the
# city's tourist attractions overlaid as black markers.
df = eda_df[eda_df['city'] == 'Paris']

# Load the neighbourhood polygons and reproject them to EPSG:4326
# (WGS84 longitude/latitude), the same coordinates as the attraction markers.
geojson_data = gpd.read_file('data/Paris/paris.geojson')
geojson_data = geojson_data.to_crs(epsg=4326)

# Aggregate the price by neighbourhood
df_agg = df.groupby('neighbourhood_cleansed')['price'].mean().reset_index()

# Ensure the join key has the same data type on both sides, then attach the
# prices. The left join keeps every polygon; neighbourhoods without listings
# get a NaN price.
geojson_data['neighbourhood_cleansed'] = geojson_data['neighbourhood_cleansed'].astype(int)
merged_data = geojson_data.merge(df_agg, on='neighbourhood_cleansed', how='left')

# Create a choropleth map. The final title is passed here directly instead of
# a placeholder that would be overwritten straight away.
fig_paris = px.choropleth(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,  # The row index doubles as the feature id
    color='price',  # Column with the price data
    color_continuous_scale=px.colors.sequential.Plasma_r,
    title="Airbnb prices (in EUR) by neighborhood in Paris",
)

# Zoom to the plotted locations and hide the base map
fig_paris.update_geos(fitbounds="locations", visible=False)
fig_paris.update_layout(
    title=dict(
        x=0.5,  # Center the title (0 is left, 1 is right)
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Times New Roman"),
    # The continuous price scale is drawn as a colorbar, not as a legend entry,
    # so its title belongs to the colour axis.
    coloraxis_colorbar=dict(title=dict(text="Price (in EUR)")),
    # The legend only lists the attraction markers; x=0 puts it at the left
    # edge, away from the colorbar.
    legend=dict(x=0),
    width=800,
    height=600,
    geo=dict(showcoastlines=True, coastlinecolor="Black"),
)

# Overlay the tourist attractions of this city
tourist_df = attractions_df[attractions_df['city'] == 'Paris']
fig_paris.add_scattergeo(
    lat=tourist_df['lat'],
    lon=tourist_df['lng'],
    mode='markers',
    marker=dict(color='black', size=4, symbol='circle'),
    name='Tourist Attractions'
)
# Show the map
fig_paris.show()

In [11]:
# Rome: choropleth of the mean listing price per neighbourhood, with the
# city's tourist attractions overlaid as black markers.
df = eda_df[eda_df['city'] == 'Rome']

# Load the neighbourhood polygons and reproject them to EPSG:4326
# (WGS84 longitude/latitude), the same coordinates as the attraction markers.
geojson_data = gpd.read_file('data/Rome/rome.geojson')
geojson_data = geojson_data.to_crs(epsg=4326)

# Aggregate the price by neighbourhood
df_agg = df.groupby('neighbourhood_cleansed')['price'].mean().reset_index()

# Ensure the join key has the same data type on both sides, then attach the
# prices. The left join keeps every polygon; neighbourhoods without listings
# get a NaN price.
geojson_data['neighbourhood_cleansed'] = geojson_data['neighbourhood_cleansed'].astype(int)
merged_data = geojson_data.merge(df_agg, on='neighbourhood_cleansed', how='left')

# Create a choropleth map. The final title is passed here directly instead of
# a placeholder that would be overwritten straight away.
fig_rome = px.choropleth(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,  # The row index doubles as the feature id
    color='price',  # Column with the price data
    color_continuous_scale=px.colors.sequential.Plasma_r,
    title="Airbnb prices (in EUR) by neighborhood in Rome",
)

# Zoom to the plotted locations and hide the base map
fig_rome.update_geos(fitbounds="locations", visible=False)
fig_rome.update_layout(
    title=dict(
        x=0.5,  # Center the title (0 is left, 1 is right)
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Times New Roman"),
    # The continuous price scale is drawn as a colorbar, not as a legend entry,
    # so its title belongs to the colour axis.
    coloraxis_colorbar=dict(title=dict(text="Price (in EUR)")),
    # The legend only lists the attraction markers; x=0 puts it at the left
    # edge, away from the colorbar.
    legend=dict(x=0),
    width=800,
    height=600,
    geo=dict(showcoastlines=True, coastlinecolor="Black"),
)

# Overlay the tourist attractions of this city
tourist_df = attractions_df[attractions_df['city'] == 'Rome']
fig_rome.add_scattergeo(
    lat=tourist_df['lat'],
    lon=tourist_df['lng'],
    mode='markers',
    marker=dict(color='black', size=4, symbol='circle'),
    name='Tourist Attractions'
)
# Show the map
fig_rome.show()

In [12]:
# Vienna: choropleth of the mean listing price per neighbourhood, with the
# city's tourist attractions overlaid as black markers.
df = eda_df[eda_df['city'] == 'Vienna']

# Load the neighbourhood polygons and reproject them to EPSG:4326
# (WGS84 longitude/latitude), the same coordinates as the attraction markers.
geojson_data = gpd.read_file('data/Vienna/vienna.geojson')
geojson_data = geojson_data.to_crs(epsg=4326)

# Aggregate the price by neighbourhood
df_agg = df.groupby('neighbourhood_cleansed')['price'].mean().reset_index()

# Ensure the join key has the same data type on both sides, then attach the
# prices. The left join keeps every polygon; neighbourhoods without listings
# get a NaN price.
geojson_data['neighbourhood_cleansed'] = geojson_data['neighbourhood_cleansed'].astype(int)
merged_data = geojson_data.merge(df_agg, on='neighbourhood_cleansed', how='left')

# Create a choropleth map. The final title is passed here directly instead of
# a placeholder that would be overwritten straight away.
fig_vienna = px.choropleth(
    merged_data,
    geojson=merged_data.geometry,
    locations=merged_data.index,  # The row index doubles as the feature id
    color='price',  # Column with the price data
    color_continuous_scale=px.colors.sequential.Plasma_r,
    title="Airbnb prices (in EUR) by neighborhood in Vienna",
)

# Zoom to the plotted locations and hide the base map
fig_vienna.update_geos(fitbounds="locations", visible=False)
fig_vienna.update_layout(
    title=dict(
        x=0.5,  # Center the title (0 is left, 1 is right)
        xanchor='center',
        yanchor='top',
    ),
    font=dict(family="Times New Roman"),
    # The continuous price scale is drawn as a colorbar, not as a legend entry,
    # so its title belongs to the colour axis.
    coloraxis_colorbar=dict(title=dict(text="Price (in EUR)")),
    # The legend only lists the attraction markers; x=0 puts it at the left
    # edge, away from the colorbar.
    legend=dict(x=0),
    width=800,
    height=600,
    geo=dict(showcoastlines=True, coastlinecolor="Black"),
)

# Overlay the tourist attractions of this city
tourist_df = attractions_df[attractions_df['city'] == 'Vienna']
fig_vienna.add_scattergeo(
    lat=tourist_df['lat'],
    lon=tourist_df['lng'],
    mode='markers',
    marker=dict(color='black', size=4, symbol='circle'),
    name='Tourist Attractions'
)
# Show the map
fig_vienna.show()