Note: You need data (all_city_data_with_pop.csv) and some images for the mapping at the end of this notebook. You define their paths on the cell below.


In [2]:
#@title File paths definition
FILE_PATH = r"BuzzOnEarth\datasets\processed\all_city_data_with_pop.csv"

ev_image_path = "map_images/ev.jpg"
restaurant_image_path = "map_images/restaurants.png"
park_image_path = "map_images/park.jpg"
school_image_path = "map_images/school.png"

In [3]:
#@title imports
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import folium

In [27]:
#@title read csv

# dont add index column
df = pd.read_csv(FILE_PATH, index_col=0)
# remove parking_space because its already covered in "parking" column
df = df.drop(columns=['parking_space'])

### getting longitude and latitude from polygon

In [28]:
# what we have : string : "POLYGON ((6.981833471527156 49.24179263859822, 6.982014735194567 49.237345764477745, 6.975276694899576 49.23722781871863, 6.975094827000435 49.24167467444893, 6.981833471527156 49.24179263859822))"
# what we want : polygon_vertices = [(lat1, lon1), (lat2, lon2), (lat3, lon3), (lat4, lon4), (lat5, lon5)]

def polygon_to_vertices(polygon):
    polygon = polygon.replace("POLYGON ((", "")
    polygon = polygon.replace("))", "")
    polygon = polygon.split(",")
    polygon_vertices = []
    for vertex in polygon:
        vertex = vertex.strip()
        lon, lat = vertex.split(" ")[0], vertex.split(" ")[1]
        polygon_vertices.append((float(lat), float(lon)))
    return polygon_vertices

def calculate_polygon_centroid(polygon_str, ind):
    '''
    given the polygon string, calculate the centroid of the polygon.

    @args:
        polygon_str: string of the polygon
    @returns :
        centroid_lat: latitude of the centroid
        centroid_lon: longitude of the centroid
    '''

    vertices = polygon_to_vertices(polygon_str)
    total_lat = 0.0
    total_lon = 0.0

    num_vertices = len(vertices)

    for lon, lat in vertices:
        total_lon += lon
        total_lat += lat

    centroid_lat = total_lat / num_vertices
    centroid_lon = total_lon / num_vertices
    if ind==1:
        return centroid_lat

    return centroid_lon


In [29]:
#@title adding long,lat to each polygon/data point
df['latitude'] = df['geometry'].apply(lambda x: calculate_polygon_centroid(x, 0))
df['longitude'] = df['geometry'].apply(lambda x: calculate_polygon_centroid(x, 1))

In [30]:
df['geometry'].head()

0    POLYGON ((8.4727605 50.099822499999995, 8.4730...
1    POLYGON ((8.4775730092433 50.10302720327834, 8...
2    POLYGON ((8.479750879173663 50.09863320231676,...
3    POLYGON ((8.479688060978736 50.10443297769501,...
4    POLYGON ((8.47965547981383 50.107440331063444,...
Name: geometry, dtype: object

In [31]:
df['longitude'].head()

0    8.475216
1    8.478859
2    8.476534
3    8.481015
4    8.482331
Name: longitude, dtype: float64

In [32]:
#@title count how many entries are for each city

# get the city counts in a dictionary from df
city_counts = df['city'].value_counts().to_dict()
df_city = df.groupby('city')
# DataFrameGroupBy to dataframe
df_city = df_city.first()
# add a new column "city_counts" in df_city and put the values from city_counts. df_city is indexed by city
df_city['data_points'] = df_city.index.map(city_counts)


## EDA

In [33]:
#@title Variable definition

# divided the columns into 2 groups based on their type
cols1 = ['EV_stations', 'restaurant', 'park', 'school', 'Community_centre', 'place_of_worship', 'university', 'cinema',
       'library']

cols2 = ['residential' 'commercial' 'retail' 'townhall' 'government']


cols_infra_long_stay = ["EV_stations", "school", "university", "cinema", "library","residential", "commercial", "retail", "townhall"]
COLS_ENTERTAINMENT_LOCS = ["restaurant", "park", "school","place_of_worship", "cinema", "library"]

PUBLIC_PLACES = ['EV_stations', 'restaurant', 'park', 'school', 'Community_centre', 'place_of_worship', 'university', 'cinema',
'library', 'residential' 'commercial' 'retail' 'townhall' 'government']


In [34]:
corr_df = df.drop(['Unnamed: 0', 'geometry', 'city'], axis=1)
corr_df.head()

Unnamed: 0,parking,edges,EV_stations,civic,restaurant,park,school,node,Community_centre,place_of_worship,...,library,commercial,retail,townhall,government,residential,population,Berlin_data_onlycenter_,latitude,longitude
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.0,0,9.014051,,50.100948,8.475216
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.0,0,0.0,,50.103681,8.478859
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.0,0,9.014051,,50.097913,8.476534
3,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0.0,0,9.014051,,50.105448,8.481015
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0.0,0,0.0,,50.108984,8.482331


In [35]:
#@title Plot: Correlation of EV_stations to other variables
fig = px.bar(corr_df.corr()['EV_stations'].drop('EV_stations').sort_values(ascending=False), title="Correlation of EV_stations to other variables")
fig.show()

In [36]:
#@title Number of data points / polygons we have on each city
fig = px.bar(df_city, x=df_city.index, y='data_points', color='data_points', color_continuous_scale='Bluered_r', title="Number of data points per city")
fig.show()


In [37]:
#@title Plot : Proportion of Infrastructure in the whole data

# Calculate the total counts for each column
total_counts = df[cols_infra_long_stay].sum()

# Create a DataFrame for the pie chart data
data = {'Infrastructure': cols_infra_long_stay, 'Count': total_counts}
df_pie = pd.DataFrame(data)

# Plot the pie chart
fig = px.pie(df_pie, values='Count', names='Infrastructure', title='Proportion of Infrastructure in the whole data')
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.show()

In [38]:
#@title Plot: Number of EV stations per city

df_city_sum = df.copy()

df_city_sum = df_city_sum.groupby(['city']).sum()

df_city_sum = df_city_sum.sort_values(by=['EV_stations'], ascending=False)
fig = px.bar(df_city_sum, x=df_city_sum.index, y='EV_stations', color='EV_stations', color_continuous_scale='Bluered_r', title="Number of EV stations per city")
fig.show()

df_city_sum = df_city_sum.reset_index()



In [39]:
#@title Plot : Count of different types of places in each city
fig = px.bar(df_city_sum, x='city', y=cols1, barmode='group', color_discrete_sequence=px.colors.qualitative.Pastel, title="Count of different types of places in each city")
fig.show()


In [40]:
#@title Plot: Number of EV stations vs Number of parking per city
fig = px.scatter(df_city_sum, x='EV_stations', y='parking', color='city', color_continuous_scale='Bluered_r', title="Number of EV stations vs Number of parking per city")
fig.update_traces(marker={'size': 15})
fig.show()

In [41]:
#@title Plot: Number of EV stations vs Number of restaurants per city
fig = px.scatter(df_city_sum, x='EV_stations', y='restaurant', color='city', color_continuous_scale='Bluered_r', title="Number of EV stations vs Number of restaurants per city")
fig.update_traces(marker={'size': 15})
fig.show()

In [42]:
#@title Plot: Number of different types of infrastructure in Saarbrucken

city_name = "Saarbrucken"
# get the row of berlin
df_sb = df_city_sum[df_city_sum['city'] == city_name]

# get the values of the columns in cols_infra_long_stay
city_long_stay_counts = df_sb[cols_infra_long_stay]

fig = px.pie(city_long_stay_counts, values=city_long_stay_counts.values.flatten(), names=city_long_stay_counts.columns, title="Number of different types of infrastructure in "+ city_name)
fig.show()

In [43]:
#@title Plot: Number of different types of infrastructure in Berlin

city_name = "Berlin"

# get the row of berlin
df_berlin = df_city_sum[df_city_sum['city'] == city_name]

# get the values of the columns in cols_infra_long_stay
city_long_stay_counts = df_berlin[cols_infra_long_stay]

fig = px.pie(city_long_stay_counts, values=city_long_stay_counts.values.flatten(), names=city_long_stay_counts.columns, title="Number of different types of infrastructure in "+ city_name)
fig.show()

In [44]:
#@title Plot: Number of different types of infrastructure in Munich

city_name = "Munich"
# get the row of berlin
df_munich = df_city_sum[df_city_sum['city'] == city_name]

# get the values of the columns in cols_infra_long_stay
city_long_stay_counts = df_munich[cols_infra_long_stay]

fig = px.pie(city_long_stay_counts, values=city_long_stay_counts.values.flatten(), names=city_long_stay_counts.columns, title="Number of different types of infrastructure in "+ city_name)
fig.show()

In [45]:
#@title Plot: Number of different types of infrastructure in Stuttgart

city_name = "Stuttgart"
# get the row of berlin
df_stuttgart = df_city_sum[df_city_sum['city'] == city_name]

# get the values of the columns in cols_infra_long_stay
city_long_stay_counts = df_stuttgart[cols_infra_long_stay]

fig = px.pie(city_long_stay_counts, values=city_long_stay_counts.values.flatten(), names=city_long_stay_counts.columns, title="Number of different types of infrastructure in "+ city_name)
fig.show()

# Mapping Saarbrucken

In [46]:
#@title variable definitions
df_sb = df[df['city'] == "Saarbrucken"]


In [47]:
#@title get the mean of the longitudes and latitudes
longitudes = df_sb['longitude'].apply(lambda x: x)
latitudes = df_sb['latitude'].apply(lambda x: x)
map_center = (longitudes.mean(), latitudes.mean())

map_out_filepath = "ev_stations_map_with_school.html"

In [48]:
#@title mapping and putting the markers

# you can also add other markers to the map. just go through the comments below and you will get it

map = folium.Map(location=map_center, zoom_start=12)

# Iterate over the DataFrame rows and add markers for EV stations
for index, row in df_sb.iterrows():
    ev_lat = row['latitude']
    ev_lon = row['longitude']

    school_count = row['school']
    ev_count = row['EV_stations']
    restaurant_count = row['restaurant']
    park_count = row['park']
    #get more counts here if you want

    if school_count != 0:
        # add schools
        icon = folium.CustomIcon(school_image_path, icon_size=(20, 20))
        marker = folium.Marker(
            location=[ev_lat, ev_lon],
            icon=icon,
            popup=f"Schools: {school_count}"
        )
        marker.add_to(map)
        ev_lat += 0.001 # adding so the next marker is not on top of it

    if ev_count != 0:
        # add ev stations
        icon = folium.CustomIcon(ev_image_path, icon_size=(20, 20))
        marker = folium.Marker(
            location=[ev_lat, ev_lon],
            icon=icon,
            popup=f"EV Stations: {ev_count}"
        )
        marker.add_to(map)
        ev_lat += 0.001 # adding so the next marker is not on top of it

    if restaurant_count != 0:
        # add restaurants
        icon = folium.CustomIcon(restaurant_image_path, icon_size=(20, 20))
        marker = folium.Marker(
            location=[ev_lat, ev_lon],
            icon=icon,
            popup=f"Restaurants: {restaurant_count}"
        )
        marker.add_to(map)
        ev_lat += 0.001 # adding so the next marker is not on top of it


    if park_count != 0:
        # add parks
        icon = folium.CustomIcon(park_image_path, icon_size=(20, 20))
        marker = folium.Marker(
            location=[ev_lat, ev_lon],
            icon=icon,
            popup=f"Parks: {park_count}"
        )
        marker.add_to(map)
        ev_lat += 0.001 # adding so the next marker is not on top of it

    # repeast similar if conditions for other features if you want

# Save the map as an HTML file
map.save(map_out_filepath)