In [348]:
import pandas as pd
import geopandas as gpd
import mapclassify as mapc
import folium
import mplleaflet
import shapely as shp
from shapely.geometry import Point, LineString, Polygon
import matplotlib.pyplot as plt
import numpy as np
import functions as funcs
import json
import pygal

#Import all the necessary spatial data for the analysis. Data used are the districts and voting areas in
#the HRI-area.
districts=gpd.read_file("data/aluejaot/pks_pienalue.shp")
voting_areas=gpd.read_file("data/aluejaot/Aanestysaluejako_PKS_2019.shp")

#Import the election results by party and voting districts in absolute values and percentages.
election_val=pd.read_csv("data/vaalit2019.csv")
election_pr=pd.read_csv("data/vaalit2019%.csv")

#Next we'll import some data categorised by districts in the capital area. This includes unemployment rates
#and education levels from the year 2017. Kauniainen is excluded for lack of data.
#Helsinki area is separated by 'peruspiiri', Espoo by 'pienalue' and Vantaa by 'kaupunginosa'.
unemployment_Espoo=pd.read_csv("data/tyottomyys_Espoo.csv")
unemployment_Helsinki=pd.read_csv("data/tyottomyys_Helsinki.csv")
unemployment_Vantaa=pd.read_csv("data/tyottomyys_Vantaa.csv")

education_Espoo=pd.read_csv("data/koulutus_Espoo.csv")
education_Vantaa=pd.read_csv("data/koulutus_Vantaa.csv")
education_Helsinki=pd.read_csv("data/koulutus_Helsinki.csv")

In [349]:
#Merge the absolute and percentage values of the election results together.
election=election_val.merge(election_pr, on="alue", suffixes=("","_p"))

In [350]:
#Replace the '-' -values representing no candidate for the area with 0 value for easier handling of the data.
election.replace("-", value=0, inplace=True)

In [351]:
#Some results were returned as object types. Here they are converted to int64 for absolute values and float64
#for percentages.
election.iloc[:,12:21]=election.iloc[:,12:21].astype("int64")
election.iloc[:,32:]=election.iloc[:,32:].astype("float64")

In [352]:
election["winner"]=None
for i, row in election.iterrows():
    election.loc[i,"winner"]=row[2:22].astype("int64").idxmax()

In [353]:
#Create new columns to the vote results -dataframe containing only the district and city codes
election["tunnus"]=None
election["kuntanro"]=None

election=election.apply(funcs.code_extract, code="tunnus", city_code="kuntanro", area="alue", axis=1)

In [354]:
#There are a lot of unnecessary columns in the voting area and district -shapefiles. We filter them
#out and leave only the necessary columns which we will use later on when we need to combine the statistics
#of different voting areas to one district area.

districts=districts[["geometry","Nimi", "KUNTA", "PIEN", "TILA", "SUUR"]]
voting_areas=voting_areas[["geometry", "kuntanro", "tunnus", "nimi"]]

In [355]:
#Merge the vote-results to the correct voting areas by the columns "tunnus" and "kuntanro".
vote_results=voting_areas.merge(election, on=["tunnus", "kuntanro"])

In [356]:
del_cols=list(vote_results.columns[15:25])+list(vote_results.columns[35:45])
print(del_cols)

['Piraattip.', 'STL', 'KP', 'FP', 'LIBE', 'SKP', 'EOP', 'IP', 'SKE', 'KTP', 'Piraattip._p', 'STL_p', 'KP_p', 'FP_p', 'LIBE_p', 'SKP_p', 'EOP_p', 'IP_p', 'SKE_p', 'KTP_p']


In [357]:
vote_results.rename(columns={"Muut":"Others", "Muut_p":"Others_p"}, inplace=True)
print(vote_results.columns[13:200])
for i, row in vote_results.iterrows():
    row["others"]=row[15:26].sum()
    row["others_p"]=row[35:46].sum()
vote_results.drop(columns=del_cols, inplace=True)

Index(['KD', 'SIN', 'Piraattip.', 'STL', 'KP', 'FP', 'LIBE', 'SKP', 'EOP',
       'IP', 'SKE', 'KTP', 'Others', 'SDP_p', 'PS_p', 'KOK_p', 'KESK_p',
       'VIHR_p', 'VAS_p', 'RKP_p', 'KD_p', 'SIN_p', 'Piraattip._p', 'STL_p',
       'KP_p', 'FP_p', 'LIBE_p', 'SKP_p', 'EOP_p', 'IP_p', 'SKE_p', 'KTP_p',
       'Others_p', 'winner'],
      dtype='object')
                                              geometry kuntanro tunnus  \
0    POLYGON ((25490458.174 6685787.017, 25490761.5...      092    104   
1    POLYGON ((25490896.360 6683954.576, 25490911.8...      092    105   
2    POLYGON ((25491988.712 6686909.527, 25492005.6...      092    209   
3    POLYGON ((25499681.921 6691170.095, 25500004.6...      092    304   
4    POLYGON ((25501170.347 6690202.704, 25501179.6...      092    305   
..                                                 ...      ...    ...   
295  POLYGON ((25486028.639 6670888.373, 25485997.2...      049     78   
296  POLYGON ((25482093.026 6676699.177, 25482074.6...

In [359]:
#Extract the centroid of a voting area for map representation later on.
vote_results_centroid=vote_results
vote_results_centroid["geometry"]=vote_results["geometry"].centroid
vote_results_centroid=vote_results_centroid.to_crs(epsg=4326)

In [360]:
#Merge unemployment and education datasets for different cities together for easier use.
unemployment=pd.concat([unemployment_Espoo, unemployment_Helsinki, unemployment_Vantaa], ignore_index=True)
education=pd.concat([education_Espoo, education_Helsinki, education_Vantaa], ignore_index=True)

In [361]:
#Delete the no-data values marked with '.' and '..', and empty rows.
education.replace([".", ".."], value=np.NaN, inplace=True)
education.dropna(axis=0, inplace=True)

unemployment.replace([".", ".."], value=np.NaN, inplace=True)
unemployment.dropna(axis=0, inplace=True)

In [362]:
#Change the CRS of the districts to TM35Fin which is also used in the voting area map
districts=districts.to_crs({'init': 'epsg:3879'})

  return _prepare_from_string(" ".join(pjargs))


In [363]:
#Extract the district and city codes to separate columns from
#the full names of districts in education and unemployment datasets.
education["code"]=None
education["KUNTA"]=None
education=education.apply(funcs.code_extract, code="code",city_code="KUNTA", area="Alue", axis=1)

unemployment["code"]=None
unemployment["KUNTA"]=None
unemployment=unemployment.apply(funcs.code_extract, code="code",city_code="KUNTA", area="Alue", axis=1)

#Extract the district and city codes to separate columns from
#the full names of districts in district data.
districts["code"]=None
districts=districts.apply(funcs.sep_code, axis=1)

In [364]:
#Merge the education and unemployment data to district geometry
district_ed=districts.merge(education, on=["KUNTA","code"])
district_unemp=districts.merge(unemployment, on=["KUNTA","code"])

In [365]:
#Change the CRS of districts, education and unemployment dataframes to TM35Fin for easier mapping in Folium
districts.crs={'init': 'epsg:3879'}
district_unemp.crs={'init': 'epsg:3879'}
district_ed.crs={'init': 'epsg:3879'}

In [366]:
#Convert the education and unemployment dataframes' population values from object to float so they can be used
#for calculations.

for pop in district_ed.columns[8:]:
    district_ed[pop]=district_ed[pop].astype("float64")
    
district_unemp["Tyottomyysaste"]=district_unemp["Tyottomyysaste"].astype("float64")
district_unemp=district_unemp.rename(columns={"Tyottomyysaste": "Unemployment rate"})

In [367]:
#Rename the education dataframe columns to shorter ones.
district_ed.rename(columns={'Kaikki koulutusasteet': 'all', 'Tutkinnon suorittaneita yhteensä': 'ed_sum',
                  'Keskiaste':'mid_ed', 'Alin korkea- ja alempi korkeakouluaste': 'low_uni',
                  'Ylempi korkeakouluaste ja tutkijakoulutus': 'high_uni',
                  'Perusaste tai tuntematon': 'basic_edu'}, inplace=True)

In [368]:
#Calculate the proportions of education levels to separate columns of the education dataframe.
district_ed["Middle level education"]=round(district_ed["mid_ed"]/district_ed["all"]*100,2)
district_ed["Lower university education"]=round(district_ed["low_uni"]/district_ed["all"]*100,2)
district_ed["Higher university education"]=round(district_ed["high_uni"]/district_ed["all"]*100,2)
district_ed["Basic education"]=round(district_ed["basic_edu"]/district_ed["all"]*100,2)

In [369]:
#Classify the amount of votes with Natural Breaks and create a new column for the values.
#To be used for creating different sized markers for the map indicating the amount of votes from each area.

cl_votes=mapc.NaturalBreaks(y=vote_results_centroid["Yhteensä"], k=4)
vote_results_centroid["vote_cl"]=vote_results_centroid["Yhteensä"].apply(cl_votes).astype(int)

In [370]:
#Create a new index column called 'geoid' for unemployment and district dataframes.
#Folium requires an index column named 'geoid' to read the data from the rows correctly.

district_unemp['geoid'] = list(range(len(district_unemp)))
district_unemp.index=district_unemp["geoid"].astype(str)

district_ed['geoid'] = list(range(len(district_ed)))
district_ed.index=district_ed["geoid"].astype(str)

In [371]:
print(vote_results_centroid.iloc[1,6:16])

SDP       394
PS        339
KOK       591
KESK      103
VIHR      352
VAS        96
RKP        83
KD         65
SIN        58
Others    111
Name: 1, dtype: object


In [384]:
from pygal.style import Style
custom_style = Style(
    background='#ffffff',
    plot_background='#ffffff',
    foreground='#000000',
    foreground_strong='#000000',
    foreground_subtle='#000000',
    opacity='.7',
    opacity_hover='.9',
    font_family='googlefont:Poppins',
    title_font_size=34,
    legend_font_size=26,
    tooltip_font_size=26,
    transition='300ms ease-out',
    colors=('#ff0000', '#42f0d6', '#004bd6', '#30af1d', '#58fd3f', '#b60205', '#fdf926','#cb45fc', '#6dd2fd', '#a1a1a1'))

for i, row in vote_results_centroid.iterrows():
    pie_chart = pygal.Pie(style=custom_style)
    pie_chart.title = row["nimi"]
    for j in range (6,16):
        pie_chart.add(row.index[j], row[j])
    pie_chart.render_to_file('data/charts/'+row["nimi"]+'.svg')

In [385]:
#Create a new folium map instance for the different layers.
m = folium.Map(location=[60.25, 24.8], zoom_start=10, control_scale=True, max_bounds=True,
              min_lat=59.60, max_lat=60.90, min_lon=24.0, max_lon=25.9, control=False)
folium.raster_layers.TileLayer(tiles="CartoDB positron", control=False).add_to(m)

<folium.raster_layers.TileLayer at 0x23729ee00f0>

In [386]:
#Add the districts as a GeoJson map layout
style={'fillColor': 'Black', 'fillOpacity' : '0.05', 'color': 'Black', 'weight' : '0.5'}
folium.GeoJson(
    districts,
    name='Districts',
    control=True,
    style_function=lambda x: style,
    show=False
).add_to(m)

<folium.features.GeoJson at 0x23725d252e8>

In [387]:
#Add the voting areas as a GeoJson map layout
style_vote={'fillColor': 'Black', 'fillOpacity' : '0.05', 'color': 'Purple', 'weight' : '0.5'}
folium.GeoJson(
    voting_areas,
    name='Voting areas',
    overlay=True,
    show=False,
    style_function=lambda x: style_vote
).add_to(m)

<folium.features.GeoJson at 0x23725ca6668>

In [388]:
#Create a choropleth map on the distribution of unemployed people in each district.
district_unemp
unemp_map=folium.Choropleth(
    geo_data=district_unemp,
    name='Unemployment rate',
    legend_name='Unemployment rate (%)',
    data=district_unemp,
    columns=["geoid","Unemployment rate"],
    key_on='feature.properties.geoid',
    fill_color='YlOrRd',
    nan_fill_color="Grey",
    fill_opacity=0.5,
    line_opacity=0.2,
    line_color='black',
    line_weight=1,
    highlight=True,
    smooth_factor=1,
    show=False
).add_to(m)

#Add a GeoJson tooltip to the map element which shows the name and unemplyment statistics of the district
#when hovering over the map object
unemp_map.geojson.add_child(
folium.features.GeoJsonTooltip(fields=['Nimi', "Unemployment rate"])
)

<folium.features.GeoJson at 0x2372b3570b8>

In [389]:
#Create a choropleth map on the distribution of people with only basic education in each district.
edu_map=folium.Choropleth(
    geo_data=district_ed,
    name='People with basic education',
    legend_name='People with basic education (%)',
    data=district_ed,
    columns=["geoid","Basic education"],
    key_on='feature.properties.geoid',
    fill_color='YlOrRd',
    nan_fill_color="Grey",
    fill_opacity=0.5,
    line_opacity=0.2,
    line_color='black',
    line_weight=1,
    highlight=True,
    smooth_factor=1,
    show=False,
).add_to(m)

#Add a GeoJson tooltip to the map element which shows the name and education statistics of the district
#when hovering over the map object.
edu_map.geojson.add_child(
folium.features.GeoJsonTooltip(fields=['Nimi', "Basic education","Middle level education", "Lower university education", "Higher university education"])
)

<folium.features.GeoJson at 0x2372b8761d0>

In [390]:
'''Iterate over the rows in the vote_results_centroid dataframe and create a folium Circle-element to each
location with color according to the winning party and size scaled to the amount of voters in the voting area.
Categorise these elements by the winning party and add them to separate FeatureGroups.
Also add a popup to each circle-element containing a Vincent-piechart on the distribution of votes in the area.
'''


#Create lists for the winning parties and their colors.
partycmap=["red", "turquoise", "blue", "lime"]
parties=["SDP", "PS", "KOK","VIHR"]

#Create an empty dictionary and add winning parties to it as Folium Feature groups with name and text color by
#party.
partyfglist={}
text_style='<span style="color: {col};">{txt}</span>'
for idx, color in enumerate(partycmap):
    partyfglist["{0}".format(parties[idx])]=folium.FeatureGroup(name= text_style.format(txt= parties[idx], col= color), overlay=True)

#Iterate over the vote_results_centoid dataframe
for i, row in vote_results_centroid.iterrows():
    
    #HTML-formating for the pop-up chart
    chartsrc= '<embed type="image/svg+xml" src="https://Posanderi.github.io/data/charts/'+row["nimi"]+'.svg" alt="Cannot find the chart" width="300" height="300">'
    
    #Add a circle element to the map with color according to the winning party and size scaled according to the
    #amount of voters and add the formerly created Vincent chart to it as a popup.
    vote_circle=folium.Circle([row.geometry.y, row.geometry.x], radius=((row["vote_cl"]+5)*50),
                  stroke=False, fill=True, fill_opacity=0.9, color=funcs.party_cmap(party=row["winner"]),
                  popup=folium.Popup(html=chartsrc))
    
    #Keep the circle in front of other map layers.
    m.keep_in_front(vote_circle)

    #Add the circle element to a feature group according to the winning party.
    for party in partyfglist:
        if row["winner"]==party:
            vote_circle.add_to(partyfglist[party])
            
#Add the party feature groups to the map.    
for party in partyfglist:
    partyfglist[party].add_to(m)

In [391]:
#Add a maplayer control tool to show/hide the different map elements
folium.map.LayerControl(collapsed=False).add_to(m)

<folium.map.LayerControl at 0x23729ed7ac8>

In [392]:
#Save the map to the root folder
m.save("index.html")