In [1]:
import pandas as pd
import geopandas as gpd
import mapclassify as mapc
import folium
import mplleaflet
import shapely as shp
from shapely.geometry import Point, LineString, Polygon
import matplotlib.pyplot as plt
import numpy as np
from folium import IFrame
import functions as funcs
import vincent
import json

#Spatial inputs for the analysis: the district polygons and the voting-area polygons of the HRI-area.
districts = gpd.read_file("data/aluejaot/pks_pienalue.shp")
voting_areas = gpd.read_file("data/aluejaot/Aanestysaluejako_PKS_2019.shp")

#Election results by party and voting area: absolute vote counts first, then percentages.
election_val = pd.read_csv("data/vaalit2019.csv")
election_pr = pd.read_csv("data/vaalit2019%.csv")

#District-level background statistics for the capital area (unemployment rates and education levels, year 2017).
#Kauniainen is excluded for lack of data.
#The area unit differs per city: Helsinki uses 'peruspiiri', Espoo 'pienalue' and Vantaa 'kaupunginosa'.
unemployment_Espoo = pd.read_csv("data/tyottomyys_Espoo.csv")
unemployment_Helsinki = pd.read_csv("data/tyottomyys_Helsinki.csv")
unemployment_Vantaa = pd.read_csv("data/tyottomyys_Vantaa.csv")

education_Espoo = pd.read_csv("data/koulutus_Espoo.csv")
education_Helsinki = pd.read_csv("data/koulutus_Helsinki.csv")
education_Vantaa = pd.read_csv("data/koulutus_Vantaa.csv")

In [2]:
#Join the absolute and percentage election results on the area name ("alue").
#Percentage columns that clash with a count column get the "_p" suffix.
election = pd.merge(election_val, election_pr, on="alue", suffixes=("", "_p"))

In [3]:
#A '-' marks an area where the party had no candidate; store it as 0 so the columns are easier to handle.
election = election.replace(to_replace="-", value=0)

In [4]:
#Some result columns were read in as object dtype. Convert them to int64 (absolute values) and
#float64 (percentages).
#The converted columns are assigned back by label rather than through .iloc: a full-column .iloc
#assignment writes the values into the existing columns in place and can leave them as object dtype,
#so the conversion would silently not stick.
#NOTE(review): the positional ranges 12:21 and 32: depend on the column order of the merged CSVs -
#confirm them if the input files change.
count_cols=election.columns[12:21]
share_cols=election.columns[32:]
election[count_cols]=election[count_cols].astype("int64")
election[share_cols]=election[share_cols].astype("float64")

In [5]:
#Run funcs.find_winner on every row; it adds the party which received the most votes in the area.
election = election.apply(func=funcs.find_winner, axis="columns")

In [6]:
#Prepare empty columns for the district code ("tunnus") and the city code ("kuntanro") ...
election["tunnus"] = None
election["kuntanro"] = None

#... and let funcs.code_extract fill them row by row from the area name in "alue".
election = election.apply(
    funcs.code_extract,
    axis=1,
    area="alue",
    code="tunnus",
    city_code="kuntanro",
)

In [7]:
#The voting area and district shapefiles carry many columns that are not needed here.
#Keep only the ones used later on, when the statistics of different voting areas are combined
#to one district area.
district_cols = ["geometry","Nimi", "KUNTA", "PIEN", "TILA", "SUUR"]
voting_area_cols = ["geometry", "kuntanro", "tunnus", "nimi"]

districts = districts[district_cols]
voting_areas = voting_areas[voting_area_cols]

In [8]:
#Attach the election results to the voting-area geometries; a row matches when both the
#area code ("tunnus") and the city code ("kuntanro") agree.
vote_join_keys = ["tunnus", "kuntanro"]
vote_results = voting_areas.merge(right=election, how="inner", on=vote_join_keys)

In [9]:
#Build a point layer holding the centroid of every voting area, for the map markers later on.
#Work on a copy: a plain assignment would only alias vote_results, and overwriting the geometry
#column would then replace the voting-area polygons in vote_results as well.
vote_results_centroid=vote_results.copy()
#Centroids are computed in the source CRS first and only then reprojected to WGS84 (EPSG:4326),
#because the markers are placed using the y/x of these points as latitude/longitude.
vote_results_centroid["geometry"]=vote_results_centroid["geometry"].centroid
vote_results_centroid=vote_results_centroid.to_crs(epsg=4326)

  return _prepare_from_string(" ".join(pjargs))


In [10]:
#Stack the per-city tables into one unemployment table and one education table, with a fresh
#running index, so they are easier to use.
unemployment_by_city = [unemployment_Espoo, unemployment_Helsinki, unemployment_Vantaa]
education_by_city = [education_Espoo, education_Helsinki, education_Vantaa]

unemployment = pd.concat(unemployment_by_city, ignore_index=True)
education = pd.concat(education_by_city, ignore_index=True)

In [11]:
#Turn the no-data markers '.' and '..' into NaN and then drop every row that contains a NaN
#(this also removes empty rows).
#np.nan is used instead of the np.NaN alias, which no longer exists in NumPy 2.0.
education=education.replace([".", ".."], np.nan).dropna(axis=0)
unemployment=unemployment.replace([".", ".."], np.nan).dropna(axis=0)

In [12]:
#Reproject the districts to EPSG:3879 (ETRS89 / GK25FIN).
#NOTE(review): this step was originally described as "TM35Fin", but TM35FIN is EPSG:3067; the code
#targets EPSG:3879. Presumably the voting area map is in EPSG:3879 as well - confirm.
#The epsg= keyword replaces the deprecated {'init': ...} dictionary form.
districts=districts.to_crs(epsg=3879)

  return _prepare_from_string(" ".join(pjargs))


In [13]:
#Education and unemployment tables: pull the district code ("code") and the city code ("KUNTA")
#out of the full district name in "Alue" with funcs.code_extract, one row at a time.
extract_opts = dict(code="code", city_code="KUNTA", area="Alue", axis=1)

education["code"] = None
education["KUNTA"] = None
education = education.apply(funcs.code_extract, **extract_opts)

unemployment["code"] = None
unemployment["KUNTA"] = None
unemployment = unemployment.apply(funcs.code_extract, **extract_opts)

#District geometries: add an empty "code" column and run funcs.sep_code on every row to separate
#the codes in the district data.
districts["code"] = None
districts = districts.apply(funcs.sep_code, axis=1)

In [14]:
#Join the education and the unemployment statistics to the district geometries.
#A row matches when both the city code ("KUNTA") and the district code ("code") agree.
district_join_keys = ["KUNTA", "code"]
district_ed = districts.merge(right=education, how="inner", on=district_join_keys)
district_unemp = districts.merge(right=unemployment, how="inner", on=district_join_keys)

In [15]:
#Label the districts, unemployment and education frames as EPSG:3879 so the CRS is known when
#they are mapped in Folium.
#Assigning to .crs only sets the CRS metadata of a frame; it does not transform any coordinates.
for frame in (districts, district_unemp, district_ed):
    frame.crs = {'init': 'epsg:3879'}

In [16]:
#The population values of the education and unemployment frames are object dtype; convert them
#to float64 so they can be used in calculations.

#Education: every column from position 8 onwards.
for column_name in district_ed.columns[8:]:
    district_ed[column_name] = district_ed[column_name].astype("float64")

#Unemployment: give the rate column its English name, then convert it.
district_unemp = district_unemp.rename(columns={"Tyottomyysaste": "Unemployment rate"})
district_unemp["Unemployment rate"] = district_unemp["Unemployment rate"].astype("float64")

In [17]:
#Give the education columns short names that are easier to work with.
short_names = {
    'Kaikki koulutusasteet': 'all',
    'Tutkinnon suorittaneita yhteensä': 'ed_sum',
    'Keskiaste': 'mid_ed',
    'Alin korkea- ja alempi korkeakouluaste': 'low_uni',
    'Ylempi korkeakouluaste ja tutkijakoulutus': 'high_uni',
    'Perusaste tai tuntematon': 'basic_edu',
}
district_ed.rename(columns=short_names, inplace=True)

In [18]:
#For each education level, store its share of the "all" column as a percentage rounded to two
#decimals. Each pair is (new share column, source count column).
share_columns = [
    ("Middle level education", "mid_ed"),
    ("Lower university education", "low_uni"),
    ("Higher university education", "high_uni"),
    ("Basic education", "basic_edu"),
]
for share_name, count_name in share_columns:
    district_ed[share_name] = (district_ed[count_name] / district_ed["all"] * 100).round(2)

In [19]:
#Split the total vote counts ("Yhteensä") into four classes with Natural Breaks and store the class
#of every area in "vote_cl".
#The class is used later to draw differently sized markers indicating the amount of votes per area.
total_votes = vote_results_centroid["Yhteensä"]
cl_votes = mapc.NaturalBreaks(y=total_votes, k=4)
vote_results_centroid["vote_cl"] = total_votes.apply(cl_votes).astype(int)

In [20]:
#Give the unemployment and education frames a running 'geoid' column and use its string form as
#the index. The choropleth layers join their data to the map features through 'geoid'.
for frame in (district_unemp, district_ed):
    frame['geoid'] = list(range(len(frame)))
    frame.index = frame["geoid"].astype(str)

In [84]:
#Base map that all the following layers are added to.
m = folium.Map(
    location=[60.25, 24.8],
    zoom_start=9,
    tiles="CartoDB positron",
    control_scale=True,
    control=False,
)
m.name = "Cardodb layer"

In [85]:
#District outlines as a GeoJson layer: thin black borders with a faint black fill.
style = {
    'fillColor': 'Black',
    'fillOpacity': '0.05',
    'color': 'Black',
    'weight': '0.5',
}
folium.GeoJson(
    districts,
    name='Districts',
    style_function=lambda feature: style,
    control=True,
).add_to(m)

  return _prepare_from_string(" ".join(pjargs))


<folium.features.GeoJson at 0x1e8de5f6588>

In [86]:
#Voting-area outlines as a GeoJson overlay: thin purple borders with a faint black fill.
#The layer is created with show=False.
style_vote = {
    'fillColor': 'Black',
    'fillOpacity': '0.05',
    'color': 'Purple',
    'weight': '0.5',
}
folium.GeoJson(
    voting_areas,
    name='Voting areas',
    style_function=lambda feature: style_vote,
    overlay=True,
    show=False,
).add_to(m)

  return _prepare_from_string(" ".join(pjargs))


<folium.features.GeoJson at 0x1e8de5f6438>

In [87]:
#Choropleth of the unemployment rate in each district.
#district_unemp supplies both the geometries and the values; the two are linked through 'geoid'.
unemp_map = folium.Choropleth(
    geo_data=district_unemp,
    data=district_unemp,
    columns=["geoid", "Unemployment rate"],
    key_on='feature.properties.geoid',
    name='Unemployment rate',
    legend_name='Unemployment rate (%)',
    fill_color='YlOrRd',
    fill_opacity=0.5,
    nan_fill_color="Grey",
    line_color='black',
    line_weight=1,
    line_opacity=0.2,
    smooth_factor=1,
    highlight=True,
    show=False
).add_to(m)

#Tooltip that shows the district name and its unemployment rate when hovering over the map object.
unemp_tooltip = folium.features.GeoJsonTooltip(fields=['Nimi', "Unemployment rate"])
unemp_map.geojson.add_child(unemp_tooltip)

  return _prepare_from_string(" ".join(pjargs))


<folium.features.GeoJson at 0x1e8debfa278>

In [88]:
#Choropleth of the share of people with only basic education in each district.
#district_ed supplies both the geometries and the values; the two are linked through 'geoid'.
edu_map = folium.Choropleth(
    geo_data=district_ed,
    data=district_ed,
    columns=["geoid", "Basic education"],
    key_on='feature.properties.geoid',
    name='People with basic education',
    legend_name='People with basic education (%)',
    fill_color='YlOrRd',
    fill_opacity=0.5,
    nan_fill_color="Grey",
    line_color='black',
    line_weight=1,
    line_opacity=0.2,
    smooth_factor=1,
    highlight=True,
    show=False,
).add_to(m)

#Tooltip that shows the district name and all four education shares when hovering over the map object.
edu_tooltip_fields = [
    'Nimi',
    "Basic education",
    "Middle level education",
    "Lower university education",
    "Higher university education",
]
edu_map.geojson.add_child(folium.features.GeoJsonTooltip(fields=edu_tooltip_fields))

  return _prepare_from_string(" ".join(pjargs))


<folium.features.GeoJson at 0x1e8dee1a080>

In [89]:
#Draw one folium Circle per row of vote_results_centroid, at the centroid of the voting area.
#Each circle is coloured according to the winning party, sized by the vote class ("vote_cl") and
#placed in a FeatureGroup for its winning party so the parties can be toggled separately.
#Every circle gets a popup that shows a static chart image.

#Winning parties and their colours, in matching order.
partycmap=["red", "turquoise", "blue", "lime"]
parties=["SDP", "PS", "KOK","VIHR"]

#One FeatureGroup per party, keyed by party name. The group name is wrapped in a coloured <span>
#so the party name is written in the party colour.
text_style='<span style="color: {col};">{txt}</span>'
partyfglist={}
for party, color in zip(parties, partycmap):
    partyfglist[party]=folium.FeatureGroup(name=text_style.format(txt=party, col=color), overlay=True)

#HTML for the popup chart. It is the same for every circle, so it is built once outside the loop.
chartsrc= '<img src="/GitHub/Posanderi.github.io/test.png" alt="Cannot find the chart" width="500" height="500">'

#Disabled alternative, kept for reference: a Vincent pie chart of the votes in each area as the popup.
#    vinchart=vincent.Pie(row[6:13], width=200, height=200)
#    vinchart.legend(row["nimi"]+"\n Biggest party: "+row["winner"])
#    vinchart.colors(brew="Set1")
#    votechart=folium.Vega(vinchart.to_json(), width=450, height=200)
#    popup=folium.Popup(row[46]).add_child(votechart)

#Iterate over the vote_results_centroid dataframe.
for idx, row in vote_results_centroid.iterrows():
    winner=row["winner"]

    #Areas whose winner has no feature group are not drawn. They must not be registered with
    #keep_in_front either, because that would refer to a layer that is never added to the map.
    party_group=partyfglist.get(winner)
    if party_group is None:
        continue

    #Circle with color according to the winning party and radius scaled by the vote class.
    vote_circle=folium.Circle([row.geometry.y, row.geometry.x], radius=((row["vote_cl"]+5)*30),
                  stroke=False, fill=True, fill_opacity=0.9, color=funcs.party_cmap(party=winner),
                  popup=folium.Popup(html=chartsrc))

    #Add the circle to the feature group of its winning party and keep it in front of other map layers.
    vote_circle.add_to(party_group)
    m.keep_in_front(vote_circle)

#Add the party feature groups to the map.
for party_group in partyfglist.values():
    party_group.add_to(m)

In [90]:
#Layer control (shown expanded) for showing/hiding the different map elements.
layer_control = folium.LayerControl(collapsed=False)
layer_control.add_to(m)

<folium.map.LayerControl at 0x1e8dcef0eb8>

In [91]:
#Write the finished map to "map.html" (a relative path, so it lands in the current working directory).
m.save(outfile="map.html")