# Interactive Visualization

As always, some necessary imports:

In [166]:
import folium
import pandas as pd
from bs4 import BeautifulSoup
import requests
from geopy.geocoders import Nominatim
import numpy
from geopy.exc import GeocoderTimedOut
from helpers import *

We prepare a dictionary file to store the cantons:

In [196]:
# Start the on-disk canton "dictionary" as an HTML table. Mode 'w' truncates any
# file left over from a previous run; later cells append <tr> rows to it and read
# it back with pd.read_html. The context manager closes the handle even on error.
with open('myfile.html','w') as f:
    f.write("<table>")

Then we read the data from the CSV file and remove the rows whose "University" field is missing or not assignable ("Nicht zuteilbar"):

In [197]:
data = pd.read_csv("P3_GrantExport.csv", delimiter=";")

# Keep only grants attached to an identifiable university.
# Missing values are dropped explicitly: searching for the text "NaN" with
# str.contains does not match missing entries.
data = data.dropna(subset=["University"])
# "Nicht zuteilbar" (German: "not assignable") is matched as plain text, not as a regex.
data = data[~data["University"].str.contains("Nicht zuteilbar", regex=False)]

The data has the following form:

In [198]:
# Preview the first rows only; rendering the whole frame bloats the notebook.
data.head(10)

Unnamed: 0,"﻿""Project Number""",Project Title,Project Title English,Responsible Applicant,Funding Instrument,Funding Instrument Hierarchy,Institution,University,Discipline Number,Discipline Name,Discipline Name Hierarchy,Start Date,End Date,Approved Amount,Keywords
1,4,Batterie de tests à l'usage des enseignants po...,,Massarenti Léonard,Project funding (Div. I-III),Project funding,Faculté de Psychologie et des Sciences de l'Ed...,Université de Genève - GE,10104,Educational science and Pedagogy,"Human and Social Sciences;Psychology, educatio...",01.10.1975,30.09.1976,41022.00,
2,5,"Kritische Erstausgabe der ""Evidentiae contra D...",,Kommission für das Corpus philosophorum medii ...,Project funding (Div. I-III),Project funding,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10101,Philosophy,Human and Social Sciences;Linguistics and lite...,01.03.1976,28.02.1985,79732.00,
3,6,Katalog der datierten Handschriften in der Sch...,,Burckhardt Max,Project funding (Div. I-III),Project funding,Abt. Handschriften und Alte Drucke Bibliothek ...,Universität Basel - BS,10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1976,52627.00,
4,7,Wissenschaftliche Mitarbeit am Thesaurus Lingu...,,Schweiz. Thesauruskommission,Project funding (Div. I-III),Project funding,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",10303,Ancient history and Classical studies,Human and Social Sciences;Theology & religious...,01.01.1976,30.04.1978,120042.00,
5,8,Die schweizerische Wirtschaftspolitik seit dem...,,Kleinewefers Henner,Project funding (Div. I-III),Project funding,"Séminaire de politique économique, d'économie ...",Université de Fribourg - FR,10203,Economics,"Human and Social Sciences;Economics, law",01.01.1976,31.12.1978,53009.00,
6,9,Theologische Forschungen zur Oekumene (Studien...,,Stirnimann Heinrich,Project funding (Div. I-III),Project funding,Institut für ökumenische Studien Université de...,Université de Fribourg - FR,10102,"Religious sciences, Theology",Human and Social Sciences;Theology & religious...,01.01.1976,31.12.1976,25403.00,
7,10,Konfuzianische Kulturwerte in der sozialen Ent...,,Deuchler Martina,Project funding (Div. I-III),Project funding,Ostasiatisches Seminar Universität Zürich,Universität Zürich - ZH,10301,History in general,Human and Social Sciences;Theology & religious...,01.10.1975,31.03.1977,47100.00,
8,11,Edizione degli scritti di Aurelio de' Giorgi B...,,Stäuble Antonio,Project funding (Div. I-III),Project funding,,Université de Lausanne - LA,10502,Romance languages and literature,Human and Social Sciences;Linguistics and lite...,01.10.1975,31.03.1977,25814.00,
9,13,La construction de nouveautés au sein des morp...,,Piaget Jean,Project funding (Div. I-III),Project funding,Laboratoire de Didactique et Epistémologie des...,Université de Genève - GE,10105,Psychology,"Human and Social Sciences;Psychology, educatio...",01.10.1975,30.09.1978,360000.00,
10,14,"Wissensstruktur, Fragegenese und Wissenserweit...",,Flammer August,Project funding (Div. I-III),Project funding,Klinische Psychologie und Psychotherapie Insti...,Université de Fribourg - FR,10105,Psychology,"Human and Social Sciences;Psychology, educatio...",01.10.1975,31.12.1978,153886.00,


Now, we drop the duplicates in the "University" column:

In [203]:
# One representative grant per distinct university, so each university is only
# geocoded once. The column is addressed by name instead of by position.
data2 = data.drop_duplicates(subset="University")
# The first column (project number) is taken by position: its header text, as
# rendered in the table output above, carries a stray BOM and extra quotes.
df = data2[[data2.columns[0],'Institution','University']]

We apply the function `getCanton` to our dataframe to get the canton of each university. For that we use the GeoNames API. We also combine two ways of looking up cantons: with and without the help of the abbreviation found in the university name.

In [204]:
# GeoNames search endpoint restricted to Switzerland; the search term is appended to it.
baseUrl="http://api.geonames.org/search?country=CH&username=nevena&q="
def getCanton(row,useAbr=False,isAbr=False,prefix="",retries=3):
    """Look up the canton of one university and append it to 'myfile.html'.

    The name is searched on GeoNames, the first <lat>/<lng> pair of the response
    is reverse-geocoded with Nominatim, and the "state" entry of the resulting
    address is taken as the canton. On success one
    <tr><td>project id</td><td>name</td><td>canton</td></tr> row is appended
    to 'myfile.html'.

    Parameters
    ----------
    row : sequence
        Read by position: row[0] project id, row[1] institution, row[2] university name.
    useAbr : bool
        If True, search with getAbr(name)[0] instead of the full name; when that
        search yields no coordinates, try once more with getAbr(name)[1].
        getAbr comes from helpers; presumably it splits "Name - ABBR" into its
        parts -- TODO confirm.
    isAbr : bool
        Set by that internal second attempt, so the stored name becomes
        prefix + " - " + name.
    prefix : str
        Text put in front of the stored name when isAbr is True.
    retries : int
        Number of additional reverse-geocoding attempts after a GeocoderTimedOut.

    Returns
    -------
    The canton string, or None when no coordinates, no location or no "state"
    could be found (or every reverse-geocoding attempt timed out).
    """
    name=row[2]
    nid=name
    inst=row[1]
    pid=row[0]
    x=None
    if useAbr:
        x=getAbr(name)
        if len(x)>1:
            name=x[0]
        else:
            return None
    url=baseUrl+name
    r=requests.get(url.strip())
    soup=BeautifulSoup(r.text,"lxml")
    try:
        lat=soup.find('lat').getText()
        lng=soup.find('lng').getText()
    except AttributeError:
        # soup.find returned None: the response contains no coordinates.
        if useAbr and len(x)>1:
            return getCanton([pid,inst,x[1]],False,True,x[0],retries)
        return None

    # Retry only the reverse-geocoding step on timeout, a bounded number of times.
    # The earlier handler re-entered getCanton with the name string as `row`,
    # which made row[2] a single character and could recurse without limit.
    location=None
    for attempt in range(retries+1):
        try:
            geolocator = Nominatim()
            location = geolocator.reverse([lat, lng])
            break
        except GeocoderTimedOut:
            if attempt==retries:
                return None
    if location is None:
        return None
    try:
        canton=location.raw["address"]["state"]
    except KeyError:
        # The address has no "state" entry.
        return None
    if isAbr: nid=prefix+" - "+nid
    with open('myfile.html','a') as f:
        f.write("<tr><td>{}</td><td>{}</td><td>{}</td></tr>".format(pid, nid,canton))
    return canton

universities=numpy.array(df)
# Iterate over every row: range(0, n-1) stopped one short and never geocoded
# the last university.
for university in universities:
    getCanton(university)       # lookup by full name
    getCanton(university,True)  # lookup via the abbreviation

We read the data back from the dictionary file and drop duplicates. Below you can see what the dictionary looks like.

In [205]:
# Read the rows collected so far. The file has no header row, so the columns are
# labelled 0 (project id), 1 (name) and 2 (canton); keep one row per project id.
dictionary = (
    pd.read_html("myfile.html", encoding='utf-8')[0]
    .drop_duplicates(subset=[0])
)
dictionary

Unnamed: 0,0,1,2
0,4,Université de Genève - GE,Genève
1,6,Universität Basel - BS,Basel-Stadt
3,8,Université de Fribourg - FR,Fribourg - Freiburg
5,10,Universität Zürich - ZH,Zürich
7,11,Université de Lausanne - LA,Vaud
8,30,Universität Bern - BE,Bern - Berne
10,39,"Eidg. Forschungsanstalt für Wald,Schnee,Land -...",Zürich
11,48,Université de Neuchâtel - NE,Neuchâtel
13,60,ETH Zürich - ETHZ,Zürich
14,106,Universität St. Gallen - SG,Sankt Gallen


Now we want to assign cantons to the NPO entries. For that, we take the rows that contain "Univer" in "Institution" and "NPO" in "University". There are 60 such entries.

In [206]:
# NPO grants whose institution text mentions a university. Rows with a missing
# value in either column count as "no match".
useInst = data.loc[
    data["Institution"].str.contains("Univer", na=False)
    & data["University"].str.contains("NPO", na=False),
    [data.columns[0], 'Institution', 'University'],
]
useInst.shape

(60, 3)

Now we get cantons using institution and our dictionary.

In [207]:
def getUniFromInst(inst):
    """Find the canton of an NPO grant through the university named in its institution.

    inst is read by position: inst[0] project id, inst[1] institution text,
    inst[2] value of the "University" column.

    Each name in column 1 of the module-level `dictionary` is cut at " - ";
    the first entry whose leading part occurs in the institution text wins.
    Its canton (column 2) is appended to 'myfile.html' under this grant's
    project id and returned. Returns None when no entry matches.
    """
    for entry, canton in zip(dictionary[1].values, dictionary[2].values):
        uni=entry.split(" - ")
        if uni[0] in inst[1]:
            # The context manager closes the file even if the write fails.
            with open('myfile.html','a') as f:
                f.write("<tr><td>{}</td><td>{}</td><td>{}</td></tr>".format(inst[0],inst[2], canton))
            return canton
    return None
        
institutions=numpy.array(useInst)
# Iterate over every row: range(0, n-1) stopped one short and skipped the last institution.
for institution in institutions:
    getUniFromInst(institution)

Get data from dictionary and drop duplicate values:

In [208]:
# Re-read the file so the rows appended for the NPO grants are included,
# again keeping a single row per project id (column 0).
dictionary = (
    pd.read_html("myfile.html", encoding='utf-8')[0]
    .drop_duplicates(subset=[0])
)

And then write down the found cantons in the dataframe:

In [182]:
def getFromDisc(row):
    """Return the canton recorded in `dictionary` for one grant, or None.

    row is one row of the grants frame, as handed over by
    DataFrame.apply(axis=1) (a pandas Series).

    Lookup order:
      1. by project id (first column of the row) against dictionary column 0;
      2. by university name against dictionary column 1, unless the name
         contains "NPO".
    The canton is taken from dictionary column 2 of the first matching row.
    """
    # Explicit positional / label access: integer keys on a label-indexed
    # Series are deprecated as positions in recent pandas.
    pid=row.iloc[0]
    uni=row["University"]
    by_project=dictionary.loc[dictionary[0]==pid, 2]
    if len(by_project)>0:
        return by_project.iloc[0]
    by_university=dictionary.loc[dictionary[1]==uni, 2]
    if len(by_university)>0 and "NPO" not in uni:
        return by_university.iloc[0]
    return None

# Work on a copy: a plain assignment would only alias `data`, so adding the
# "canton" column would also modify `data` (itself a filtered slice).
dataWithCanton=data.copy()
dataWithCanton["canton"]=dataWithCanton.apply(getFromDisc,axis=1)
# Preview only; the full frame is too large to render usefully.
dataWithCanton.head(10)

Unnamed: 0,"﻿""Project Number""",Project Title,Project Title English,Responsible Applicant,Funding Instrument,Funding Instrument Hierarchy,Institution,University,Discipline Number,Discipline Name,Discipline Name Hierarchy,Start Date,End Date,Approved Amount,Keywords,canton
1,4,Batterie de tests à l'usage des enseignants po...,,Massarenti Léonard,Project funding (Div. I-III),Project funding,Faculté de Psychologie et des Sciences de l'Ed...,Université de Genève - GE,10104,Educational science and Pedagogy,"Human and Social Sciences;Psychology, educatio...",01.10.1975,30.09.1976,41022.00,,Genève
2,5,"Kritische Erstausgabe der ""Evidentiae contra D...",,Kommission für das Corpus philosophorum medii ...,Project funding (Div. I-III),Project funding,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10101,Philosophy,Human and Social Sciences;Linguistics and lite...,01.03.1976,28.02.1985,79732.00,,
3,6,Katalog der datierten Handschriften in der Sch...,,Burckhardt Max,Project funding (Div. I-III),Project funding,Abt. Handschriften und Alte Drucke Bibliothek ...,Universität Basel - BS,10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1976,52627.00,,Basel-Stadt
4,7,Wissenschaftliche Mitarbeit am Thesaurus Lingu...,,Schweiz. Thesauruskommission,Project funding (Div. I-III),Project funding,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",10303,Ancient history and Classical studies,Human and Social Sciences;Theology & religious...,01.01.1976,30.04.1978,120042.00,,
5,8,Die schweizerische Wirtschaftspolitik seit dem...,,Kleinewefers Henner,Project funding (Div. I-III),Project funding,"Séminaire de politique économique, d'économie ...",Université de Fribourg - FR,10203,Economics,"Human and Social Sciences;Economics, law",01.01.1976,31.12.1978,53009.00,,Fribourg - Freiburg
6,9,Theologische Forschungen zur Oekumene (Studien...,,Stirnimann Heinrich,Project funding (Div. I-III),Project funding,Institut für ökumenische Studien Université de...,Université de Fribourg - FR,10102,"Religious sciences, Theology",Human and Social Sciences;Theology & religious...,01.01.1976,31.12.1976,25403.00,,Fribourg - Freiburg
7,10,Konfuzianische Kulturwerte in der sozialen Ent...,,Deuchler Martina,Project funding (Div. I-III),Project funding,Ostasiatisches Seminar Universität Zürich,Universität Zürich - ZH,10301,History in general,Human and Social Sciences;Theology & religious...,01.10.1975,31.03.1977,47100.00,,Zürich
8,11,Edizione degli scritti di Aurelio de' Giorgi B...,,Stäuble Antonio,Project funding (Div. I-III),Project funding,,Université de Lausanne - LA,10502,Romance languages and literature,Human and Social Sciences;Linguistics and lite...,01.10.1975,31.03.1977,25814.00,,Vaud
9,13,La construction de nouveautés au sein des morp...,,Piaget Jean,Project funding (Div. I-III),Project funding,Laboratoire de Didactique et Epistémologie des...,Université de Genève - GE,10105,Psychology,"Human and Social Sciences;Psychology, educatio...",01.10.1975,30.09.1978,360000.00,,Genève
10,14,"Wissensstruktur, Fragegenese und Wissenserweit...",,Flammer August,Project funding (Div. I-III),Project funding,Klinische Psychologie und Psychotherapie Insti...,Université de Fribourg - FR,10105,Psychology,"Human and Social Sciences;Psychology, educatio...",01.10.1975,31.12.1978,153886.00,,Fribourg - Freiburg


We can see that now we cover almost all the data (we assigned cantons to almost all entries):

In [183]:
# Percentage of grants that received a canton (see the cell output for the value).
x = len(dataWithCanton)
y = int(dataWithCanton["canton"].notnull().sum())
percentage = y * 100 / x
percentage

91.29419544148948

In [184]:
# Grants that are still without a canton.
dataWithCanton[dataWithCanton["canton"].isnull()]

Unnamed: 0,"﻿""Project Number""",Project Title,Project Title English,Responsible Applicant,Funding Instrument,Funding Instrument Hierarchy,Institution,University,Discipline Number,Discipline Name,Discipline Name Hierarchy,Start Date,End Date,Approved Amount,Keywords,canton
2,5,"Kritische Erstausgabe der ""Evidentiae contra D...",,Kommission für das Corpus philosophorum medii ...,Project funding (Div. I-III),Project funding,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10101,Philosophy,Human and Social Sciences;Linguistics and lite...,01.03.1976,28.02.1985,79732.00,,
4,7,Wissenschaftliche Mitarbeit am Thesaurus Lingu...,,Schweiz. Thesauruskommission,Project funding (Div. I-III),Project funding,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",10303,Ancient history and Classical studies,Human and Social Sciences;Theology & religious...,01.01.1976,30.04.1978,120042.00,,
26,36,Aktion Bauernhausforschung in der Schweiz,,Schweizerische Gesellschaft für Volkskunde,Project funding (Div. I-III),Project funding,Schweizerische Gesellschaft für Volkskunde,"NPO (Biblioth., Museen, Verwalt.) - NPO",10403,Ethnology,"Human and Social Sciences;Ethnology, social an...",01.10.1975,30.09.1976,225000.00,,
27,37,Erschliessung und Inventarisierung des Nachlas...,,Kuratorium Carl J. Burckhardt,Project funding (Div. I-III),Project funding,Kuratorium Carl J. Burckhardt,"NPO (Biblioth., Museen, Verwalt.) - NPO",10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.04.1978,179124.00,,
30,40,Mittelalterliche Hartsteinschliffe. Abschluss ...,,Schnyder Rudolf,Project funding (Div. I-III),Project funding,Schweizerisches Nationalmuseum Landesmuseum Zü...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10404,Visual arts and Art history,"Human and Social Sciences;Art studies, musicol...",01.10.1975,30.09.1979,85938.00,,
31,41,Entretiens: Alexandre - image et réalité,,Fondation Hardt pour l'étude de l'Antiquité cl...,Project funding (Div. I-III),Project funding,Fondation Hardt pour l'étude de l'Antiquité cl...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10303,Ancient history and Classical studies,Human and Social Sciences;Theology & religious...,01.08.1975,31.10.1976,8902.00,,
33,43,Monographie über den Zeichner Emanuel Büchel (...,,Historische und Antiquarische Gesellschaft zu ...,Project funding (Div. I-III),Project funding,Historische und Antiquarische Gesellschaft zu ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10404,Visual arts and Art history,"Human and Social Sciences;Art studies, musicol...",01.10.1975,30.09.1976,54509.00,,
35,45,Monographien und Filme über altes Schweizer Ha...,,Schweizerisches Institut für Volkskunde (SIV),Project funding (Div. I-III),Project funding,Schweizerisches Institut für Volkskunde (SIV),"NPO (Biblioth., Museen, Verwalt.) - NPO",10403,Ethnology,"Human and Social Sciences;Ethnology, social an...",01.10.1975,31.10.1977,55000.00,,
37,47,L'image de Genève dans la conscience européenn...,,Candaux Jean-Daniel,Project funding (Div. I-III),Project funding,Bibliothèque de Genève Département des périodi...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10301,History in general,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1976,20000.00,,
52,65,Vorarbeiten für Band V der Basler Universitäts...,,Kommission zur Herausgabe der Basler Rektorats...,Project funding (Div. I-III),Project funding,Kommission zur Herausgabe der Basler Rektorats...,"NPO (Biblioth., Museen, Verwalt.) - NPO",10302,Swiss history,Human and Social Sciences;Theology & religious...,01.10.1975,30.09.1978,134138.00,,


We assume that enough of the data is mapped to a canton, so we will not use the remaining entries. We take our dataframe, drop the rows without a canton, convert the amounts to numbers and group by canton.

In [189]:
# Select the classified rows and the two needed columns in one .loc call and
# copy them, so the assignments below act on an independent frame rather than
# on a slice of dataWithCanton.
dfAmount = dataWithCanton.loc[dataWithCanton["canton"].notnull(), ["Approved Amount", "canton"]].copy()
# convertToNum comes from helpers; presumably it parses the amount text into a
# number -- TODO confirm.
dfAmount["Approved Amount"] = dfAmount["Approved Amount"].apply(convertToNum)
# Keep only the part before " - " in bilingual names such as "Bern - Berne".
dfAmount["canton"] = dfAmount["canton"].str.split(" - ").str[0]
dfAmount = dfAmount.groupby("canton").sum()
dfAmount

Unnamed: 0_level_0,Approved Amount
canton,Unnamed: 1_level_1
Aargau,115428300.0
Basel-Landschaft,42771910.0
Basel-Stadt,1353452000.0
Bern,1534238000.0
Fribourg,457907200.0
Genève,1839987000.0
Luzern,41925890.0
Neuchâtel,383475200.0
Sankt Gallen,84229060.0
Ticino,24040080.0


In order to create the map, since we will be using canton abbreviations, we need to map the canton names to the appropriate abbreviations. For that we download the list of cantons and concatenate it with our dataframe.

In [191]:
cantons = pd.read_html("cantons.html",encoding='utf-8')[0]
# Column 1 ends up as "Canton" (abbreviation) and column 2 as "Name" below.
cantons = cantons[[1,2]]
# The first row holds the header text, not data. The columns keep their integer
# labels because set_index(2) relies on them.
cantons = cantons[1:]
cantons = cantons.set_index(2)

# Align on canton name; cantons without any grant get NaN in the amount column.
# NOTE(review): the join needs identical spellings on both sides (dfAmount has
# e.g. "Sankt Gallen") -- verify against the names in cantons.html.
merged = pd.concat([cantons,dfAmount], axis=1).reset_index()
merged.columns=["Name","Canton","Amount"]

We replace the null values by 0 and round the amounts so that they display nicely. The amounts are now expressed in units of $10^4$, i.e. the original amount divided by 10,000.

In [192]:
def removeNan(row):
    """Return the row's "Amount" divided by 10000 and rounded to 2 decimals, or 0 if it is null."""
    amount = row["Amount"]
    return 0 if pd.isnull(amount) else round(amount / 10000, 2)
    
# Row-wise: null amounts become 0, all others are rescaled and rounded by removeNan.
merged["Amount"] = merged.apply(removeNan, axis=1)

We have finally obtained a table with the grant amounts per canton:

In [193]:
merged

Unnamed: 0,Name,Canton,Amount
0,Aargau,AG,11542.83
1,Appenzell Ausserrhoden,AR,0.0
2,Appenzell Innerrhoden,AI,0.0
3,Basel-Landschaft,BL,4277.19
4,Basel-Stadt,BS,135345.18
5,Bern,BE,153423.83
6,Fribourg,FR,45790.72
7,Genève,GE,183998.69
8,Glarus,GL,0.0
9,Graubünden,GR,0.0


We use this table, the provided TopoJSON file and the folium library to produce the map. We use the yellow–orange–red (`YlOrRd`) color scale to show the differences in expenditure, and we define our own threshold scale.

In [140]:
# Base map centred on Switzerland.
switzerland = folium.Map(location=[46.85, 8.3], zoom_start=8)

# Choropleth settings: 'Canton' (abbreviation) is matched against the
# 'feature.id' of the TopoJSON features and 'Amount' drives the colour,
# binned by the hand-picked threshold scale.
choropleth_options = dict(
    geo_path="ch-cantons.topojson.json",
    data=merged.copy(),
    columns=['Canton', 'Amount'],
    key_on='feature.id',
    topojson='objects.cantons',
    threshold_scale=[0, 2000, 50000, 200000, 300000, 500000],
    fill_color='YlOrRd',
    legend_name='Grant expenditure per canton',
)
switzerland.choropleth(**choropleth_options)
switzerland.save('swiss.html')

The result can be viewed by opening the `swiss.html` file.