In [60]:
import folium
import pandas as pd
import numpy as np

In [61]:
# Import data from GeoName
import requests
import json

def getData(place):
    base_url = 'http://api.geonames.org/searchJSON'
    payload = {'username': 'adaisp',
               'country' : 'CH',
               'q' : place,
               'fcodeName' : 'university',
                }
    response = requests.get(base_url, params=payload)
    data = json.loads(response.text)
    try:
        i = 0
        results = {
                'canton': data['geonames'][i]['adminCode1'],
                'lat' : data['geonames'][i]['lat'],
                'long' : data['geonames'][i]['lng'],
            }
        while (data['geonames'][i]['adminName1'] == ''):
            i += 1
            results = {
                'canton': data['geonames'][i]['adminCode1'],
                'lat' : data['geonames'][i]['lat'],
                'long' : data['geonames'][i]['lng'],
            }
    except:
        results = {
            'canton': 'No Match',
            'lat' : 'No Match',
            'long' : 'No Match',
        }
        
    return results

In [62]:
grant_export = 'P3_GrantExport.csv'
raw_data = pd.read_csv(grant_export, sep=';', error_bad_lines=False)

uni_data = raw_data.loc[:, ['University', 'Approved Amount']]
uni_data[['Approved Amount']] = uni_data[['Approved Amount']].apply(pd.to_numeric, errors='coerce')

# drop the records where the field University is blank
uni_data = uni_data[uni_data.University != 'Nicht zuteilbar - NA']

# drop the records corresponding to companies or private sector
uni_data = uni_data[uni_data.University != 'Firmen/Privatwirtschaft - FP']

# drop the records corresponding to NPO
uni_data = uni_data[uni_data.University != 'NPO (Biblioth., Museen, Verwalt.) - NPO']

# we only take into account the Swiss Universities to check if we cover 95% of the records
number_valid_records = len(uni_data)

# compute the approved amount by University
uni_data = uni_data.groupby(['University'])['Approved Amount'].sum()

uni_data.sort_values(ascending=False, inplace=True)

In [63]:
data = pd.DataFrame(uni_data)
data['Records number'] = raw_data['University'].value_counts()
data['Canton'] = ['']*len(data)
data['Latitude'] = ['']*len(data)
data['Longitude'] = ['']*len(data)

for univ,row in data.iterrows():
    
    info = getData(univ.split(' - ')[0])
    
    if (info.get('canton') == 'No Match' and len(univ.split(' - ')) > 1):
        info = getData(univ.split(' - ')[1])

    data.set_value(univ, 'Canton', info.get('canton'))
    data.set_value(univ, 'Latitude', info.get('lat'))
    data.set_value(univ, 'Longitude', info.get('long'))

After getting informations from GeoNames API, we have to check if these informations cover at least 95% of the records in the raw_data:

In most of the cases we don't get any information from the API. So we try to find the name of the canton inside the name of the University/Institute. Then, we had to add everything manually.

In [64]:
# try to look for patterns in the University name
for univ, row in data[data.index.str.contains('Luzern')].iterrows():
    data.set_value(univ, 'Canton', 'LU')
    
for univ, row in data[data.index.str.contains('Zürcher')].iterrows():
    data.set_value(univ, 'Canton', 'ZH')

for univ, row in data[data.index.str.contains('Zürich')].iterrows():
    data.set_value(univ, 'Canton', 'ZH')
    
for univ, row in data[data.index.str.contains('Svizzera italiana')].iterrows():
    data.set_value(univ, 'Canton', 'TI')
    
for univ, row in data[data.index.str.contains('Bern')].iterrows():
    data.set_value(univ, 'Canton', 'BE')

for univ, row in data[data.index.str.contains('St. Gallen')].iterrows():
    data.set_value(univ, 'Canton', 'SG')

for univ, row in data[data.index.str.contains('Vaud')].iterrows():
    data.set_value(univ, 'Canton', 'VD')
    
for univ, row in data[data.index.str.contains('Wallis')].iterrows():
    data.set_value(univ, 'Canton', 'VS')

for univ, row in data[data.index.str.contains('Thurgau')].iterrows():
    data.set_value(univ, 'Canton', 'TG')

data.set_value('HES de Suisse occidentale - HES-SO', 'Canton', 'JU')
data.set_value('Inst. de Hautes Etudes Internat. et du Dév - IHEID', 'Canton', 'GE')
data.set_value('Forschungsanstalten Agroscope - AGS', 'Canton', 'ZH')
data.set_value('Idiap Research Institute - IDIAP', 'Canton', 'VS')
data.set_value('Friedrich Miescher Institute - FMI', 'Canton', 'BS')
data.set_value('Physikal.-Meteorolog. Observatorium Davos - PMOD', 'Canton', 'GR')
data.set_value('Allergie- und Asthmaforschung - SIAF', 'Canton', 'GR')
data.set_value('Swiss Institute of Bioinformatics - SIB', 'Canton', 'VD')
data.set_value('Fachhochschule Ostschweiz - FHO', 'Canton', 'SG')
data.set_value('Inst. universit. romand de Santé au Travail - IST', 'Canton', 'VD')
data.set_value('Schweizer Kompetenzzentrum Sozialwissensch. - FORS', 'Canton', 'VD')
data.set_value('Swiss Center for Electronics and Microtech. - CSEM', 'Canton', 'NE')
data.set_value('Interkant. Hochschule für Heilpädagogik ZH - HfH', 'Canton', 'ZH')
data.set_value('Institut Universitaire Kurt Bösch - IUKB', 'Canton', 'VS')
data.set_value('Ente Ospedaliero Cantonale - EOC', 'Canton', 'TI')
data.set_value('Schweiz. Institut für Kunstwissenschaft - SIK-ISEA', 'Canton', 'ZH')
data.set_value('Pädagogische Hochschule Nordwestschweiz - PHFHNW', 'Canton', 'AG')
data.set_value('AO Research Institute - AORI', 'Canton', 'GR')
data.set_value('Forschungsinstitut für biologischen Landbau - FIBL', 'Canton', 'AG')
data.set_value('Forschungsinstitut für Opthalmologie - IRO', 'Canton', 'VS')
data.set_value('Eidg. Hochschulinstitut für Berufsbildung - EHB', 'Canton', 'BE')
data.set_value('Institut für Kulturforschung Graubünden - IKG', 'Canton', 'GR')
data.set_value('Centre de rech. sur l\'environnement alpin - CREALP', 'Canton', 'VS')
data.set_value('Haute école pédagogique fribourgeoise - HEPFR', 'Canton', 'FR')
data.set_value('Haute école pédagogique BE, JU, NE - HEPBEJUNE', 'Canton', 'JU')
data.set_value('Pädagogische Hochschule Zug - PHZG', 'Canton', 'ZG')
data.set_value('Pädagogische Hochschule Graubünden - PHGR', 'Canton', 'GR')
data.set_value('Robert Walser-Stiftung Bern - RWS', 'Canton', 'BE')
data.set_value('Franklin University Switzerland - FUS', 'Canton', 'TI')
data.set_value('Instituto Ricerche Solari Locarno - IRSOL', 'Canton', 'TI')
data.set_value('Pädagogische Hochschule Schwyz - PHSZ', 'Canton', 'SZ')
data.set_value('Inst. Suisse de Spéléologie et Karstologie - ISSKA', 'Canton', 'NE')
data.set_value('Fachhochschule Kalaidos - FHKD', 'Canton', 'ZH')
data.set_value('Staatsunabh. Theologische Hochschule Basel - STHB', 'Canton', 'BS')
data.set_value('Schweiz. Hochschule für Logopädie Rorschach - SHLR', 'Canton', 'SG')
data.set_value('Pädagogische Hochschule Wallis - PHVS', 'Canton', 'VS')
data.set_value('Pädag. Hochschule Tessin (Teilschule SUPSI) - ASP', 'Canton', 'TI')
data.set_value('Facoltà di Teologia di Lugano - FTL', 'Canton', 'TI')
data.set_value('Pädagogische Hochschule Schaffhausen - PHSH', 'Canton', 'SH')
data.set_value('Forschungskommission SAGW', 'Canton', 'BE')
data.set_value('Fernfachhochschule Schweiz (Mitglied SUPSI) - FFHS', 'Canton', 'VS')

Unnamed: 0_level_0,Approved Amount,Records number,Canton,Latitude,Longitude
University,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Université de Genève - GE,1.838237e+09,6394,GE,46.20222,6.14569
Universität Zürich - ZH,1.826843e+09,6774,ZH,47.37092,8.53434
ETH Zürich - ETHZ,1.635597e+09,6153,ZH,47.3763,8.54805
Universität Bern - BE,1.519373e+09,5473,BE,46.95096,7.43548
Universität Basel - BS,1.352251e+09,4746,BS,47.55832,7.58403
Université de Lausanne - LA,1.183291e+09,4092,VD,46.52376,6.58409
EPF Lausanne - EPFL,1.175316e+09,4428,VD,46.51939,6.56673
Université de Fribourg - FR,4.575262e+08,2079,FR,46.80683,7.15317
Université de Neuchâtel - NE,3.832046e+08,1596,NE,46.99385,6.93789
Paul Scherrer Institut - PSI,1.152690e+08,538,AG,47.5385,8.23028


We see that after filling the dataframe, we have more than 95% of the cantons that are covered. So we stop there.

In [65]:
canton_found = data[data.Canton != 'No Match']
percentage = len(canton_found) / len(data)

percentage

0.9594594594594594

Now we create the dataframe containing the total approved amount for each canton. If there is no entry for a canton in the dataframe, we just put 0.

In [66]:
all_cantons = np.array(['AG','AI','AR','BE','BL','BS','FR','GE','GL','GR','LU',
                        'NE','NW','OW','SG','SO','SZ','TG','TI','UR','VD','VS','ZG','ZH','SH','JU'])
amounts = canton_found.groupby(['Canton'])['Approved Amount'].sum()

cantons_amount = []
for canton in all_cantons:
    if amounts.get(canton):
        cantons_amount.append(np.log10(amounts.get(canton)))
    else:
        cantons_amount.append(0)

cantons_amount_df = pd.DataFrame()
cantons_amount_df['Canton'] = all_cantons
cantons_amount_df['Amount'] = cantons_amount

To place markers on the map, we also create a dataframe containing the universities / institutes that have coordinates coming from the geonames API.
We can see that only 20 places are found by the API.

In [67]:
univ_found_api = canton_found[canton_found.Latitude != 'No Match']
univ_found_api.shape

(20, 5)

In [68]:
cantons_geo = 'ch-cantons.topojson.json'

swiss_map = folium.Map(location=[46.801111,8.226667], zoom_start=8)

for univ, row in univ_found_api.iterrows():
     folium.Marker([row['Latitude'], row['Longitude']],
                   popup=univ, 
                   icon = folium.Icon(color = 'red', icon = 'glyphicon-book')).add_to(swiss_map)

swiss_map.choropleth(geo_path=cantons_geo, 
                     topojson='objects.cantons', 
                     data=cantons_amount_df,columns=['Canton', 'Amount'],
                     key_on='feature.id',
                     threshold_scale=[4, 5, 6, 7, 8, 9],
                     line_opacity=0.2, fill_opacity=0.7, fill_color='YlOrBr',
                     legend_name='Approved amount (logarithmic scale)',
                    )
swiss_map

In [69]:
swiss_map.save("map_manual.html")