# Import the data

In [135]:
import pandas as pd
import requests
import math

In [136]:
df=pd.read_csv('P3_GrantExport.csv',sep=';',usecols=[0,6,7,13])

In [137]:
df=df.rename(columns={'\ufeff"Project Number"':'Project','Approved Amount':'Amount'})

In [138]:
df.describe()

Unnamed: 0,Project
count,63969.0
mean,84723.419453
std,53406.795178
min,1.0
25%,36692.0
50%,101689.0
75%,133018.0
max,171414.0


# Database creation

In [139]:
# Remove missing data
nan_string='data not included in P3'
# Amount NaN
df=df[~df.Amount.isin([nan_string])]
# Institution NaN
df=df[df.Institution.notnull() | (df.University.notnull() & ~df.University.isin(['Nicht zuteilbar - NA']))]
df.shape

(49823, 4)

In [140]:
# Change Amount type
df.Amount=pd.to_numeric(df.Amount)
df.dtypes

Project          int64
Institution     object
University      object
Amount         float64
dtype: object

In [141]:
# Setting index
df=df.set_index('Project')

In [142]:
df

Unnamed: 0_level_0,Institution,University,Amount
Project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,Faculté de Psychologie et des Sciences de l'Ed...,Université de Genève - GE,41022.0
5,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",79732.0
6,Abt. Handschriften und Alte Drucke Bibliothek ...,Universität Basel - BS,52627.0
7,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",120042.0
8,"Séminaire de politique économique, d'économie ...",Université de Fribourg - FR,53009.0
9,Institut für ökumenische Studien Université de...,Université de Fribourg - FR,25403.0
10,Ostasiatisches Seminar Universität Zürich,Universität Zürich - ZH,47100.0
11,,Université de Lausanne - LA,25814.0
13,Laboratoire de Didactique et Epistémologie des...,Université de Genève - GE,360000.0
14,Klinische Psychologie und Psychotherapie Insti...,Université de Fribourg - FR,153886.0


In [127]:
df
df=df.fillna('prova')
a=df.groupby(df['University'])
for i,group in a:
    if i=='prova':
        print(group)
df.shape

       Project                                        Institution University  \
9000     25782        UNI: Universität Basel Biozentrum  Basel CH      prova   
9001     25784  UNI: ETH-Hönggerberg Institut für Biophysik Zü...      prova   
9002     25787        UNI: Friedrich-Miescher Institut   Basel CH      prova   
9003     25788  UNI: ETH-Zürich Laboratorium für organische Ch...      prova   
9004     25793                UNI: Basel Mission Archiv  Basel CH      prova   
9005     25794         UNI: Friedrich Miescher-Institut  Basel CH      prova   
9006     25795  UNI: Univesity of Western Ontario Astronomy De...      prova   
9007     25796  UNI: Kantonsspital Basel Abt. Neurobiologie Ba...      prova   
9018     25826  UNI: Universitätsspital Zürich Departement f ü...      prova   
9019     25827  Physikalische Chemie Departement Chemie Univer...      prova   
9020     25831  UNI: Universität Zürich Physiologisches Inst i...      prova   
9021     25833  Institut Romand de Reche

(49823, 4)

In [143]:
df.head()

Unnamed: 0_level_0,Institution,University,Amount
Project,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,Faculté de Psychologie et des Sciences de l'Ed...,Université de Genève - GE,41022.0
5,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",79732.0
6,Abt. Handschriften und Alte Drucke Bibliothek ...,Universität Basel - BS,52627.0
7,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",120042.0
8,"Séminaire de politique économique, d'économie ...",Université de Fribourg - FR,53009.0


## Extraction of Canton

In [144]:
username='deatinor'
url='http://api.geonames.org/postalCodeSearchJSON?'

In [145]:
correspondencies_dictionary={}

In [146]:
params={'username':username,'maxRows':1,'country':'CH','operator':'OR'}
r=requests.get(url,params=params)

In [147]:
i=0
df_final=pd.DataFrame({'Canton':[]})
for block in df[['Institution','University']].itertuples(index=False):
    nan1=str(block[0])
    nan2=str(block[1])
    
    # By default take only the university.
    # The first time a value is added it is checked for differences if adding also the institution.
    if nan1=='nan':
        if nan2=='nan':
            raise('Bad preprocessing - double nan')
        query_string=block[1]
    elif nan2=='nan':
        query_string=block[0]
    else:
        query_string=block[0]+", "+block[1]
    
    
    # List of checks if already present in the dictionary:
    # 1- institution + university 
    # 2- university 
    # 3- query to geonames
    try:
        canton=correspondencies_dictionary[query_string]
        df2=pd.DataFrame({'Canton':[canton]})
        df_final=df_final.append(df2)
    except:
        try:
            query_string_university=str(block[1])
            canton=correspondencies_dictionary[query_string_university]
            df2=pd.DataFrame({'Canton':[canton]})
            df_final=df_final.append(df2)
        except:
            params['placename']=query_string
            r=requests.get(url,params=params)
            df1=pd.read_json(r.text,orient='records')
            canton=df1.postalCodes[0]

            if nan2!='nan':
                query_string_short=block[1]
                r=requests.get(url,params=params)
                df1=pd.read_json(r.text,orient='records')
                canton2=df1.postalCodes[0]
                if canton2==canton:
                    query_string=query_string_short
                else:
                    print(canton2,canton)
                 
            if canton['countryCode']!='CH':
#                 if nan2!='nan':
#                     print(2140358234)
#                 print(query_string)
#                 print(canton['countryCode'])
                continue
            df2=pd.DataFrame(canton,columns=['adminCode1'],index=['adminCode1'])
            df2=df2.rename(columns={'adminCode1':'Canton'})
            df_final=df_final.append(df2)
            correspondencies_dictionary[query_string]=df2.Canton[0]
    i+=1
    if i%5000==0:
        print(i)


5000
10000
15000
20000
25000
30000
35000
40000
45000


In [148]:
df_final.shape

(49450, 1)

In [149]:
len(correspondencies_dictionary)

1034

In [150]:
correspondencies_dictionary

{'Institut für Wirtschaftspädagogik Universität St. Gallen': 'SG',
 'Chair of Anglo-American and Comparative Law School of Law University of Lucerne': 'LU',
 'UNI: Universität Zürich Anorganisches-chemis ches Institut Winterthurerstrasse 190 Zürich CH': 'ZH',
 'Yale University School of Forestry and Environm. Studies': 'VD',
 'UNI: Royal Medical School Dept. of Infectiou s Diseases and Bacteriology London GB': 'VS',
 'UNI: Universität Bern M.E. Müller Institut f ür Biomechanik  Bern CH': 'BE',
 'Dept. of Egyptian Art Ny Carlsberg Glyptotek': 'VD',
 'UNI: Universität Bern M.E. Müller-Institut f ür Biomechanik  Bern CH': 'BE',
 "UNI: Fondation Hardt pour l'étude de l'antiq uité classique Vandoeuvres CH": 'GE',
 'UNI: Stiftung Schweizerische Osteuropa-Bibli othek  Bern CH': 'BE',
 'Institut für Energietechnik ETH Zürich': 'ZH',
 'UNI: Marie Meierhofer-Institut für das Kind Zürich CH': 'ZH',
 'UNI: University of Geneva School of Dental M edicine 19, rue Barthelemy-Menn Geneva CH': 'GE',
 '

In [151]:
df=df.reset_index()
del df['index']

KeyError: 'index'

In [None]:
del df['level_0']
del df['index']

In [153]:
df_final['index']=range(df_final.shape[0])
df_final=df_final.set_index(['index'])
df['Canton']=df_final['Canton']
df

Unnamed: 0,Project,Institution,University,Amount,Canton
0,4,Faculté de Psychologie et des Sciences de l'Ed...,Université de Genève - GE,41022.0,GE
1,5,Kommission für das Corpus philosophorum medii ...,"NPO (Biblioth., Museen, Verwalt.) - NPO",79732.0,BE
2,6,Abt. Handschriften und Alte Drucke Bibliothek ...,Universität Basel - BS,52627.0,BS
3,7,Schweiz. Thesauruskommission,"NPO (Biblioth., Museen, Verwalt.) - NPO",120042.0,BE
4,8,"Séminaire de politique économique, d'économie ...",Université de Fribourg - FR,53009.0,FR
5,9,Institut für ökumenische Studien Université de...,Université de Fribourg - FR,25403.0,FR
6,10,Ostasiatisches Seminar Universität Zürich,Universität Zürich - ZH,47100.0,ZH
7,11,,Université de Lausanne - LA,25814.0,VD
8,13,Laboratoire de Didactique et Epistémologie des...,Université de Genève - GE,360000.0,GE
9,14,Klinische Psychologie und Psychotherapie Insti...,Université de Fribourg - FR,153886.0,FR


# Folium visualization

In [154]:
a=df.groupby(df.Canton)

In [155]:
df_group=pd.DataFrame({'Canton':[],'Amount':[]})

In [156]:
for i,group in a:
    print(i)
    s=group['Amount'].sum()
    df_group=df_group.append(pd.DataFrame({'Canton':[i],'Amount':[s]}))

AG
AI
AR
BE
BL
BS
FR
GE
GR
JU
LU
NE
SG
SH
SO
SZ
TG
TI
VD
VS
ZG
ZH


In [157]:
canton_list=['ZH','BE','LU','UR','SZ','OW','NW','GL','ZG','FR','SO','BS','BL','SH','AR','AI','SG','GR','AG','TG','TI','VD','VS','NE','GE','JU']

In [158]:
for i in canton_list:
    isin=df_group.Canton.isin([i]).sum()
    if not isin:
        df_group=df_group.append(pd.DataFrame({'Canton':[i],'Amount':[0]}))

In [159]:
df_group

Unnamed: 0,Amount,Canton
0,149772700.0,AG
0,2517048.0,AI
0,113489.0,AR
0,1846871000.0,BE
0,547065.0,BL
0,1254777000.0,BS
0,538005200.0,FR
0,1658209000.0,GE
0,53243400.0,GR
0,80431660.0,JU


In [161]:
import folium
map_osm = folium.Map(location=[47, 7])
state_geo = r'ch-cantons.topojson.json'
map_osm.choropleth(geo_path=state_geo,key_on='feature.id',topojson='objects.cantons',data=df_group,
                   columns=['Canton','Amount'],fill_color='BuPu', fill_opacity=0.7, line_opacity=0.2,
                  legend_name='Unemployment Rate (%)')
map_osm.save('prova.html')

