In [16]:
import numpy as np
import pandas as pd
import requests
import re
import os
import json

In [17]:
# Load the grant export: ';'-separated, first column used as the index, and
# the placeholder 'Nicht zuteilbar - NA' read as a missing value.
df = pd.read_csv(
    'P3_GrantExport.csv',
    sep=';',
    index_col=0,
    na_values=['Nicht zuteilbar - NA'],
)

In [18]:
# Sanity check: the index (taken from the first CSV column) has no duplicates.
df.index.is_unique

True

In [19]:
# Overview of the University column: non-null count, number of distinct
# labels, and the most frequent label with its frequency.
df.University.describe()

count                       48393
unique                         76
top       Universität Zürich - ZH
freq                         6774
Name: University, dtype: object

In [20]:
def trim_university_name(full_name):
    """Strip the trailing ' - <suffix>' part from a university label.

    Everything from the first ' - ' separator onward is dropped (the separator
    must have text on both sides), so 'Universität Zürich - ZH' becomes
    'Universität Zürich'. A label without such a separator is returned
    unchanged.

    Returns None when the pattern does not match at all, e.g. for an empty
    string.
    """
    found = re.match('^(.+?)(?: - .+)?$', full_name)
    return found.group(1) if found is not None else None

In [21]:
def get_api_key(path='google_api_key.json'):
    """Read the Google API key from a local JSON file.

    Keeping the key in a separate file keeps the secret out of the notebook.

    Parameters
    ----------
    path : str, optional
        Location of the JSON file. Defaults to 'google_api_key.json' in the
        current working directory. The file must contain a JSON object with
        an 'api_key' entry.

    Returns
    -------
    The value stored under 'api_key'.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    KeyError
        If the JSON object has no 'api_key' entry.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(path, encoding='utf-8') as f:
        return json.load(f)['api_key']

In [27]:
def search_place(place_name):
    """Look up a place by free text with the Google Places Text Search API.

    Parameters
    ----------
    place_name : str
        Text sent as the 'query' parameter of the request.

    Returns
    -------
    tuple or None
        (place_id, latitude, longitude) of the first result, or None when the
        API reports a status other than 'OK' or returns no results.

    Raises
    ------
    requests.exceptions.RequestException
        On network failures, including a timeout.

    Notes
    -----
    A status other than 'OK' or 'ZERO_RESULTS' (for instance a quota or
    authorisation problem) still yields None, but a warning is emitted so that
    such failures are not mistaken for "place not found".
    """
    import warnings  # local import: only used on the unexpected-status path

    BASE_URL = 'https://maps.googleapis.com/maps/api/place/textsearch/json'
    TIMEOUT_SECONDS = 10  # without a timeout a stalled connection blocks forever

    parameters = {'key': get_api_key(), 'query': place_name}

    r = requests.get(BASE_URL, params=parameters, timeout=TIMEOUT_SECONDS)
    response = r.json()

    status = response['status']
    if status != 'OK':
        if status != 'ZERO_RESULTS':
            warnings.warn(
                'Place search for %r returned status %s' % (place_name, status),
                stacklevel=2,
            )
        return None

    results = response['results']
    if not results:
        return None

    result = results[0]
    location = result['geometry']['location']
    place_id = result['place_id']

    return (place_id, location['lat'], location['lng'])

In [28]:
def search_canton(place_id):
    """Find the first-level administrative area of a place via the Google Geocoding API.

    Parameters
    ----------
    place_id : str
        Place identifier sent as the 'place_id' parameter, e.g. the one
        returned by ``search_place``.

    Returns
    -------
    str or None
        The 'short_name' of the first address component of the first result
        whose types include 'administrative_area_level_1' (presumably the
        canton abbreviation for Swiss places -- confirm against real
        responses). None when the API reports a status other than 'OK',
        returns no results, or no such component exists.

    Raises
    ------
    requests.exceptions.RequestException
        On network failures, including a timeout.

    Notes
    -----
    A status other than 'OK' or 'ZERO_RESULTS' still yields None, but a
    warning is emitted so quota or authorisation problems do not go unnoticed.
    """
    import warnings  # local import: only used on the unexpected-status path

    BASE_URL = 'https://maps.googleapis.com/maps/api/geocode/json'
    TIMEOUT_SECONDS = 10  # without a timeout a stalled connection blocks forever

    parameters = {'key': get_api_key(), 'place_id': place_id}

    # Use BASE_URL rather than repeating the URL literal.
    r = requests.get(BASE_URL, params=parameters, timeout=TIMEOUT_SECONDS)
    response = r.json()

    status = response['status']
    if status != 'OK':
        if status != 'ZERO_RESULTS':
            warnings.warn(
                'Geocoding lookup for place_id %r returned status %s' % (place_id, status),
                stacklevel=2,
            )
        return None

    results = response['results']
    if not results:
        return None

    for component in results[0]['address_components']:
        if 'administrative_area_level_1' in component['types']:
            return component['short_name']

    return None

In [118]:
def search_uni(university_name):
    """Resolve a university name to (canton, latitude, longitude).

    The name is first looked up with ``search_place``; the resulting place id
    is then passed to ``search_canton``.

    Returns None when the place lookup yields nothing. Otherwise returns a
    3-tuple whose first element is whatever ``search_canton`` returned, which
    may itself be None.
    """
    found = search_place(university_name)
    if found is None:
        return None

    place_id, latitude, longitude = found
    return (search_canton(place_id), latitude, longitude)

In [59]:
# Distinct, non-missing university labels with the ' - <suffix>' part removed.
uni_names = (
    df.University
    .drop_duplicates()
    .dropna()
    .apply(trim_university_name)
)

In [60]:
# One lookup per university name; a value of None marks a name for which
# search_uni found no place.
uni_infos = dict((name, search_uni(name)) for name in uni_names)

In [138]:
# Turn every lookup result into a (canton, latitude, longitude) row; names
# whose lookup returned None get a row of NaNs instead.
uni_data = uni_names.map(lambda s: uni_infos[s] if uni_infos[s] is not None else (np.nan,) * 3)

# Index the table by the trimmed university names. The index is built from
# `uni_names` (the series the rows were derived from) because no variable `t`
# is defined anywhere in this notebook, so `t.University` would fail on a
# fresh kernel.
unis = pd.DataFrame(
    uni_data.tolist(),
    columns=['Canton', 'Latitude', 'Longitude'],
    index=pd.Index(uni_names, name='University'),
)

In [139]:
# Number of non-null entries per column, i.e. how many lookups produced a value.
unis.count()

Canton       60
Latitude     60
Longitude    60
dtype: int64

In [142]:
# Save the per-university lookup table to disk.
unis.to_csv('universities.csv')

In [144]:
# Show which universities are still missing a canton or coordinates.
unis.isnull()

Unnamed: 0_level_0,Canton,Latitude,Longitude
University,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Université de Genève,False,False,False
"NPO (Biblioth., Museen, Verwalt.)",True,True,True
Universität Basel,False,False,False
Université de Fribourg,False,False,False
Universität Zürich,False,False,False
Université de Lausanne,False,False,False
Universität Bern,False,False,False
"Eidg. Forschungsanstalt für Wald,Schnee,Land",True,True,True
Université de Neuchâtel,False,False,False
ETH Zürich,False,False,False
