# Setup (always run this!)

In [22]:
# importing modules
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
from pathlib import Path
import fiona
import gdal
import json

# setting up directories
data_dir = Path('data/')
lang_dir = data_dir / 'language_data'
gis_dir = data_dir / 'boundaries'

# function to reformat .json and .geojson files
# essentially improves readability of file content
def reformat_json(json_file: Path):
    if not json_file.exists():
        raise FileNotFoundError(f'Unable to find file {json_file}')
    with open(json_file) as f:
        data = json.load(f)
    with open(json_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=True, indent=4)

# Preprocessing for "graficos_immigracion_por_nacionalidad"

Loading and preprocessing immigration data

In [5]:
file_immigration = lang_dir / 'Karte1_inmigración_total_nacionalidad.xlsx'

df_immigration = pd.read_excel(file_immigration, sheet_name='Einwanderung_new')

df_immigration = df_immigration.rename(columns={'Nacionalidad': 'name'})

# used to retrieve flags from https://flag-icon-css.lip.is/
countryIDs = {
    'Argentinien': 'ar',
    'Bolivien': 'bo',
    'Chile': 'cl',
    'Colombia': 'co',
    'Costa Rica': 'cr',
    'Dominikanische Republik': 'do',
    'Ecuador': 'ec',
    'El Salvador': 'sv',
    'Guatemala': 'gt',
    'Honduras': 'hn',
    'Kolumbien': 'co',
    'Kuba': 'cu',
    'Mexiko': 'mx',
    'Nicaragua': 'ni',
    'Panama': 'pa',
    'Paraguay': 'py',
    'Peru': 'pe',
    'Spanien': 'es',
    'Uruguay': 'uy',
    'Venezuela': 've',  
};

# adding geometry column
df_immigration['code'] = df_immigration.apply(lambda row: countryIDs[row['name']], axis=1)


# setting primary key to country name
df_immigration = df_immigration.set_index('name')

df_immigration.head()

Unnamed: 0_level_0,1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,code
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Argentinien,357,270,247,195,195,175,167,180,171,210,...,252,280,249,252,217,270,201,207,241,ar
Bolivien,95,79,80,76,86,81,71,91,97,89,...,252,185,148,135,117,134,156,167,167,bo
Chile,487,379,272,238,202,190,186,167,219,240,...,255,183,171,199,190,184,199,178,229,cl
Costa Rica,34,28,37,44,27,35,45,36,40,28,...,84,74,67,53,57,81,90,75,90,cr
Dominikanische Republik,275,555,694,659,741,631,562,565,511,548,...,416,392,392,447,336,393,341,325,281,do


Exporting as json file

In [6]:
save_dir = Path('graficos_immigracion_por_nacionalidad/')
output_file = save_dir / 'preprocessed_data.json'
df_immigration.to_json(output_file, orient='table')

# optional reformatting data
reformat_json(output_file)

# Preprocessing "mapas_poblacion_por_nacionalidad"

Loading and preprocessing municipality data

In [61]:
# functions that returns a preprocessed data frame for an input year
def preprocess_stats_map2(year):
    
    file_municipality_stats = lang_dir / 'Karte2_población_nacionalidad.xlsx'
    
    df_municipality_stats = pd.read_excel(file_municipality_stats, sheet_name=f'{year}_new')

    # removing all non-municipality rows

    # function to determine whether a row is a municipality or not
    def is_municipality(row):
        place_name = row['Ortschaft']
        name_start = place_name.split()[0]

        # cantons start with -
        if name_start == '-':
            return False
        # districts start with >>
        elif name_start == '>>':
            return False
        else:
            return True

    df_municipality_stats['is_municipality'] = df_municipality_stats.apply(is_municipality,axis=1)    
    dropIndices = df_municipality_stats[df_municipality_stats['is_municipality'] == False].index
    df_municipality_stats = df_municipality_stats.drop(dropIndices)


    # adding bfs number column and municipality name column

    def split_column(row):
        place_name = row['Ortschaft']

        # splitting at first space
        bfs_number, name = place_name.split(' ', 1)
        bfs_number = int(bfs_number[-4:])

        return pd.Series({'bfs_number':bfs_number, 'name_old':name})

    new_columns = df_municipality_stats.apply(split_column, axis=1)                     
    df_municipality_stats = df_municipality_stats.merge(new_columns, left_index=True, right_index=True)                

    # put name column first
    df_municipality_stats.insert(0, 'name', df_municipality_stats.name_old)


    # removing Ortschaft column and is municipality column
    df_municipality_stats = df_municipality_stats.drop(labels=['Ortschaft', 'is_municipality', 'name_old'], axis=1)


    # setting primary key to bfs number
    df_municipality_stats = df_municipality_stats.set_index('bfs_number')


    # adding Total count
    countries = list(df_municipality_stats.columns)[1:]
    def sum_countries(row):
        total = 0
        for country in countries:
            total += row[country]
        return total

    df_municipality_stats['Total'] = df_municipality_stats.apply(sum_countries, axis=1)
    
    # renaming country counts according to year
    old_names = countries + ['Total']
    new_names = [f'{old_name}_{year}' for old_name in old_names]
    
    df_municipality_stats = df_municipality_stats.rename(columns=dict(zip(old_names, new_names)))
    
    return df_municipality_stats
    # end of the preprocessing function


    
startYear, endYear = 2010, 2018

df_municipality_stats = preprocess_stats_map2(startYear)

for year in range(startYear+1, endYear+1):
    df_year = preprocess_stats_map2(year)    
    df_year = df_year.drop(columns='name')
    df_municipality_stats = df_municipality_stats.join(df_year, how='inner', lsuffix='', rsuffix='')


df_municipality_stats.head()

Unnamed: 0_level_0,name,Spanien_2010,Argentinien_2010,Bolivien_2010,Costa Rica_2010,Dominikanische Republik_2010,Ecuador_2010,El Salvador_2010,Guatemala_2010,Honduras_2010,...,Kolumbien_2018,Kuba_2018,Mexiko_2018,Nicaragua_2018,Panama_2018,Paraguay_2018,Peru_2018,Uruguay_2018,Venezuela_2018,Total_2018
bfs_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Aeugst am Albis,4,1,0,0,0,0,0,0,0,...,1,0,1,0,0,0,0,0,0,15
2,Affoltern am Albis,39,2,3,0,12,2,0,0,0,...,9,2,3,0,0,1,1,0,0,101
3,Bonstetten,12,2,0,0,2,0,0,0,0,...,0,0,1,0,0,0,1,0,0,19
4,Hausen am Albis,3,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,5,13
5,Hedingen,11,3,0,0,0,0,0,0,0,...,2,0,1,0,0,0,0,0,1,32


Loading and preprocessing municipalities polygons

In [62]:
file_municipalities = gis_dir / 'swissBOUNDARIES3D_1_3_TLM_HOHEITSGEBIET.shp'

gdf_municipalities = gpd.read_file(file_municipalities)

# change crs to wg84
gdf_municipalities = gdf_municipalities.to_crs({'init' :'epsg:4326'})

# subset to Switzerland
gdf_municipalities = gdf_municipalities[gdf_municipalities['ICC']=='CH']

# subset to municipalities (Kantonsgebiet is removed)
gdf_municipalities = gdf_municipalities[gdf_municipalities['OBJEKTART']=='Gemeindegebiet']


gdf_municipalities = gdf_municipalities[['BFS_NUMMER', 'NAME', 'EINWOHNERZ', 'GEM_FLAECH', 'KANTONSNUM', 'geometry']]

rename_dict = {
    'BFS_NUMMER': 'bfs_number',
    'NAME': 'name',
    'EINWOHNERZ': 'n_inhabitants',
    'GEM_FLAECH': 'area',
    'KANTONSNUM': 'canton_number'
}

gdf_municipalities = gdf_municipalities.rename(columns=rename_dict)

# setting primary key to bfs number
gdf_municipalities = gdf_municipalities.set_index('bfs_number')

print(gdf_municipalities.shape)

gdf_municipalities.head()

(2307, 5)


Unnamed: 0_level_0,name,n_inhabitants,area,canton_number,geometry
bfs_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3762,Scuol,4598.0,43861.0,18.0,"POLYGON Z ((10.42272 46.78824 2898.19782, 10.4..."
1631,Glarus Süd,9581.0,43003.0,8.0,"POLYGON Z ((8.93538 46.91985 2655.04194, 8.935..."
3746,Zernez,1532.0,34404.0,18.0,"POLYGON Z ((10.09693 46.84879 3253.89244, 10.0..."
3543,Surses,2343.0,32377.0,18.0,"POLYGON Z ((9.51989 46.49394 3100.30426, 9.520..."
6031,Bagnes,8100.0,28410.0,23.0,"POLYGON Z ((7.19637 46.11405 2387.52838, 7.197..."


Join municipality stats with boundaries

In [63]:
gdf_municipalities = gdf_municipalities.join(df_municipality_stats, how='left', lsuffix='_left', rsuffix='_right')
gdf_municipalities = gdf_municipalities.rename(columns={'name_left': 'name'})
gdf_municipalities = gdf_municipalities.drop(['name_right'], axis=1)

print(gdf_municipalities.shape)

gdf_municipalities.head()

(2307, 176)


Unnamed: 0_level_0,name,n_inhabitants,area,canton_number,geometry,Spanien_2010,Argentinien_2010,Bolivien_2010,Costa Rica_2010,Dominikanische Republik_2010,...,Kolumbien_2018,Kuba_2018,Mexiko_2018,Nicaragua_2018,Panama_2018,Paraguay_2018,Peru_2018,Uruguay_2018,Venezuela_2018,Total_2018
bfs_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Aeugst am Albis,1941.0,791.0,1.0,"POLYGON Z ((8.50216 47.26132 700.56696, 8.5020...",4.0,1.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0
2,Affoltern am Albis,12146.0,1059.0,1.0,"POLYGON Z ((8.42614 47.27914 558.98992, 8.4259...",39.0,2.0,3.0,0.0,12.0,...,9.0,2.0,3.0,0.0,0.0,1.0,1.0,0.0,0.0,101.0
3,Bonstetten,5512.0,743.0,1.0,"POLYGON Z ((8.48377 47.32378 676.24380, 8.4838...",12.0,2.0,0.0,0.0,2.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,19.0
4,Hausen am Albis,3664.0,1360.0,1.0,"POLYGON Z ((8.57437 47.21707 593.54844, 8.5743...",3.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,5.0,13.0
5,Hedingen,3694.0,653.0,1.0,"POLYGON Z ((8.47675 47.29307 791.40911, 8.4766...",11.0,3.0,0.0,0.0,0.0,...,2.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,32.0


Export as geojson

In [64]:
# export as geojson
# gdf_municipalities.to_file(f'../map2/map2.geojson', driver='GeoJSON', encoding='utf-8')

df_municipalities = pd.DataFrame(gdf_municipalities)
df_municipalities = df_municipalities.drop(columns=['geometry'])
# df_municipalities = df_municipalities.head(10)

df_municipalities = df_municipalities.reset_index()

print(df_municipalities.shape)

(2307, 176)


In [65]:
save_dir = Path('mapas_poblacion_por_nacionalidad')
output_file = save_dir / 'preprocessed_data.json'
df_municipalities.to_json(output_file, orient='columns')
reformat_json(output_file)

# Preprocessing "mapas_universidades"

Loading and preprocessing all bildung points

In [7]:
file_bildung = lang_dir / 'Masterlist_Bildung_Anzahlen.xlsx'

df_bildung = pd.read_excel(file_bildung)
df_bildung['geometry'] = df_bildung.apply(lambda x: Point(x['y'],x['x']), axis=1)
gdf_bildung = gpd.GeoDataFrame(df_bildung, geometry='geometry')


# setting primary key

gdf_unis = gdf_bildung[gdf_bildung['Typ'] == 'Universidad']
gdf_unis['name'] = gdf_unis['Name']
gdf_unis = gdf_unis.set_index('Name')
# print(gdf_unis.shape)

gdf_unis.head(100)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf_unis['name'] = gdf_unis['Name']


Unnamed: 0_level_0,ID,Typ,x,y,Total,geometry,name
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Universität Basel,,Universidad,47.558234,7.582858,90.0,POINT (7.58286 47.55823),Universität Basel
Universität Bern,,Universidad,46.950078,7.436919,120.0,POINT (7.43692 46.95008),Universität Bern
Université de Neuchâtel,,Universidad,46.993852,6.938709,,POINT (6.93871 46.99385),Université de Neuchâtel
Universität Fribourg,,Universidad,46.806263,7.152665,111.0,POINT (7.15266 46.80626),Universität Fribourg
Universität Genf,,Universidad,46.199784,6.142629,120.0,POINT (6.14263 46.19978),Universität Genf
Universität St. Gallen,,Universidad,47.431768,9.374598,398.0,POINT (9.37460 47.43177),Universität St. Gallen
Université de Lausanne,,Universidad,46.528964,6.574077,302.0,POINT (6.57408 46.52896),Université de Lausanne
Universität Zürich,,Universidad,47.374782,8.548334,124.0,POINT (8.54833 47.37478),Universität Zürich
CLC Centre for Languages and Communication (ZHAW),,Universidad,47.499921,8.720451,142.0,POINT (8.72045 47.49992),CLC Centre for Languages and Communication (ZHAW)


Loading and preprocessing statistics for universities

In [8]:
file_stats = lang_dir / 'Karte3_Universitäten.xlsx'

df_uni_stats = pd.read_excel(file_stats, sheet_name='studierende_unis')

# renaming column names because geojson does not accept integers as column names
old_names = range(1988,2019)
new_names = [str(old_name) for old_name in old_names]
df_uni_stats = df_uni_stats.rename(columns=dict(zip(old_names, new_names)))


# setting primary key
df_uni_stats = df_uni_stats.set_index('name')

df_uni_stats.head(10)

Unnamed: 0_level_0,1988,1989,1990,1991,1992,1993,1994,1995,1996,1997,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Universität Basel,25,23,23,26,35,38,44,45,46,40,...,18,9,7,6,5,8,5,5,4,2
Universität Bern,28,27,31,36,35,38,40,39,42,41,...,60,64,52,57,60,53,59,45,45,40
Universität Fribourg,16,19,20,19,21,23,30,24,24,21,...,73,76,70,69,68,60,57,60,60,53
Universität Genf,80,82,80,81,94,87,90,89,84,82,...,39,41,31,24,31,30,32,30,32,24
Université de Lausanne,17,20,18,21,26,32,38,41,43,39,...,21,24,22,23,24,22,17,10,8,9
Universität Zürich,101,100,108,96,104,105,86,67,85,88,...,119,124,112,107,109,100,83,71,54,49
Université de Neuchâtel,21,20,26,25,35,30,34,29,41,46,...,19,19,13,13,10,13,15,16,15,15


Combining points with statistics

In [9]:
gdf_unis_time_series = gdf_unis.join(df_uni_stats, how='inner', lsuffix='_left', rsuffix='_right')
gdf_unis_time_series = gdf_unis_time_series.drop(columns=['ID', 'Typ', 'x', 'y', 'Total '])

gdf_unis_time_series.head()

Unnamed: 0,geometry,name,1988,1989,1990,1991,1992,1993,1994,1995,...,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
Universität Basel,POINT (7.58286 47.55823),Universität Basel,25,23,23,26,35,38,44,45,...,18,9,7,6,5,8,5,5,4,2
Universität Bern,POINT (7.43692 46.95008),Universität Bern,28,27,31,36,35,38,40,39,...,60,64,52,57,60,53,59,45,45,40
Université de Neuchâtel,POINT (6.93871 46.99385),Université de Neuchâtel,21,20,26,25,35,30,34,29,...,19,19,13,13,10,13,15,16,15,15
Universität Fribourg,POINT (7.15266 46.80626),Universität Fribourg,16,19,20,19,21,23,30,24,...,73,76,70,69,68,60,57,60,60,53
Universität Genf,POINT (6.14263 46.19978),Universität Genf,80,82,80,81,94,87,90,89,...,39,41,31,24,31,30,32,30,32,24


Exporting as geojson

In [11]:
save_dir = Path('mapas_universidades')
output_file = save_dir / 'preprocessed_data.geojson'

gdf_unis_time_series.to_file(output_file, driver='GeoJSON', encoding='utf-8')
reformat_json(output_file)

# Preprocessing "mapas_universidades_graphico"

Loading and preprocessing Uni Stufe data

In [13]:
file_stats = lang_dir / 'Karte3_Universitäten.xlsx'

df_uni_stats = pd.read_excel(file_stats, sheet_name='studierende_stufe')

# renaming column names because geojson does not accept integers as column names
old_names = range(1988,2019)
new_names = [str(old_name) for old_name in old_names]
df_uni_stats = df_uni_stats.rename(columns=dict(zip(old_names, new_names)))

print(list(df_uni_stats['type']))

dict_type_abbreviations = {
    'Total': 't',
    'Lizenziat / Diplom': 'ld',
    'Bachelor': 'b',
    'Master': 'm',
    'Doktorat': 'd',
    'Weiterbildung, Vertiefung und andere': 'wva'
}
df_uni_stats['code'] = df_uni_stats.apply(lambda x: dict_type_abbreviations[x['type']], axis=1)


df_uni_stats.head(10)

['Total', 'Lizenziat / Diplom', 'Bachelor', 'Master', 'Doktorat', 'Weiterbildung, Vertiefung und andere']


Unnamed: 0,type,1988,1989,1990,1991,1992,1993,1994,1995,1996,...,2010,2011,2012,2013,2014,2015,2016,2017,2018,code
0,Total,288,291,306,304,350,353,362,334,365,...,357,307,299,307,286,268,237,218,192,t
1,Lizenziat / Diplom,243,245,255,258,301,316,335,311,328,...,81,54,43,35,30,14,8,5,12,ld
2,Bachelor,0,0,0,0,0,0,0,0,0,...,143,115,127,114,104,107,91,66,50,b
3,Master,0,0,0,0,0,0,0,0,0,...,70,80,80,101,89,85,74,90,72,m
4,Doktorat,28,30,34,30,35,28,23,19,27,...,58,49,42,54,54,54,57,55,50,d
5,"Weiterbildung, Vertiefung und andere",17,16,17,16,14,9,4,4,10,...,5,9,7,3,9,8,7,2,8,wva


Exporting as json

In [14]:
save_dir = Path('mapas_universidades_graphico')
output_file = save_dir / 'preprocessed_data.json'

df_uni_stats.to_json(output_file, orient='table')
reformat_json(output_file)

# Preprocessing "mapas_centros_de_idiomas"

Loading and preprocessing sprachzentren data

In [15]:
file_centers = lang_dir / 'SprachenzentrenUniversitäten.xlsx'
df_centers = pd.read_excel(file_centers, sheet_name='Tabelle1')
df_centers.head()

Unnamed: 0,ID,Name,x,y,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018
0,,centro de idiomas UZH y ETH,47.374604,8.548851,1182,1370,1449,1559,1471,1468,935,953,939,929
1,,centro de idiomas de la HSLU,47.046667,8.314852,0,0,218,251,215,199,202,204,224,188
2,,centro de idiomas de la UniBas,47.558528,7.583869,257,269,301,331,356,373,483,502,425,437
3,,centro de idiomas de la UNIL,46.523665,6.584301,402,402,420,484,455,483,482,449,464,401


In [16]:

df_centers['geometry'] = df_centers.apply(lambda x: Point(x['y'],x['x']), axis=1)

gdf_centers = gpd.GeoDataFrame(df_centers, geometry='geometry')

gdf_centers = gdf_centers.drop(columns=['ID', 'x', 'y'])
gdf_centers = gdf_centers.rename(columns={'Name': 'name'})

# renaming column names because geojson does not accept integers as column names
old_names = range(2009, 2019)
new_names = [str(old_name) for old_name in old_names]
gdf_centers = gdf_centers.rename(columns=dict(zip(old_names, new_names)))

gdf_centers.head()

Unnamed: 0,name,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,geometry
0,centro de idiomas UZH y ETH,1182,1370,1449,1559,1471,1468,935,953,939,929,POINT (8.54885 47.37460)
1,centro de idiomas de la HSLU,0,0,218,251,215,199,202,204,224,188,POINT (8.31485 47.04667)
2,centro de idiomas de la UniBas,257,269,301,331,356,373,483,502,425,437,POINT (7.58387 47.55853)
3,centro de idiomas de la UNIL,402,402,420,484,455,483,482,449,464,401,POINT (6.58430 46.52367)


Exporting as geojson

In [17]:
save_dir = Path('mapas_centros_de_idiomas')
output_file = save_dir / 'preprocessed_data.geojson'

gdf_centers.to_file(output_file, driver='GeoJSON', encoding='utf-8')
reformat_json(output_file)

# Preprocessing "mapas_academias_de_idiomas"

Loading and preprocessing Sprachschulen data

In [56]:
file_schools = lang_dir / 'Karte5_Sprachschulen_Nov.xlsx'

df_schools = pd.read_excel(file_schools, sheet_name='Tabelle1')

df_schools = df_schools.rename(columns={
    'Typ': 'type',
    'Name': 'name',
    'Adresse': 'address',
    'Webseite': 'website'
})

# setting primary key to country name
df_schools = df_schools.set_index('ID')

# converting to geodataframe
df_schools['geometry'] = df_schools.apply(lambda x: Point(x['x'],x['y']), axis=1)
gdf_schools = gpd.GeoDataFrame(df_schools, geometry='geometry')

# getting rid of unnecessary columns
gdf_schools = gdf_schools.drop(columns=['x', 'y', 'Unnamed: 7'])

df_schools.head()

Unnamed: 0_level_0,type,name,y,x,address,website,Unnamed: 7,geometry
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
#230,Escuela de idiomas,Klubschule Migros St. Gallen,47.425059,9.376588,"Marktgasse 17, 9000 St. Gallen",https://www.klubschule.ch/Standorte/Ostschweiz...,,POINT (9.37659 47.42506)
#231,Escuela de idiomas,Klubschule Migros Arbon,47.514926,9.430403,"Rebenstrasse 20, 9320 Arbon",https://www.klubschule.ch/Standorte/Ostschweiz...,,POINT (9.43040 47.51493)
#232,Escuela de idiomas,Klubschule Migros Buchs,7.164124,9.47369,"Churerstrasse 7, 9470 Buchs",https://www.klubschule.ch/Standorte/Ostschweiz...,,POINT (9.47369 7.16412)
#233,Escuela de idiomas,Klubschule Migros Chur,46.852501,9.525666,"Gartenstrasse 5, 7001 Chur",https://www.klubschule.ch/Standorte/Ostschweiz...,,POINT (9.52567 46.85250)
#234,Escuela de idiomas,Klubschule Migros Frauenfeld,47.556236,8.896414,"Rheinstrasse 10, 8500 Frauenfeld",https://www.klubschule.ch/Standorte/Ostschweiz...,,POINT (8.89641 47.55624)


Export as geojson

In [60]:
save_dir = Path('mapas_academias_de_idiomas')
output_file = save_dir / 'preprocessed_data.geojson'
gdf_schools.to_file(output_file, driver='GeoJSON', encoding='utf-8')
reformat_json(output_file)

# Preprocessing "mapas_sedes_diplomaticas"

Loading embassy data and preprocessing

In [18]:
file_embassy = lang_dir / 'Consulados_embajadas_Dez.xlsx'
df_embassy = pd.read_excel(file_embassy)
print(df_embassy.columns)

df_embassy['geometry'] = df_embassy.apply(lambda x: Point(x['y'],x['x']), axis=1)
gdf_embassy = gpd.GeoDataFrame(df_embassy, geometry='geometry')




# renaming columns
gdf_embassy = gdf_embassy.rename(columns={
    'País': 'country',
    'Institution': 'institution',
    'Unnamed: 3': 'name',
    'Adresse': 'address',
    'Webseite': 'website'
})

gdf_embassy = gdf_embassy.drop(columns=['http://suiza.oficinascomerciales.es', 'x', 'y'])

print(gdf_embassy['country'].unique())



# used to retrieve flags from https://flag-icon-css.lip.is/
countryIDs = {
    'Argentina': 'ar',
    'Bolivia': 'bo',
    'Chile': 'cl',
    'Colombia': 'co',
    'Costa Rica': 'cr',
    'Cuba': 'cu',
    'Ecuador': 'ec',
    'El Salvador': 'sv',
    'España': 'es',
    'Guatemala': 'gt',
    'Honduras': 'hn',
    'México': 'mx',
    'Nicaragua': 'ni',
    'Panamá': 'pa',
    'Paraguay': 'py',
    'Perú': 'pe',
    'Dom. Rep.': 'do',
    'Uruguay': 'uy',
    'Venezuela': 've',
};
# adding country codes
gdf_embassy['code'] = gdf_embassy.apply(lambda row: countryIDs[row['country']], axis=1)

"""

# setting primary key
# df_embassy = df_embassy.set_index('ID')




gdf_embassy = gdf_embassy.drop(columns=['x', 'y'])

"""
gdf_embassy.head()



Index(['http://suiza.oficinascomerciales.es', 'Institution', 'País',
       'Unnamed: 3', 'Adresse', 'Webseite', 'x', 'y'],
      dtype='object')
['Argentina' 'Bolivia' 'Chile' 'Colombia' 'Costa Rica' 'Cuba' 'Dom. Rep.'
 'Ecuador' 'El Salvador' 'España' 'Guatemala' 'Honduras' 'México'
 'Nicaragua' 'Panamá' 'Paraguay' 'Perú' 'Uruguay' 'Venezuela']


Unnamed: 0,institution,country,name,address,website,geometry,code
0,embajada,Argentina,Embajada Argentina en Berna,"Jungfraustrasse 1, 3005 Berne",http://www.esuiz.mrecic.gob.ar,POINT (7.45470 46.94399),ar
1,consulado,Bolivia,Consulado General Estado Plurinacional de Bolivia,"Rue de Lausanne 72, 1202 Genève",http://www.consuladoboliviasuiza.com,POINT (6.14749 46.21572),bo
2,embajada,Chile,Embajada de Chile ante la Confederación Suiza,"Eigerplatz 5, 3007 Berne",https://chile.gob.cl/suiza/en/,POINT (7.43163 46.94096),cl
3,consulado,Chile,Consulado de Chile en Zúrich,"Fuhrstrasse 12, Unterer Leihof, 8820 Wädenswil",https://www.embassypages.com/chile-consulado-z...,POINT (8.66930 47.22584),cl
4,embajada,Colombia,Embajada de Colombia en Suiza,"Zieglerstrasse 29, 3007 Berne",http://suiza.embajada.gov.co,POINT (7.42996 46.94501),co


Saving embassy to geojson

In [23]:
save_dir = Path('mapas_sedes_diplomaticas')
output_file = save_dir / 'preprocessed_data.geojson'

gdf_embassy.to_file(output_file, driver='GeoJSON', encoding='utf-8')
reformat_json(output_file)

# Preprocessing "mapas_institutos_o_escuelas_secundarias"

Loading Kantis data and preprocessing

In [27]:
file_kantis = lang_dir / 'Karte7_Kantis_Total.xlsx'
df_kantis = pd.read_excel(file_kantis, sheet_name='Tabelle1')

df_kantis['geometry'] = df_kantis.apply(lambda x: Point(x['y'],x['x']), axis=1)

gdf_kantis = gpd.GeoDataFrame(df_kantis, geometry='geometry')


# renaming columns
gdf_kantis = gdf_kantis.rename(columns={
    'Name': 'name',
    'Adresse': 'address'
})


gdf_kantis = gdf_kantis[['name', 'address', 'geometry']]

print(gdf_kantis.shape)
gdf_kantis.head()



(108, 3)


Unnamed: 0,name,address,geometry
0,Academia Engiadina Samedan Mittelschule,"Quadratscha 18, 7503 Samedan",POINT (6.65844 46.50657)
1,Alte Kantonsschule Aarau,"Bahnhofstrasse 91, 5001 Aarau",POINT (8.05317 47.39355)
2,Bündner Kantonsschule,"Arosastrasse 2, 7000 Chur",POINT (9.53632 46.84853)
3,Collège Calvin,"Rue Théodore De-Bèze 2-4, 1206 Genève",POINT (6.15207 46.20045)
4,Collège Claparède,"Chemin de Fossard 61, 1231 Chêne-Bougeries",POINT (6.18495 46.18937)


Loading and preprocessing canton borders

In [29]:
file_cantons = gis_dir / 'boundaries_swiss_cantons.shp'
gdf_cantons = gpd.read_file(file_cantons)
print(gdf_cantons.columns)
gdf_cantons = gdf_cantons[['NAME', 'KT_TEIL', 'EINWOHNERZ', 'geometry']]
gdf_cantons = gdf_cantons.rename(columns={'NAME': 'name'})

gdf_cantons['geometry']


# remapping names
def remap_names(row):
    dict_remap_names = {
        'Graubï¿½nden': 'Graubünden',
        'Zï¿½rich': 'Zürich',
        'Genï¿½ve': 'Genève',
        'Neuchï¿½tel': 'Neuchâtel',
    }    
    old_name = row['name']
    new_name = dict_remap_names.get(old_name,False)
    if not new_name:
        return old_name
    else:
        return new_name    
gdf_cantons['name'] = gdf_cantons.apply(remap_names, axis=1)

gdf_cantons.head()

Index(['UUID', 'DATUM_AEND', 'DATUM_ERST', 'ERSTELL_J', 'ERSTELL_M',
       'REVISION_J', 'REVISION_M', 'GRUND_AEND', 'HERKUNFT', 'HERKUNFT_J',
       'HERKUNFT_M', 'OBJEKTART', 'REVISION_Q', 'ICC', 'KANTONSNUM',
       'SEE_FLAECH', 'KANTONSFLA', 'KT_TEIL', 'NAME', 'EINWOHNERZ',
       'geometry'],
      dtype='object')


Unnamed: 0,name,KT_TEIL,EINWOHNERZ,geometry
0,Graubünden,0,197888.0,"POLYGON Z ((8.87705 46.81291 3062.87625, 8.877..."
1,Bern,1,1031126.0,"POLYGON Z ((7.15352 46.98628 433.29750, 7.1524..."
2,Valais,0,341463.0,"POLYGON Z ((8.47763 46.52762 3024.43500, 8.477..."
3,Vaud,1,793129.0,"POLYGON Z ((6.77983 46.85296 429.24750, 6.7688..."
4,Ticino,0,353709.0,"POLYGON Z ((8.47763 46.52762 3024.43500, 8.477..."


Assigning canton to each kanti

In [30]:


list_names = list(gdf_cantons['name'])
list_boundaries = list(gdf_cantons['geometry'])

def in_canton(row):
    point = Point(row['geometry'])
    for name, boundary in zip(list_names, list_boundaries):
        if point.within(boundary):
            return name
    return 'not found'

gdf_kantis['canton'] = gdf_kantis.apply(in_canton, axis=1)

gdf_kantis.head()

Unnamed: 0,name,address,geometry,canton
0,Academia Engiadina Samedan Mittelschule,"Quadratscha 18, 7503 Samedan",POINT (6.65844 46.50657),Vaud
1,Alte Kantonsschule Aarau,"Bahnhofstrasse 91, 5001 Aarau",POINT (8.05317 47.39355),Aargau
2,Bündner Kantonsschule,"Arosastrasse 2, 7000 Chur",POINT (9.53632 46.84853),Graubünden
3,Collège Calvin,"Rue Théodore De-Bèze 2-4, 1206 Genève",POINT (6.15207 46.20045),Genève
4,Collège Claparède,"Chemin de Fossard 61, 1231 Chêne-Bougeries",POINT (6.18495 46.18937),Genève


Export kantis data as geojson

In [35]:
save_dir = Path('mapas_institutos_o_escuelas_secundarias')

absolute_file = save_dir / 'preprocessed_data_absolute.geojson'
# df_cantons.to_json(absolute_file, orient='records', encoding='utf-8')
gdf_cantons.to_file(absolute_file, driver='GeoJSON', encoding='utf-8')
reformat_json(absolute_file)

kantis_file = save_dir / 'preprocessed_data_kantis.geojson'
gdf_kantis.to_file(kantis_file, driver='GeoJSON', encoding='utf-8')
# reformat_json(kantis_file)

Get number of kantis for each canton

In [36]:
gdf_kantis['count'] = 1
dict_n_kantis = gdf_kantis.groupby(['canton']).sum()['count'].to_dict()
# print(dict_n_kantis)
def n_kantis(row):
    return dict_n_kantis.get(row['name'], 0)
gdf_cantons['n_kantis'] = gdf_cantons.apply(n_kantis, axis=1, result_type='expand')


dict_n_inhabitants = gdf_cantons.groupby(['name'])['EINWOHNERZ'].sum().to_dict()
# print(dict_n_inhabitants)
gdf_cantons['n_inhabitants'] = gdf_cantons.apply(lambda row: dict_n_inhabitants[row['name']], axis=1)

gdf_cantons = gdf_cantons.drop(columns=['EINWOHNERZ'])
gdf_cantons = gdf_cantons.rename(columns={'KT_TEIL': 'part'})


gdf_cantons.head()


Unnamed: 0,name,part,geometry,n_kantis,n_inhabitants
0,Graubünden,0,"POLYGON Z ((8.87705 46.81291 3062.87625, 8.877...",4,197888.0
1,Bern,1,"POLYGON Z ((7.15352 46.98628 433.29750, 7.1524...",9,1031126.0
2,Valais,0,"POLYGON Z ((8.47763 46.52762 3024.43500, 8.477...",0,341463.0
3,Vaud,1,"POLYGON Z ((6.77983 46.85296 429.24750, 6.7688...",13,793129.0
4,Ticino,0,"POLYGON Z ((8.47763 46.52762 3024.43500, 8.477...",5,353709.0


Export data for choropleth map (kantis per inhabitants)

In [37]:
gdf_cantons_choropleth = gpd.GeoDataFrame(gdf_cantons, geometry='geometry')

choropleth_file = save_dir / 'preprocessed_data_choropleth.geojson'
gdf_cantons_choropleth.to_file(choropleth_file, driver='GeoJSON', encoding='utf-8')

Subset cantons such that there is only 1 entry (polygon) per canton.
In case of multiple polygons, the largest one is used.

In [38]:
gdf_cantons = gdf_cantons[(gdf_cantons['KT_TEIL'] == '0') | (gdf_cantons['KT_TEIL'] == '1')]

print(gdf_cantons.shape)
gdf_cantons.head(26)

KeyError: 'KT_TEIL'

Compute centroid for each polygon (centroid will correspond to the marker position on the map)

In [34]:
# adding centroid
gdf_cantons['geometry'] = gdf_cantons.apply(lambda x: Polygon(x['geometry']).centroid, axis=1)

In [35]:
gdf_cantons['x'] = gdf_cantons.apply(lambda row: row['geometry'].x, axis=1)
gdf_cantons['y'] = gdf_cantons.apply(lambda row: row['geometry'].y, axis=1)
gdf_cantons.head()

Unnamed: 0,name,KT_TEIL,EINWOHNERZ,geometry,n_kantis,x,y
0,Graubünden,0,197888.0,POINT (9.62862381268349 46.65606579160512),4,9.628624,46.656066
1,Bern,1,1031126.0,POINT (7.624744294648847 46.82208787558518),9,7.624744,46.822088
2,Valais,0,341463.0,POINT (7.605940034344669 46.20935513994819),0,7.60594,46.209355
3,Vaud,1,793129.0,POINT (6.646681769033475 46.55944971911612),13,6.646682,46.55945
4,Ticino,0,353709.0,POINT (8.808554728958551 46.29606041574468),5,8.808555,46.29606


Exporting as geojson

In [36]:
gdf_cantons = gdf_cantons.drop(columns=['KT_TEIL', 'EINWOHNERZ', 'geometry'])
df_cantons = pd.DataFrame(gdf_cantons)
df_cantons.to_json(f'../map7/map7_absolute.json', orient='records', encoding='utf-8')

# gdf_cantons = gpd.GeoDataFrame(gdf_cantons, geometry='geometry')
# gdf_cantons.to_file(f'../map7/map7_absolut.geojson', driver='GeoJSON', encoding='utf-8')

TypeError: to_json() got an unexpected keyword argument 'encoding'

# Preprocessing "mapas_centros_y_asociaciones_educativas"

Loading all language data (Bildung, Sprachschulen and Botschaften) and adding adding geometry columns 

In [39]:
file_master = lang_dir / 'Karte7_Bildung_Dez.xlsx'
df_bildung = pd.read_excel(file_master, sheet_name='Tabelle1')

print(df_bildung.columns)

df_bildung['geometry'] = df_bildung.apply(lambda x: Point(x['y'],x['x']), axis=1)
gdf_bildung = gpd.GeoDataFrame(df_bildung, geometry='geometry')

gdf_bildung = gdf_bildung.drop(columns=['ID', 'x', 'y'])
gdf_bildung = gdf_bildung.rename(columns={
    'Typ': 'type',
    'Total ': 'total',
    'Name': 'name'
})

print(gdf_bildung['type'].unique())


# assign abbreviations for the different types of schools
dict_type_abbreviations = {
    'ALCE': 'ALCE',
    'Centro de idiomas ': 'CDI',
    'DELE': 'DELE',
    'Enseñanza primaria ': 'EP',
    'Instituto de ensenãnza media': 'IDEM',
    'Universidad': 'U'
}
gdf_bildung['type_abbr'] = gdf_bildung.apply(lambda x: dict_type_abbreviations[x['type']], axis=1)

print(gdf_bildung.shape)
print(gdf_bildung['type'].unique())
gdf_bildung.head()

Index(['ID', 'Typ', 'x', 'y', 'Total ', 'Name'], dtype='object')
['ALCE' 'Centro de idiomas ' 'DELE' 'Enseñanza primaria '
 'Instituto de ensenãnza media' 'Universidad']
(222, 5)
['ALCE' 'Centro de idiomas ' 'DELE' 'Enseñanza primaria '
 'Instituto de ensenãnza media' 'Universidad']


Unnamed: 0,type,total,name,geometry,type_abbr
0,ALCE,30,ALCE,POINT (8.04570 47.39043),ALCE
1,ALCE,38,ALCE,POINT (7.60051 47.47449),ALCE
2,ALCE,71,ALCE,POINT (8.30982 47.47137),ALCE
3,ALCE,66,ALCE,POINT (7.60546 47.56364),ALCE
4,ALCE,67,ALCE,POINT (7.58897 47.56993),ALCE


Saving as geojson

In [46]:
save_dir = Path('mapas_centros_y_asociaciones_educativas')
output_file = save_dir / 'preprocessed_data.geojson'
gdf_bildung.to_file(output_file, driver='GeoJSON', encoding='utf-8')
# reformat_json(output_file)

# Preprocessing "mapas_espacios_culturales_y_sociales"

Loading and preprocessing cultural data

In [48]:
file_culture = lang_dir / 'Karte6_VereineKulturzentren.xlsx'
df_culture = pd.read_excel(file_culture, sheet_name='asociaciones')


df_culture = df_culture.rename(columns={
    'Typ': 'type',
    'Vereinsname': 'name',
    'Adresse': 'address',
    'Kontakt': 'contact'
})

df_culture['ID'] = df_culture['ID'].astype(str)

# setting primary key
df_culture = df_culture.set_index('ID')

df_culture['geometry'] = df_culture.apply(lambda x: Point(x['y'],x['x']), axis=1)
gdf_culture = gpd.GeoDataFrame(df_culture, geometry='geometry')

gdf_culture = gdf_culture.drop(columns=['x', 'y'])

gdf_culture.head()





Unnamed: 0_level_0,type,name,address,contact,geometry
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
#901,asociación,"""Silberbär"" Asociación de Mayores y Jubilados ...","Muristrasse 12, Postfach, 3000 Bern 31",mayores.berna@hotmail.com,POINT (7.46057 46.94430)
#902,asociación,“Arco Iris” Asociación de Pensionistas del Ámb...,Missionsstrasse 34 – 4055 Basel,info@arcoirisbasel.ch,POINT (7.57741 47.56018)
#903,asociación,Asistencia Social en Español,"Bahnhofplatz 1, 3° Piso, 5400 Baden",e-mail: carmen.palmeiro@kathaargau.ch,POINT (8.30846 47.47610)
#904,asociación,Asociación “Arte Andaluz”,"Chemin de Maisonneuve 10, 1219 Châtelaine",e-mail : arteandaluz@bluewin.ch,POINT (6.11269 46.21207)
#905,asociación,Asociación “Esperanza”,"Komotar Cavezuelo, Brüderhofweg 15, 8050 Zürich",e-mail: esperanza@bluewin.ch,POINT (8.53915 47.39940)


Save as geojson

In [53]:
save_dir = Path('mapas_espacios_culturales_y_sociales')
output_file = save_dir / 'preprocessed_data.geojson'
gdf_culture.to_file(output_file, driver='GeoJSON', encoding='utf-8')