In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import geoplot
import mapclassify
from IPython.display import HTML
from collections import Counter

### Load Geo Shape Data

In [2]:
# Read the municipality shapes with geopandas; the file name indicates the 2018 boundaries.
df_municipalities = gpd.read_file('shape/municipalities_2018.geojson')

In [3]:
# Label anchor for every shape: the first coordinate of the geometry's
# representative point (used when drawing municipality labels).
df_municipalities['coords'] = [
    geometry.representative_point().coords[0]
    for geometry in df_municipalities['geometry']
]

# Some shape-file names are ambiguous or differ from the census spelling.
# CONVERT maps either a municipality number or a shape-file name to the name
# that should be used when matching against the census municipalities.
CONVERT = {
    # Municipality numbers -> fixed names
    '821':'Bø (Telemark)',
    '236':'Nes (Akershus)',
    '616':'Nes (Buskerud)',
    '137':'Våler (Østfold)',
    '426':'Våler (Innlandet)',
    '1867':'Bø (Nordland)',
    '441':'Os (Innlandet)',
    '1243':'Os (Hordaland)',
    '713':'Sande (Vestfold)',
    '1514':'Sande (Møre og Romsdal)',
    
    # Municipality names -> fixed names
    'Guovdageaidnu - Kautokeino':'Kautokeino - Guovdageainnu suohkan',
    'Kárásjohka - Karasjok':'Karasjok - Kárásjoga gielda',
    'Unjárga - Nesseby':'Nesseby - Unjárgga gielda',
    'Deatnu Tana':'Tana - Deanu gielda',
    'Porsanger - Porsángu - Porsanki':'Porsanger - Porsánjggu gielda - Porsangin komuuni',
    'Snåase - Snåsa':'Snåsa - Snåasen tjielte',
    'Fauske - Fuossko':'Fauske',
    'Loabák - Lavangen':'Lavangen',
    'Hamarøy - Hábmer':'Hamarøy',
    'Divtasvuodna - Tysfjord':'Hamarøy',
    'Storfjord - Omasvuotna - Omasvuo':'Lyngen',
    'Gáivuotna - Kåfjord - Kaivuono':'Lyngen',
    'Snillfjord':'Hitra',
    'Båtsfjord':'Vardø',
    'Fedje':'Austrheim',
    'Utsira':'Karmøy',
    'Rælingen':'Fet',
    'Sula':'Ålesund',
    'Midsund':'Aukra',
}

# A number match takes precedence over a name match; names that appear in
# neither form are kept unchanged.
municipalityNamesUpdated = [
    CONVERT.get(str(number), CONVERT.get(str(name), name))
    for number, name in zip(df_municipalities['Kommunenum'], df_municipalities['Kommunenav'])
]

df_municipalities['Municipality'] = municipalityNamesUpdated

In [4]:
# Distinct fixed municipality names, for fast membership tests.
MUNICIPALITIES = set(df_municipalities['Municipality'])
# Fixed name -> row position in df_municipalities. Several shapes can share a
# fixed name; in that case the LAST row with that name is the one stored, so
# values written through this lookup never reach the earlier rows.
MUNICIPALITIES_IDX = dict(zip(df_municipalities['Municipality'], range(len(df_municipalities))))

### Methods for Visualizing

In [5]:
def visualize(data, values, title='', cat=False, figPath=None, dontShow=False):
    """Draw a 'Reds' choropleth of `values` over the shapes in `data`.

    Parameters
    ----------
    data : object exposing a geopandas-style ``plot(column=..., ...)`` method
        The shapes to draw (the notebook passes ``df_municipalities``).
    values : array-like
        Passed to ``data.plot`` as ``column``; also used to derive the upper
        end of the colour scale.
    title : str
        Title placed on the axes.
    cat : bool
        If true, plot categorically and use the number of distinct values as
        the upper colour limit. Otherwise the limit is the mean of the
        non-zero values, rounded up (1.0 when there are no non-zero values).
    figPath : str or None
        If truthy, the figure is written to ``img/<figPath>.png``.
    dontShow : bool
        If true, interactive mode is switched off while drawing, the figure is
        closed before returning, and the previous interactive mode is restored.

    Returns
    -------
    None
    """
    plt.rcParams['figure.figsize'] = (95, 30) # Fix figure size!

    if cat:
        upper_limit = len(set(values))
    else:
        # np.asarray lets lists / Series work too, not only ndarrays.
        value_array = np.asarray(values)
        nonzero_values = value_array[value_array != 0]
        # The mean of an empty selection is NaN, which is not a usable colour
        # limit; fall back to 1.0 so an all-zero map still renders.
        upper_limit = np.ceil(nonzero_values.mean()) if nonzero_values.size else 1.0

    # Remember the interactive mode so that dontShow only affects this call.
    was_interactive = plt.isinteractive()
    if dontShow:
        plt.ioff()

    fig = None
    try:
        ax = data.plot(
            column=values,
            cmap='Reds',
            legend=True,
            vmin=0.0,
            vmax=upper_limit,
            categorical=cat
        )
        fig = ax.get_figure()

        # Municipality labels are intentionally not drawn. To enable them,
        # annotate each row's 'Kommunenav' at its 'coords' position:
        #for _, row in data.iterrows():
            #ax.annotate(row['Kommunenav'].upper(), xy=row['coords'], horizontalalignment='center', verticalalignment='center', color='green', alpha=0.95)

        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)
        ax.set_title(title)

        if figPath:
            # Save this figure explicitly rather than whichever one is current.
            fig.savefig('img/{}.png'.format(figPath), transparent=False, bbox_inches='tight')
    finally:
        if dontShow:
            # Close even when plotting/saving failed, so figures do not pile up.
            if fig is not None:
                plt.close(fig)
            if was_interactive:
                plt.ion()


### Visualizing Births from 1800 to 1900

In [6]:
# Load the census birth records; column names are supplied explicitly.
df_births = pd.read_csv('data/census_births.csv', encoding='utf-8', names=['Year', 'Municipality', 'Births'])

# Keep only rows whose municipality matches a shape-file municipality.
# Matching ignores letter case, and the matched rows are rewritten to the
# canonical spelling so that later MUNICIPALITIES_IDX lookups cannot miss.
# (Comparing title-cased names would reject valid names containing lower-case
# words, e.g. "Nore og Uvdal", and would keep spellings that are not index keys.)
canonical_names = {name.lower(): name for name in MUNICIPALITIES if isinstance(name, str)}
matched_names = df_births['Municipality'].astype(str).str.lower().map(canonical_names)
df_births = df_births.loc[matched_names.notna()].assign(Municipality=matched_names)
df_births.head()

Unnamed: 0,Year,Municipality,Births
2,1661,Larvik,1
3,1661,Vestby,1
4,1662,Skien,1
5,1662,Vik,1
6,1663,Malvik,1


In [7]:
def animatePopulationOverTime(df, year_start, year_end, display=False):
    """Render one cumulative-births map per recorded year in [year_start, year_end].

    Parameters
    ----------
    df : pandas.DataFrame
        Records with 'Year', 'Municipality' and 'Births' columns. Years that
        cannot be parsed as numbers (such as '?') are ignored.
    year_start, year_end : int
        Inclusive range of years to render.
    display : bool
        Passed on (negated) as ``dontShow`` to ``visualize``; when false the
        figures are only saved, not shown.

    Each frame is drawn with ``visualize`` over ``df_municipalities`` and
    saved under the name ``birth_<year>``. A frame holds the births summed
    from the first year in range up to and including that year.

    Raises
    ------
    KeyError
        If a record's municipality is not a key of ``MUNICIPALITIES_IDX``.
    """
    # Parse the years numerically once. This copes with text years, numeric
    # years and stray whitespace alike, and turns placeholders into NaN.
    parsed_years = pd.to_numeric(df['Year'], errors='coerce')
    in_range = parsed_years.between(year_start, year_end)  # NaN is never in range
    records = df.loc[in_range]
    record_years = parsed_years.loc[in_range].astype(int)

    years_of_interest = sorted(set(record_years))
    row_of_year = {year: row for row, year in enumerate(years_of_interest)}

    # A matrix containing the births for the given year (row) in the respective municipalities (column)
    data = np.zeros((len(years_of_interest), len(df_municipalities)), dtype=np.uint64)
    for year, municipality, births in zip(record_years, records['Municipality'], records['Births']):
        data[row_of_year[year], MUNICIPALITIES_IDX[municipality]] += births

    for year, population in zip(years_of_interest, data.cumsum(axis=0)):
        visualize(df_municipalities, population, '{}'.format(year), False, 'birth_{}'.format(year), not display)
        print("Visualized", year, '!')

#animatePopulationOverTime(df_births, 1800, 1900)

In [55]:
# Embed the births animation as an inline image.
# NOTE(review): no visible cell writes img/1800_1900_anim.gif; presumably it is
# assembled outside this notebook from the saved img/birth_<year>.png frames - confirm.
HTML('<img src="img/1800_1900_anim.gif">')

### Visualizing 1910 Population Density

In [8]:
# Load the census population table (column names are supplied explicitly) and preview it.
df_pop_1910 = pd.read_csv('data/census_population.csv', encoding='utf-8', names=['Municipality', 'Population'])
df_pop_1910.head()

Unnamed: 0,Municipality,Population
0,Agdenes,4422
1,Alstahaug,5205
2,Alta,5236
3,Alvdal,4519
4,Andøy,4726


In [18]:
# Population per shape, aligned with the rows of df_municipalities.
# Shapes without a census entry stay at zero.
pop = np.zeros(len(df_municipalities), dtype=np.uint64)
for name, count in zip(df_pop_1910['Municipality'], df_pop_1910['Population']):
    if name in MUNICIPALITIES:
        pop[MUNICIPALITIES_IDX[name]] = count
        continue
    # Census municipality with no matching shape: report it and move on.
    print(name)

pop
#visualize(df_municipalities, pop, '1910 Population Density')

Andebu
Hof
Lardal
Leksvik
Nøtterøy
Rissa
Stokke
Tjøme
Ulvik herad


array([  3472,   1596,   7751,   2555,  22970,   8883,   7305,   5169,
         2902,      0,   4449,   1833,   2469,   4303,   1458,   3844,
        17468,   5626,   4726,   7725,   1550,   2984,   2703,  15350,
         2385,      0,   3433,      0,   5551,  15175,      0,  15032,
         5299,   8219,   6568,   2632,   5854,  24033,   5100,    994,
        12191,      0,   5761,   1885,   3696,   2471,   2566,      0,
        16732,   4377,   3820,   3848,   1419,   2372,   1683,  19119,
         4524,  11053,   3816,  14330,   5270,  12342,   2313,   3842,
         3286,    841,   5015,   4509,   4240,   2142,   3498,   2955,
            0,   1298,      0,   6916,   9004,   2481,   1772,   5835,
         1997,   2422,   6215,   4422,   3969,   3776,   5654,    955,
         3118,   3889,  15455,   3704,      0,      0,   2692,  21987,
         4951,      0,   1137,      0,   1512,   5448,    836,      0,
         7955,   2314,   2951,   1934,   2775,   3231,   3027,   3636,
      

In [15]:
# List every fixed municipality name in shape-file row order
# (the same order that MUNICIPALITIES_IDX positions refer to).
list(df_municipalities.Municipality)

['Vadsø',
 'Osen',
 'Bremanger',
 'Austevoll',
 'Kristiansand',
 'Tvedestrand',
 'Risør',
 'Steigen',
 'Sørfold',
 'Vardø',
 'Vardø',
 'Lebesby',
 'Nordkapp',
 'Hammerfest',
 'Loppa',
 'Karlsøy',
 'Tromsø',
 'Lenvik',
 'Andøy',
 'Vågan',
 'Værøy',
 'Rødøy',
 'Bindal',
 'Kristiansund',
 'Eide',
 'Aukra',
 'Selje',
 'Austrheim',
 'Bømlo',
 'Haugesund',
 'Karmøy',
 'Karmøy',
 'Hå',
 'Eigersund',
 'Flekkefjord',
 'Lindesnes',
 'Lillesand',
 'Larvik',
 'Nesna',
 'Øygarden',
 'Osterøy',
 'Fet',
 'Løten',
 'Fyresdal',
 'Vestby',
 'Flå',
 'Svelvik',
 'Siljan',
 'Tønsberg',
 'Hurum',
 'Naustdal',
 'Ulstein',
 'Tjeldsund',
 'Gol',
 'Gjerdrum',
 'Halden',
 'Radøy',
 'Lindås',
 'Tolga',
 'Gjøvik',
 'Gausdal',
 'Kongsberg',
 'Flesberg',
 'Åmot',
 'Enebakk',
 'Tydal',
 'Aure',
 'Ringebu',
 'Stor-Elvdal',
 'Måsøy',
 'Ål',
 'Nore og Uvdal',
 'Ulvik',
 'Lørenskog',
 'Folldal',
 'Bamble',
 'Hadsel',
 'Norddal',
 'Dyrøy',
 'Ibestad',
 'Beiarn',
 'Lurøy',
 'Verdal',
 'Agdenes',
 'Vestnes',
 'Meløy',
 'Ind