In [8]:
import pandas as pd
import os 
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns
# For bokeh visualisations
from bokeh.plotting import figure
from bokeh.models.widgets import Panel, Tabs
from bokeh.io import show, output_notebook, reset_output
from bokeh.models import ColumnDataSource, HoverTool, Legend
from bokeh.layouts import row

In [2]:
# Read the three input tables (trees, income, education) from the local data/ folder.
csv_paths = ('data/df_tree.csv', 'data/df_income.csv', 'data/df_education.csv')
df_tree, df_income, df_education = map(pd.read_csv, csv_paths)

In [3]:
# Average number of years spent in education per district.
# Each education level is assigned a nominal number of study years; every
# level is expressed relative to primary school (grundskole).
grundskole = 10
erhvervsfaglig = grundskole + 2
gymnasiel = grundskole + 3
# (5+8)/2 is presumably the midpoint between a 5-year and an 8-year
# long-cycle track -- TODO confirm the intended durations.
lang_videregående = gymnasiel + (5+8)/2
mellem_videregående = gymnasiel + 3

# Head-count column -> nominal study years. Insertion order is the order in
# which the weighted terms are added together below.
years_by_level = {
    'Erhvervsfaglige og korte videregående uddannelser': erhvervsfaglig,
    'Grundskole og uoplyst mv.': grundskole,
    'Gymnasiale uddannelser og adgangsgivende uddannelsesforløb': gymnasiel,
    'Lange videregående uddannelser og Ph.d og forskeruddannelser': lang_videregående,
    'Mellemlange videregående uddannelser og bacheloruddannelser': mellem_videregående,
}
level_columns = list(years_by_level)

# Head-count of each level multiplied by its study years, accumulated with
# plain `+` so a missing value in any level yields a missing total.
weighted_terms = [df_education[column] * years for column, years in years_by_level.items()]
df_education['total_study_year'] = sum(weighted_terms[1:], weighted_terms[0])

# Row-wise head-count across all levels (row sum skips missing values).
df_education['total_educated'] = df_education[level_columns].sum(axis=1)

# Weighted mean of study years per row.
df_education['avg_study_year'] = df_education['total_study_year'] / df_education['total_educated']

In [4]:
# Build the combined demographic table: income joined with education.
# No join keys are passed, so pandas performs its default inner join on every
# column name the two frames share.
demo_columns = {
    'year': 'year',
    'district': 'district',
    'Gennemsnit for alle personer med indkomsten (kr.)': 'avg_income',
    'avg_study_year': 'avg_study_year',
}
df_demo = (
    df_income
    .merge(df_education)
    .loc[:, list(demo_columns)]      # keep only the columns of interest
    .rename(columns=demo_columns)    # short English names for plotting
)
df_demo.head()

Unnamed: 0,year,district,avg_income,avg_study_year
0,2008,Amager Vest,209245.0,13.152245
1,2008,Amager Øst,202575.0,13.077911
2,2008,Bispebjerg,190239.0,12.846948
3,2008,Brønshøj-Husum,203281.0,12.729431
4,2008,Indre By,258185.0,14.252449


In [11]:
# Interactive Bokeh view of the demographic data: one line per district over
# the years, with average income and average study years on separate tabs.

# Distinct districts, taken from the (year, district) groups of df_demo.
df_districts = df_demo.groupby(["year", "district"]).size().reset_index()
districts = df_districts.district.unique()

# Wide tables: one row per year, one column per district.
table_income = pd.pivot_table(df_demo, values='avg_income', index=["year"], columns=['district'])
table_study = pd.pivot_table(df_demo, values='avg_study_year', index=["year"], columns=['district'])

# Clear any Bokeh output configuration left over from earlier cells/runs.
reset_output()

# Empty figures, one per tab.
p1 = figure(plot_width = 900, plot_height = 600, title = "Avg_income",
            x_axis_label = "Year", y_axis_label = "Avg_income")

p2 = figure(plot_width = 900, plot_height = 600, title = "Avg_study_year",
            x_axis_label = "Year", y_axis_label = "Avg_study_year")

# Line colours, assigned to districts by position.
colors = ["red", "blue", "green", "cyan", "black", "darkblue", "magenta", "yellow", "orange", "grey"]

# (label, [renderer]) pairs used to build a custom legend for each figure.
items1 = []
items2 = []

# Line renderers keyed by district position.
line = {}
line2 = {}

for indx, i in enumerate(districts):
    # Cycle through the palette so more districts than colours does not raise IndexError.
    color = colors[indx % len(colors)]

    # Line thickness is set with `line_width`; `width` is not a property of the line glyph.
    # Renderers are created muted; the legend's "mute" click policy toggles that state.
    line[indx] = p1.line(x=table_income.index, y=table_income[i], line_width=2.0, color=color, muted=True)
    line2[indx] = p2.line(x=table_study.index, y=table_study[i], line_width=2.0, color=color, muted=True)

    items1.append((districts[indx], [line[indx]]))
    items2.append((districts[indx], [line2[indx]]))

# Same legend and title styling for both figures: the legend is placed in the
# panel to the left of the plot area and clicking an entry mutes/unmutes its line.
for plot, items in ((p1, items1), (p2, items2)):
    legend = Legend(items=items, location="top_left")
    plot.add_layout(legend, 'left')
    plot.legend.click_policy = "mute"  # "hide" is the alternative policy
    plot.title.text_font_size = "20px"

# One tab per metric.
tab1 = Panel(child=p1, title="Income")
tab2 = Panel(child=p2, title="Study year")
tabs = Tabs(tabs=[tab1, tab2])

# Render inline in the notebook.
output_notebook()
show(tabs)