In [1]:
#Pandas + data
import geopandas as gpd
import pandas    as pd
import hvplot.pandas
import json

# Panel + extentions
import panel as pn
import param
import panel.widgets as pnw
import ipywidgets as ipw
import panel.widgets as pnw

# Plots
import hvplot
from bokeh.io import output_file, show, output_notebook, export_png
from bokeh.models import ColumnDataSource, GeoJSONDataSource, LinearColorMapper, ColorBar
from bokeh.plotting import figure
from bokeh.palettes import brewer
from bokeh.transform import dodge
from shapely.affinity import translate
from shapely.geometry import LineString, MultiPolygon, Polygon
import shapely
from bokeh.models.annotations import Label


# Misc
import itertools
import numpy as np
import os
from math import pi

import warnings
import sys

sys.setrecursionlimit(100000)
pn.extension()

In [2]:
geo_data = gpd.read_file('https://raw.githubusercontent.com/Denze8/Data-Visualization-Project/main/Data/final_dataset.shp')

In [3]:
for i, poly in enumerate(geo_data.geometry.unique()):
    country = geo_data.Country.unique()[i]
    if (type(poly) == Polygon):
        if (poly.bounds[0] >= (-170)):
            x =  translate(poly, xoff = -180 - 10)
        else:
            x =  translate(poly, xoff =  180 - 10)

        geo_data.loc[list(geo_data.loc[geo_data['Country'] == country].index), 'geometry'] = x
    else:
        temp = []
        for j, mpoly in enumerate(poly.geoms):
            if (mpoly.bounds[0] >= (-170)):
                new = translate(mpoly, xoff = -180 - 10)
            else:
                new = translate(mpoly, xoff =  180 - 10)
                
            temp.append(new)
        geo_data.loc[list(geo_data.loc[geo_data['Country'] == country].index), 'geometry'] = shapely.ops.unary_union(temp)

In [22]:
geo_data[a] = geo_data[a].div(geo_data.sum_deaths, axis = 0)

In [41]:
def geo_plot(gdf, year, column = None, country_select = None, title = ''):
    
    def get_geodatasource(gdf):    
        return GeoJSONDataSource(geojson = json.dumps(json.loads(gdf.to_json())))
    
    vals        = gdf[column].sum(axis = 1)
    geo         = gdf[['Country', 'geometry']].drop_duplicates(ignore_index = True)
    range_year  = range(year[0], year[1])
    gdf         = gdf.query("Year in @range_year") 
    gdf         = gdf.groupby('Country', as_index = False)[column].sum()
    gdf[column] = gdf[column]/len(range_year)
    gdf = geo.merge(gdf, left_on = 'Country', right_on = 'Country')
    print(gdf)
    countries     = list(gdf.Country)
    gdf["colors"] = ['#ff0000' if i in country_select else '#000000' for i in countries]
    gdf["size"]   = [1.5 if i in country_select else 0.5 for i in countries]
    
    geosource     = get_geodatasource(gdf)
    
    palette       = brewer['OrRd'][8]
    palette       = palette[::-1]
    
    # Instantiate LinearColorMapper that linearly maps numbers in a range, into a sequence of colors.
    color_mapper = LinearColorMapper(palette = palette, low = vals.min(), high = vals.max())
    color_bar    = ColorBar(color_mapper = color_mapper, label_standoff = 8, width = 500, height = 20, 
                         location=(0,0), orientation='horizontal')
    
    tools    = 'wheel_zoom, pan, reset, hover, box_zoom'
    TOOLTIPS = [("Country", "@Country"), ("Population", "@vals")]#, ("Deaths", "".join(['@', column]))
    
    p = figure(title = title, toolbar_location = 'right', tools = tools, tooltips = TOOLTIPS)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    
    # Add patch renderer to figure
    p.patches('xs','ys', source = geosource, fill_alpha = 1, line_width = 'size', line_color = 'colors',  
              fill_color={'field' :column[0] , 'transform': color_mapper})
    
    # Specify figure layout.
    p.add_layout(color_bar, 'below')
    return p

In [65]:
def Merge(dict1, dict2):
    res = {**dict1, **dict2}
    return res

def col_plot(data, year, continents, country, cause):
    
    if(len(continents) < 2 | len(country) <= 6):
        data = data[['Country'] + cause].groupby(['Country']).sum().reset_index()
    else:
        data = data[['continent'] + cause].groupby(['continent']).sum().reset_index()
    data[cause] = data[cause]/int(data[cause].sum(axis = 0))
    color  = brewer['Set2'][8]
    keys   = cause
    values = list(list(data[c]) for c in cause)
    b      = list(data.iloc[:, 0])
    dict1  =  {'country': list(data.iloc[:, 0])}
    
    myDict = {k:v for (k,v) in zip(keys, values)} 
  
    a      = Merge(dict1, myDict)

    source = ColumnDataSource(a)

    p = figure(y_range = b, height = 350, title = "", toolbar_location = None, tools = "")
    c = len(keys)
    for j, i in enumerate(keys):
        p.hbar(y = dodge('country', j/4, range = p.y_range), right = i, height = 0.2, source = source, color = color[j], legend_label = keys[j])

    return p

In [6]:
def area_plot(data, cause):
    data  = pd.DataFrame(data).groupby('Year')[cause].sum().reset_index()
    tools = 'ypan'
    p     = figure(x_range = (1990, 2019), y_range = (0, 1e8), tools = tools)
    p.grid.minor_grid_line_color = '#eeeeee'
    
    p.varea_stack(stackers = cause, x = 'Year', legend_label = cause, source = data)

    p.legend.background_fill_color = "#fafafa"
    
    return p

In [43]:
def correlation_matrix_(X):
    X = np.matrix(X)
    n,d  = X.shape 
    corr = np.zeros((d,d)) 
    for i in range(d): 
        for j in range(d): 
            corr[i,j] = np.round(sum([((X[n,i]-np.mean(X[:,i])) * (X[n,j]-np.mean(X[:,j]))) for n in range(n)])/np.sqrt(sum([(X[n,i]-np.mean(X[:,i]))**2 for n in range(n)])*sum([(X[n,j]-np.mean(X[:,j]))**2 for n in range(n)])),2) # formula mentioned above
    return corr

def matrix_plot(data, cause):
    data = data.groupby('Year')[cause].sum().reset_index()
    data = data.iloc[:,1:]
    df_numerical = data.select_dtypes(exclude = "object")

    correlation_matrix = correlation_matrix_(data)
    
    #Get how many values will be in each dimension. It will be a square.
    table_one_dimension = correlation_matrix.shape[1]

    #Create an array from available correlations. Creating this from Numpy array as reshaping from Pandas is not practical.
    correlation_matrix_numpy = correlation_matrix
    correlation_array = correlation_matrix_numpy.reshape(-1)

    #Create a list that will include the colors from the values in the correlation array in the order of correlation array
    correlation_color_list = [
    "lime" if (value<=0 and value>-.25) or (value>=0 and value<.25)
    else "green" if (value<=-0.25 and value>-.50) or (value>=0.25 and value<.50)
    else "orangered" if (value<=-0.50 and value>-.75) or (value>=0.50 and value<.75)
    else "red" for value in correlation_array]


    #Create a Numpy matrix as a layer to show only the colors defined in the correlation_color_list
    color_matrix = np.array(correlation_color_list)
    color_matrix = color_matrix.reshape((table_one_dimension,table_one_dimension))
    #Flip the color matrix to start from top to bottom; not from bottom to top.
    color_matrix = np.flip(color_matrix, axis=0)

    #For aligning the position of the cell squares representing each cell on the heat-map. 0.5 shift fits perfectly.
    alignment_list_for_rows = []
    for i in range(table_one_dimension):
        alignment_list_for_rows.append(i+0.5) 

    #Create x and y ranges to be used in Bokeh plot
    x_range_ = list(df_numerical.columns)
    y_range_ = list(df_numerical.columns)

    #Reverse y range for fitting to the heat map. 
    #Otherwise first item starts from bottom, while we want it to start from top and the x range start from left.
    y_range_.reverse()

    #Define the Bokeh figure and assign ranges. Some small adjustments for x axis labels.
    p = figure(title="Karsten", x_range=x_range_, y_range=y_range_)
    p.xaxis.major_label_orientation = pi/6
    p.xaxis.major_label_text_font_size = "8pt"

    #Create the color layer for each cell one by one with two for loops.
    #One loop for rows and other for columns. Each cell has a width and height of one unit. Colors are defined according to color matrix.
    for row in range(table_one_dimension):
        for col in range(table_one_dimension):
            p.rect(x=alignment_list_for_rows[col], y=alignment_list_for_rows[row], width=1, height=1, color=color_matrix[row,col])


    #Reversing/flipping the correlation matrix to start from top to bottom rather than from bottom to top.
    #This fits the numpy matrix to the plot as we want.

    correlation_matrix_numpy = np.flip(correlation_matrix_numpy, axis=0)


    #Creating text layer via two for loops; one for each row other for each column
    #This creates a value for all cells one by one and uses the values in correlation_dataframe_numpy
    for row in range(table_one_dimension):
        for col in range(table_one_dimension):
            mytext = Label(x=col+0.3, y=row + 0.5, text=str(round(correlation_matrix_numpy[row, col], 2)), text_font_size="10px")
            p.add_layout(mytext)
    return p

In [8]:
def agg_data(data, cause):
    data_ =  data.groupby('Country', as_index = False)[cause].sum()
    dt = data_.merge(data.loc[data['Year'] == 1990][['Country', 'continent', 'geometry']], left_on = 'Country', right_on = 'Country')
    return dt



In [47]:
class App(param.Parameterized):
    
    pn.extension(sizing_mode = "stretch_width", template = "bootstrap")

    # Sets up canvas for the plots  
    map_pane   = pn.pane.Bokeh(width        = 1000, height = 400)
    bar        = pn.pane.Bokeh(width        =  600, height = 600)
    area       = pn.pane.Bokeh(width        =  600, height = 600)
    mat        = pn.pane.Bokeh(width        =  600, height = 600)
    
    # Data
    data       = geo_data.copy()
    
    causes     = ['Infectious Diseases', 'Neurological and Cognitive Disorders',
       'Nutritional and Metabolic Disorders',
       'Mental Health and Substance Abuse', 'Injuries and Accidents',
       'Maternal and Child Health', 'Non-communicable Diseases',
       'Violence and Conflict']
    
    # Widgets
    year       = param.Range(default        = (1990, 2019), bounds = (1990, 2019),    label = 'Year')
    continents = param.ListSelector(default = list(geo_data.continent.unique()),    objects = list(geo_data.continent.unique()))
    country    = param.ListSelector(default = list(geo_data.Country.unique()),      objects = list(geo_data.Country.unique()))
    cause      = param.ListSelector(default = [causes[1]],               objects = causes, label = 'Cause of Death')
    
    
    @param.depends('year', watch = True)
    def update_year(self):
        a = range(self.year[0], self.year[1])
        self.data = geo_data.query('Year in @a')
        return 
        
    @param.depends('continents', watch = True)
    def update_values(self):
        new_data = geo_data.query('continent in @self.continents')
        self.param['country'].objects = list(new_data.Country.unique())
        return
    
    @param.depends('country', watch = True)
    def update_countries(self):
        self.data = geo_data.query('Country in @self.country')
        return 
    
    @param.depends('year', 'continents', 'country', 'cause', watch = True)
    def plot(self):
        # Uses function geo_plot to plot the choropleth map
        self.map_pane.object = geo_plot(geo_data.copy(), self.year, self.cause, self.country)
        
        # Uses function col_plot to plot the vertical column plot
        self.bar.object      = col_plot(self.data, self.year, self.continents, self.country, self.cause)

        # Uses function area_plot() to plot the stacked area chart
        self.area.object     = area_plot(self.data, self.cause)
        
        # Uses function matrix_plot() to plot the correlation
        self.mat.object      = matrix_plot(self.data, self.cause)
        return 

In [66]:
test = App()
test.plot()

pn.Column(pn.Row(pn.Column(
    test.param.year,
         pn.Row(test.param.continents, test.param.country),
                test.param.cause),     test.map_pane), 
          pn.Row(test.bar,   test.area), 
          pn.Row(test.mat)
         ).servable()

         Country                                           geometry  \
0    Afghanistan  POLYGON ((-123.48139 37.36278, -122.92422 37.3...   
1        Albania  POLYGON ((-168.97996 40.84273, -169.00001 40.5...   
2        Algeria  POLYGON ((-198.68440 27.39574, -198.66512 27.5...   
3         Angola  MULTIPOLYGON (((-177.26483 -5.96568, -176.9751...   
4      Argentina  MULTIPOLYGON (((-258.25000 -53.10000, -257.750...   
..           ...                                                ...   
163    Venezuela  POLYGON ((-250.73357 5.20028, -250.60118 4.918...   
164      Vietnam  POLYGON ((-85.66567 10.48654, -84.80009 10.889...   
165        Yemen  POLYGON ((-137.99999 19.00000, -137.21782 17.3...   
166       Zambia  POLYGON ((-159.25999 -8.34001, -158.84225 -8.5...   
167     Zimbabwe  POLYGON ((-158.80859 -22.25151, -159.34013 -22...   

     Neurological and Cognitive Disorders  
0                                0.009325  
1                                0.039069  
2              

In [11]:

y = [1990, 1991]

a =  ['Infectious Diseases', 'Neurological and Cognitive Disorders',
       'Nutritional and Metabolic Disorders',
       'Mental Health and Substance Abuse', 'Injuries and Accidents',
       'Maternal and Child Health', 'Non-communicable Diseases',
       'Violence and Conflict'] 
       
custom_style = {
    'background': '#f9f9f9',
    'border': '1px solid black',
    'padding': '100px',
    'box-shadow': '5px 5px 5px #bcbcbc'
}

pn.widgets.FloatSlider(name='Number', styles=custom_style)


In [12]:
keys  = ['Infectious Diseases']

data = geo_data[['Country'] + keys].groupby(['Country']).sum().reset_index()
data = data.iloc[0:5,]

In [13]:
from bokeh.plotting import figure, show

countries = list(data['Country'])
count1 = list(data['Infectious Diseases'])
count2 = list(data['Neurological and Cognitive Disorders'])
a = ['Infectious Diseases', 'Neurological and Cognitive Disorders']

dt = {'C' : countries,
      'Infectious Diseases' : count1,
      'Neurological and Cognitive Disorders' : count2
}

p = figure(x_range = (0, max(max(count1), max(count2))), y_range = countries, height=350, title="Fruit Counts",  
           toolbar_location = None, tools = "")

p.hbar_stack(a , y = 'C', width = 0.9, source = ColumnDataSource(dt))


show(p)

KeyError: 'Neurological and Cognitive Disorders'

In [None]:
list(data.iloc[:, 0])
a = geo_data.loc[geo_data['Country'] == 'Denmark'].groupby('Year')[['Infectious Diseases', 'Neurological and Cognitive Disorders',
       'Nutritional and Metabolic Disorders',
       'Mental Health and Substance Abuse', 'Injuries and Accidents',
       'Maternal and Child Health', 'Non-communicable Diseases',
       'Violence and Conflict'] ].sum().reset_index()

np.matrix(a.iloc[ : , 1 : ])

In [None]:
values = [list(data[k]) for k in keys]
 
# but this line shows dict comprehension here  
myDict = {k:v for (k,v) in zip(keys, values)} 


     
dict1 = {'Country': list(data['Country'])}
a = Merge(dict1, myDict)

In [None]:
values
#list(data[keys])
dict1

In [None]:
a =  ['Infectious Diseases', 'Neurological and Cognitive Disorders',
       'Nutritional and Metabolic Disorders',
       'Mental Health and Substance Abuse', 'Injuries and Accidents',
       'Maternal and Child Health', 'Non-communicable Diseases',
       'Violence and Conflict'] 
geo_data.groupby('Country', as_index = True).agg({'Infectious Diseases' : sum, 'Neurological and Cognitive Disorders' : sum,
       'Nutritional and Metabolic Disorders' : sum,
       'Mental Health and Substance Abuse' : sum, 'Injuries and Accidents' : sum,
       'Maternal and Child Health' : sum, 'Non-communicable Diseases' : sum,
       'Violence and Conflict' : sum})

In [None]:
geo_data.sum_deaths

In [None]:
all(geo_data[a].sum(axis = 1) == geo_data.sum_deaths)

In [None]:
geo_data[a].sum(axis = 1)

In [None]:
year = range(1990, 1995)
geo_data.query('Year in @year')

In [None]:
geo_data.groupby('Country', as_index = False)[a].sum().merge(geo_data.geometry.drop_duplitcates(),left_on = 'Country', right_on = 'Country')

In [51]:
dt = geo_data[['Country'] + a].groupby('Country').sum().reset_index()

dt.iloc[:,1:].sum(axis = 1).div(dt.iloc[:,1], axis = 0)

0       4.164975
1      17.090684
2      16.969225
3       2.025050
4      10.651192
         ...    
163    12.845464
164     8.163210
165     4.330173
166     1.655794
167     1.638801
Length: 168, dtype: float64

In [50]:
geo_data[a[1]].div(geo_data.sum_deaths, axis = 0)

0       7.000701e-08
1       6.316704e-08
2       5.540852e-08
3       4.881688e-08
4       4.358724e-08
            ...     
5035    6.002801e-08
5036    6.289250e-08
5037    6.580930e-08
5038    7.029227e-08
5039    7.172330e-08
Length: 5040, dtype: float64