In [1]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from enum import Enum
import numpy as np
import json
import folium
from branca.colormap import LinearColormap, StepColormap
from matplotlib.legend import Legend
import itertools
from plotly.offline import init_notebook_mode, iplot
from IPython.display import display, HTML
import plotly.graph_objs as go

init_notebook_mode(connected=True)

In [2]:
# DALY table: the first CSV column is used as the index and every row with a
# missing value is dropped.
DALY = pd.read_csv(
    "./data/Last_data/" + "DALYs (Disability-Adjusted Life Years).csv",
    index_col=0,
    low_memory=False,
).dropna()
# Zero-pad the single-digit age bands (presumably so the labels sort
# correctly as strings -- confirm against the plots that use them).
DALY['age'] = DALY['age'].replace({'5 to 9': '05 to 09', '1 to 4': '01 to 04'})

# Prevalence table, loaded the same way, with `year` forced to int and one
# sub-frame per metric.
prevalence = pd.read_csv("./data/Last_data/Prevalence.csv", index_col=0, low_memory=False).dropna()
prevalence['year'] = prevalence['year'].astype(int)
prevalence_number = prevalence.loc[prevalence['metric'] == 'Number']
prevalence_rate = prevalence.loc[prevalence['metric'] == 'Rate']

# Cause groupings: level_2 is every cause in DALY that is not listed in
# level_1, depression or eating.
level_1 = ['Mental disorders']
depression = ['Major depressive disorder', 'Dysthymia']
eating = ['Anorexia nervosa', 'Bulimia nervosa']
level_2 = [
    cause
    for cause in DALY.cause.unique()
    if cause not in (level_1 + depression + eating)
]

# Categorize the locations: the global aggregate, the continents, and
# everything else (sorted alphabetically) as countries.
glob = ['Global']
continents = ['Asia', 'Europe', 'Oceania', 'America', 'Africa']
countries = [
    place
    for place in sorted(DALY.location.unique())
    if place not in continents and place != 'Global'
]

population = pd.read_csv("./data/population_1990_2017.csv")


In [4]:
# Load the per-cause Male / Female table and preview its first rows.
# The CSV's first column is an unnamed saved index, which is why the preview
# shows an 'Unnamed: 0' column.
prevalence_gender = pd.read_csv("./prevalence_gender.csv")
prevalence_gender.head()

Unnamed: 0,cause,Male,Female
0,Anxiety disorders,21.207867,33.12079
1,Depressive disorders,20.653087,29.94415
2,Other mental disorders,17.831006,11.241406
3,Idiopathic developmental intellectual disability,10.859483,8.613581
4,Attention-deficit/hyperactivity disorder,10.371021,4.013573


#### By Gender Population Share

In [5]:
# Inputs for the animated bubble chart: case counts, restricted to continents.
df = prevalence_number
locations = continents

# Sort the years: unique() returns them in first-appearance order, while the
# animation code treats years[0] as the earliest year and emits frames and
# slider steps in list order.
years = sorted(df.year.unique())
# `results` stays bound to the same list as years_str, as before, in case a
# later cell reads it.
years_str = results = list(map(str, years))

def make_figure(title, x_axis=None, y_axis=None, sliders=True, playButtons = True):
    """Create the skeleton dict of an animated Plotly figure.

    Parameters
    ----------
    title
        Stored under ``layout['title']``.
    x_axis, y_axis
        Optional axis specifications stored under ``layout['xaxis']`` and
        ``layout['yaxis']``. Each one is applied on its own, so passing only
        one of them is honoured.
    sliders
        When true, a placeholder slider entry is written to
        ``layout['sliders']``. This reads the module-level ``years`` and
        ``years_str``, which must exist when the function is called.
    playButtons
        When true, a Play / Pause button group is written to
        ``layout['updatemenus']``.

    Returns
    -------
    dict
        ``{'data': [], 'layout': {...}, 'frames': []}`` with hover mode
        'closest' and a white (``#ffffff``) font.
    """
    figure = {
        'data': [],
        'layout': {},
        'frames': []
    }
    layout = figure['layout']

    # Apply each axis independently; requiring both used to drop a lone axis.
    if x_axis is not None:
        layout['xaxis'] = x_axis
    if y_axis is not None:
        layout['yaxis'] = y_axis
    layout['hovermode'] = 'closest'

    if sliders:
        layout['sliders'] = {
            'args': [
                'transition', {
                    'duration': 400,
                    'easing': 'cubic-in-out'
                }
            ],
            'initialValue': str(years[0]),
            'plotlycommand': 'animate',
            'values': years_str,
            'visible': True
        }

    if playButtons:
        layout['updatemenus'] = [
            {
                'buttons': [
                    {
                        # None as the frame list plays all frames in order.
                        'args': [None, {'frame': {'duration': 500, 'redraw': False},
                                 'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                        'label': 'Play',
                        'method': 'animate'
                    },
                    {
                        # [None] with mode 'immediate' interrupts the running animation.
                        'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': 'immediate',
                        'transition': {'duration': 0}}],
                        'label': 'Pause',
                        'method': 'animate'
                    }
                ],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top'
            }
        ]
    layout['title'] = title
    layout['font'] = dict(color='#ffffff')

    return figure

def make_figure_black(title, x_axis=None, y_axis=None, sliders=True, playButtons = True):
    """Same skeleton as make_figure, but with a black (#000000) font.

    All arguments are forwarded unchanged to make_figure; only
    layout['font'] is overwritten on the returned dict.
    """
    black_figure = make_figure(
        title,
        x_axis=x_axis,
        y_axis=y_axis,
        sliders=sliders,
        playButtons=playButtons,
    )
    black_figure['layout'].update(font={'color': '#000000'})
    return black_figure

def make_sliders(prefix = 'Year: '):
    """Return a new Plotly slider configuration that has no steps yet.

    prefix is the text displayed in front of the current-value label.
    Every call builds a fresh dict with its own empty 'steps' list, so the
    result can be filled in place by the caller.
    """
    current_value = dict(
        font=dict(size=15),
        prefix=prefix,
        visible=True,
        xanchor='right',
    )
    return dict(
        active=0,
        yanchor='top',
        xanchor='left',
        currentvalue=current_value,
        transition=dict(duration=300, easing='cubic-in-out'),
        pad=dict(b=10, t=50),
        len=0.9,
        x=0.1,
        y=0,
        steps=[],
    )

In [10]:
def get_gender_share_per_year(start_year, end_year, df, locations):
    """Build one 'lines+markers' scatter trace per location.

    For each location the trace holds, for ``end_year``:
      * x: cases among females as a percentage of the female population,
      * y: the same for males,
      * marker size: the 'Both' case count divided by 100000.
    Percentages are rounded to two decimals.

    Parameters
    ----------
    start_year, end_year
        Inclusive year range; ``end_year - start_year + 1`` must not be
        negative (numpy raises ValueError otherwise).
    df
        Frame with columns ``cause``, ``year``, ``age``, ``location``,
        ``sex`` and ``val``. Only rows whose cause is in the module-level
        ``level_2`` and whose age is 'All Ages' are used.
    locations
        Location names; also used to select rows of the module-level
        ``population`` frame (columns ``location_name``, ``year_id``,
        ``age_group_name``, ``sex_name``, ``val``).

    Returns
    -------
    list of dict
        One trace dict per entry of ``locations``, in the same order. When the
        year range is empty the x / y / size entries stay ``None``.
    """
    years = np.linspace(start_year, end_year, end_year - start_year + 1, dtype=np.int32)

    x = dict.fromkeys(locations)
    y = dict.fromkeys(locations)
    size = dict.fromkeys(locations)

    # The per-location lists used to be reset on every pass of the year loop,
    # so only the last year of the range ever reached the returned traces.
    # Processing just that year gives the same result without filtering and
    # merging every earlier year first.
    # NOTE(review): if a start_year..end_year trail per location was the
    # intent, the lists must instead be created once and extended per year.
    for year in years[-1:]:
        df_year = df[(df.cause.isin(level_2)) &
                     (df.year == year) &
                     (df.age == 'All Ages') &
                     (df.location.isin(locations))]

        pop = population[(population.location_name.isin(locations)) &
                         (population.year_id == year) &
                         (population.age_group_name == 'All Ages')]

        cases = df_year.groupby(['location', 'sex'])['val'].sum().reset_index()
        pop = pop.groupby(['location_name', 'sex_name'])['val'].sum().reset_index()
        # Join cases with population per (location, sex); after the merge
        # val_x is the case count and val_y the population.
        perc = pd.merge(cases, pop, how='inner',
                        left_on=['location', 'sex'], right_on=['location_name', 'sex_name'])
        perc['prevalency_percentage'] = perc['val_x'] / perc['val_y'] * 100
        perc_male = perc[perc.sex == 'Male'][['location', 'prevalency_percentage']].reset_index(drop = True)
        perc_female = perc[perc.sex == 'Female'][['location', 'prevalency_percentage']].reset_index(drop = True)
        # Case counts for both sexes combined; they drive the bubble size.
        cases_both = perc[perc.sex == 'Both'][['location', 'val_x']]

        for loc in locations:
            x[loc] = [round(elem, 2) for elem in perc_female[perc_female.location == loc].prevalency_percentage]
            y[loc] = [round(elem, 2) for elem in perc_male[perc_male.location == loc].prevalency_percentage]
            size[loc] = list(cases_both[cases_both.location == loc].val_x / 100000)

    data_dicts = []
    for loc in locations:
        data_dict = {
            'x': x[loc],
            'y': y[loc],
            'mode': 'lines+markers',
            'text': '(Female(%), Male(%)',
            'marker': {
                'sizemode': 'area',
                'sizeref': 2,
                'size': size[loc]
            },
            'line': {
                'width': 5
            },
            'name': loc
        }
        data_dicts.append(data_dict)

    return data_dicts

In [11]:
def make_plot(sliders_dict, figure, function, years, df, locations):
    """Fill a figure skeleton with one animation frame and slider step per year.

    ``function(years[0], year, df, locations)`` is called for every year and
    must return the list of traces for that year. The traces of the first
    year also become the figure's initial ``data``.

    Both ``figure`` and ``sliders_dict`` are modified in place: frames and
    steps are appended, so calling this twice on the same objects duplicates
    them. The layout gets the slider and transparent backgrounds.

    Returns the same ``figure`` object.
    """
    for current in years:
        label = str(current)
        traces = function(years[0], current, df, locations)

        # The first year doubles as the figure's initial state.
        if current == years[0]:
            figure['data'] = traces

        figure['frames'].append({'data': traces, 'name': label})

        # Slider step that jumps straight to this year's frame.
        sliders_dict['steps'].append({
            'args': [
                [label],
                {
                    'frame': {'duration': 300, 'redraw': False},
                    'mode': 'immediate',
                    'transition': {'duration': 300},
                },
            ],
            'label': label,
            'method': 'animate',
        })

    figure['layout'].update({
        'sliders': [sliders_dict],
        'paper_bgcolor': 'rgba(0,0,0,0)',
        'plot_bgcolor': 'rgba(0,0,0,0)',
    })
    return figure

In [12]:
# Empty slider config plus a black-font figure skeleton with fixed axis
# ranges for the female (x) and male (y) shares.
sliders = make_sliders()
figure = make_figure_black(
    'Share of gender of mental illness per continent(%)',
    x_axis={'title': 'Share of Female(%)', 'range': [10, 18]},
    y_axis={'title': 'Share of Male(%)', 'range': [10, 15]},
)

In [13]:
# make_plot appends to `figure` and `sliders` in place, so re-running this
# cell without re-running the previous one duplicates frames and steps.
fig = make_plot(
    sliders_dict=sliders,
    figure=figure,
    function=get_gender_share_per_year,
    years=years,
    df=df,
    locations=locations,
)

In [None]:
# Render the animated bubble chart inline, with plotly's figure validation switched on.
iplot(fig, validate=True)

#### Bar plot

In [19]:
def per_gender_data(current_year, df, location, causes=level_2):
    """Build the Male and Female bar traces for one year and one location.

    Rows of ``df`` (columns ``year``, ``age``, ``location``, ``cause``,
    ``sex``, ``val``) are restricted to the given year, 'All Ages', the given
    location and the given causes. ``val`` is summed per cause and sex, and
    each sex is expressed as a percentage of the 'Both' total, rounded to two
    decimals.

    Returns a list with two bar-trace dicts, 'Male' first, then 'Female',
    whose x values are the causes.
    """
    row_filter = (
        (df.year == current_year)
        & (df.age == 'All Ages')
        & (df.location == location)
        & (df.cause.isin(causes))
    )
    by_sex = df[row_filter].groupby(['cause', 'location', 'sex'])['val'].sum().unstack().reset_index()

    # Share of each sex relative to the combined ('Both') total per cause.
    by_sex['Female_perc'] = by_sex.Female / by_sex.Both * 100
    by_sex['Male_perc'] = by_sex.Male / by_sex.Both * 100
    shares = by_sex.drop(columns=['Both', 'Female', 'Male'])

    return [
        {
            'type' : 'bar',
            'x': shares.cause.unique().tolist(),
            'y': [round(value, 2) for value in list(shares[sex + '_perc'])],
            'name' : sex
        }
        for sex in ['Male', 'Female']
    ]

def bar_plot_layout(title, xaxis, yaxis, slider_dict, barmode = 'group'):
    """Return a go.Layout for the bar chart.

    The layout has transparent paper and plot backgrounds, hover mode
    'closest', the given title, axes and bar mode, and ``slider_dict`` as its
    single slider.
    """
    transparent = 'rgba(0, 0, 0, 0)'
    layout_options = {
        'title': title,
        'barmode': barmode,
        'paper_bgcolor': transparent,
        'plot_bgcolor': transparent,
        'xaxis': xaxis,
        'yaxis': yaxis,
        'hovermode': 'closest',
        'sliders': [slider_dict],
    }
    return go.Layout(**layout_options)

def get_gender_sliders():
    """Return a new, visible slider configuration for the bar chart.

    The current value is labelled with the prefix 'Year:' in a size-20 font.
    Every call builds a fresh dict with its own empty 'steps' list.
    """
    year_label = dict(
        font=dict(size=20),
        prefix='Year:',
        visible=True,
        xanchor='right',
    )
    return dict(
        active=0,
        visible=True,
        yanchor='top',
        xanchor='left',
        currentvalue=year_label,
        transition=dict(duration=300, easing='cubic-in-out'),
        pad=dict(b=10, t=50),
        len=0.9,
        x=0.1,
        y=0,
        steps=[],
    )

def per_gender_frames(start_year, end_year, df, location, causes, slider_dict):
    """Build one animation frame and one slider step per year.

    For every year from ``start_year`` to ``end_year`` (inclusive) the bar
    traces returned by ``per_gender_data(year, df, location, causes)`` are
    wrapped in a frame named after the year, and a matching 'animate' step is
    appended to ``slider_dict['steps']``. ``slider_dict`` is modified in
    place.

    Returns
    -------
    tuple
        ``(slider_dict, frames)``, where ``frames`` is the list of frame
        dicts in chronological order. An empty year range yields no frames
        and leaves the slider untouched.
    """
    frames = []
    for year in range(int(start_year), int(end_year) + 1):
        label = str(year)
        frames.append({
            'data': list(per_gender_data(year, df, location, causes)),
            'name': label,
        })

        # The animate-options dict was left unclosed when its other keys were
        # commented out, which made the whole cell a SyntaxError. Only the
        # frame option that was still active is kept.
        slider_dict['steps'].append({
            'args': [
                [label],
                {'frame': {'redraw': False}},
            ],
            'label': label,
            'method': 'animate',
        })

    return slider_dict, frames

In [18]:
# Use case counts only. `prevalence` holds rows for several metrics
# ('Number', 'Rate', ...) and per_gender_data sums `val` without looking at
# the metric, so the full table would add counts and rates together.
df = prevalence_number.copy()
g_years = df.year.unique().tolist()
locations = continents.copy()
# Initial bars: first available year, first continent.
data = per_gender_data(g_years[0], df, locations[0], level_2)
slider_dict = get_gender_sliders()
# One frame and one slider step per year from 1990 to 2017.
slider_dict, frames = per_gender_frames(1990, 2017, df, locations[0], level_2, slider_dict)
layout = bar_plot_layout(title= "By Gender", xaxis=dict(title="Mental Illness"), yaxis=dict(title="Share(%)"), 
                         slider_dict=slider_dict)

In [None]:
# Assemble the animated bar chart from the initial traces, layout and frames.
fig = go.Figure(
    data=data,
    layout=layout,
    frames=frames,
)
# Rebinds the module-level years_str that make_figure reads for its slider placeholder.
years_str = [str(year) for year in g_years]

# fig['layout']['sliders'] = {
#             'args': [
#                 'transition', {
#                     'duration': 400,
#                     'easing': 'cubic-in-out'
#                 }
#             ],
#             'initialValue': years_str[0],
#             'plotlycommand': 'animate',
#             'values': years_str,
#             'visible': True
#         }

In [None]:
# Most recent data only: level-1 cause, both sexes, all ages, year 2017,
# restricted to the continents. This is a pure row filter; nothing is aggregated.
filtered_data = prevalence_number.loc[
    prevalence_number.location.isin(continents)
    & prevalence_number.cause.isin(level_1)
    & (prevalence_number.sex == 'Both')
    & (prevalence_number.year == 2017)
    & (prevalence_number.age == 'All Ages')
]

# Matching slice of the population table.
pop_filtered = population.loc[
    population.location_name.isin(continents)
    & (population.sex_name == 'Both')
    & (population.year_id == 2017)
    & (population.age_group_name == 'All Ages')
]

# Join prevalence with population per location. Both frames have a `val`
# column, so after the merge val_x is the prevalence value and val_y the
# population.
filter_data_perc = filtered_data.merge(
    pop_filtered,
    how='inner',
    left_on='location',
    right_on='location_name',
)

# Prevalence as a percentage of the population.
filter_data_perc['prevalency_percentage'] = (
    filter_data_perc['val_x'].div(filter_data_perc['val_y']).mul(100)
)