In [430]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
from enum import Enum
import numpy as np
import json
import folium
from branca.colormap import LinearColormap, StepColormap
from matplotlib.legend import Legend
import itertools
from plotly.offline import init_notebook_mode, iplot, plot
from IPython.display import display, HTML
import plotly.graph_objs as go
from plotly import tools

init_notebook_mode(connected=True)

In [2]:
DALY = pd.read_csv("./data/Last_data/" + "DALYs (Disability-Adjusted Life Years).csv", index_col=0, low_memory=False).dropna()
DALY.age = DALY.age.replace({'5 to 9':'05 to 09', '1 to 4':'01 to 04'})

prevalence = pd.read_csv("./data/Last_data/Prevalence.csv", index_col=0, low_memory=False).dropna()
prevalence.year = prevalence.year.astype(int)
prevalence_number = prevalence[prevalence.metric == 'Number']
prevalence_rate = prevalence[prevalence.metric == 'Rate']

level_1 = ['Mental disorders']
depression = ['Major depressive disorder', 'Dysthymia']
eating = ['Anorexia nervosa', 'Bulimia nervosa']
level_2 = [i for i in DALY.cause.unique() if (i not in level_1) and (i not in depression) and (i not in eating) ]

population = pd.read_csv("./data/population_1990_2017.csv")


In [433]:

# Categorize the countries
glob = ['Global']
continents = ['Asia', 'Europe', 'Oceania', 'America', 'Africa']
countries = [ i for i in sorted(DALY.location.unique()) if ((not i in continents) and (i != 'Global'))]


In [434]:
def save_html(html_str, title):
    Html_file = open("plotly/" + title + ".html","w")
    Html_file.write(html_str)
    Html_file.close()

In [4]:
prevalence_gender = pd.read_csv("./prevalence_gender.csv")
prevalence_gender.head()

Unnamed: 0,cause,Male,Female
0,Anxiety disorders,21.207867,33.12079
1,Depressive disorders,20.653087,29.94415
2,Other mental disorders,17.831006,11.241406
3,Idiopathic developmental intellectual disability,10.859483,8.613581
4,Attention-deficit/hyperactivity disorder,10.371021,4.013573


#### By Gender Population Share

In [502]:
df = prevalence_number
locations = continents

years = sorted(list(df.year.unique()))
years_str = results = list(map(str, years))

continents = locations

def make_figure(title, x_axis=None, y_axis=None, sliders=True, playButtons = True, values=years_str):
    # make figure
    figure = {
        'data': [],
        'layout': {},
        'frames': []
    }

    # fill in most of layout
    if (x_axis is not None) and (y_axis is not None):
        figure['layout']['xaxis'] = x_axis
        figure['layout']['yaxis'] = y_axis
    figure['layout']['hovermode'] = 'closest'
    if sliders:
        figure['layout']['sliders'] = {
            'args': [
                'transition', {
                    'duration': 400,
                    'easing': 'cubic-in-out'
                }
            ],
            'initialValue': str(values[0]),
            'plotlycommand': 'animate',
            'values': values,
            'visible': True
        }
    
    if playButtons:
        figure['layout']['updatemenus'] = [
            {
                'buttons': [
                    {
                        'args': [None, {'frame': {'duration': 500, 'redraw': True},
                                 'fromcurrent': True, 'transition': {'duration': 300, 'easing': 'quadratic-in-out'}}],
                        'label': 'Play',
                        'method': 'animate'
                    },
                    {
                        'args': [[None], {'frame': {'duration': 0, 'redraw': True}, 'mode': 'immediate',
                        'transition': {'duration': 0}}],
                        'label': 'Pause',
                        'method': 'animate'
                    }
                ],
                'direction': 'left',
                'pad': {'r': 10, 't': 87},
                'showactive': False,
                'type': 'buttons',
                'x': 0.1,
                'xanchor': 'right',
                'y': 0,
                'yanchor': 'top'
            }
        ]
    figure['layout']['title'] = title
    figure['layout']['font'] = dict(color='#ffffff')
    
    return figure

def make_figure_black(title, x_axis=None, y_axis=None, sliders=True, playButtons = True, values=years_str):
    figure = make_figure(title, x_axis, y_axis, sliders, playButtons, values)
    figure['layout']['font'] = dict(color='#000000')
    
    return figure

def make_sliders(prefix = 'Year: '):
    sliders_dict = {
        'active': 0,
        'yanchor': 'top',
        'xanchor': 'left',
        'currentvalue': {
            'font': {'size': 15},
            'prefix': prefix,
            'visible': True,
            'xanchor': 'right'
        },
        'transition': {'duration': 300, 'easing': 'cubic-in-out'},
        'pad': {'b': 10, 't': 50},
        'len': 0.9,
        'x': 0.1,
        'y': 0,
        'steps': []
    }
    return sliders_dict

In [503]:
def get_gender_share_per_year(start_year, end_year, df, locations):
    years = np.linspace(start_year, end_year, end_year - start_year + 1, dtype=np.int32)

    x = dict.fromkeys(locations)
    y = dict.fromkeys(locations)
    size = dict.fromkeys(locations)

    for year in years:
        df_year = df[(df.cause.isin(level_2)) &
                                (df.year == year) &
                                (df.age == 'All Ages') &
                                (df.location.isin(locations))]

        pop = population[(population.location_name.isin(locations)) & 
                               (population.year_id == year) &
                               (population.age_group_name == 'All Ages')]

        df_year_group = df_year.groupby(['location', 'sex'])['val'].sum().reset_index()
        pop = pop.groupby(['location_name', 'sex_name'])['val'].sum().reset_index()
        # Join the prevalency data with population data so we can get the prevalency as a percentage of population
        perc = pd.merge(df_year_group, pop, how='inner', left_on=['location', 'sex'], right_on=['location_name', 'sex_name'])
        # Create new column as prevalency percentage of population
        perc['prevalency_percentage'] = perc['val_x'] / perc['val_y'] * 100
        perc_male = perc[perc.sex == 'Male'][['location', 'prevalency_percentage']].reset_index(drop = True)
        perc_female = perc[perc.sex == 'Female'][['location', 'prevalency_percentage']].reset_index(drop = True)
        pop_per_loc = perc[perc.sex == 'Both'][['location', 'val_x']]
        
        for loc in locations:
            if (year == start_year and end_year != start_year):
                x[loc] = list(round(elem, 2) for elem in list(perc_female[perc_female.location == loc].prevalency_percentage))
                y[loc] = list(round(elem, 2) for elem in list(perc_male[perc_male.location == loc].prevalency_percentage))
                size[loc] = list(pop_per_loc[pop_per_loc.location == loc].val_x / 10000000)
            elif year == start_year and end_year == start_year:
                x[loc] = list(round(elem, 2) for elem in list(perc_female[perc_female.location == loc].prevalency_percentage))
                y[loc] = list(round(elem, 2) for elem in list(perc_male[perc_male.location == loc].prevalency_percentage))
                size[loc] = list(pop_per_loc[pop_per_loc.location == loc].val_x / 100000)
            else:
                x[loc].extend(list(round(elem, 2) for elem in list(perc_female[perc_female.location == loc].prevalency_percentage)))
                y[loc].extend(list(round(elem, 2) for elem in list(perc_male[perc_male.location == loc].prevalency_percentage)))
                if year == end_year and start_year != end_year:
                    size[loc].extend(list(pop_per_loc[pop_per_loc.location == loc].val_x / 100000))
                else:
                    size[loc].extend(list(pop_per_loc[pop_per_loc.location == loc].val_x / 10000000))   
    
    data_dicts = []
    for loc in locations:
        # Make dict
        data_dict = {
            'x': x[loc],
            'y': y[loc],
            'mode': 'lines+markers',
            'text': '(Female(%), Male(%))',
            'marker': {
                'sizemode': 'area',
                'sizeref': 2,
                'size': size[loc]
            },
            'line': {
                'width': 5
            },
            'name': loc
        }
        data_dicts.append(data_dict)
        
    return data_dicts

In [504]:
def make_plot(sliders_dict, figure, function, years, df, locations, bar = False):
    years = sorted(years)
    for year in years:
        if bar:
            data_dict = function(year, df, locations[0], level_2)
        else:
            data_dict = function(years[0], year, df, locations)
#         print(data_dict)
        # Only append first year
        if year == years[0]:
            figure['data'] = data_dict

        # Make frames
        frame = {'data': data_dict, 'name': str(year)}
#         frame['data'] = data_dict
#         print(frame)
        figure['frames'].append(frame)

        # Slider step
        slider_step = {'args': [
            [str(year)],
            {'frame': {'duration': 300, 'redraw': True},
             'mode': 'immediate',
           'transition': {'duration': 300}}
         ],
         'label': str(year),
         'method': 'animate'}
        sliders_dict['steps'].append(slider_step)

    figure['layout']['sliders'] = [sliders_dict]
    figure['layout']['paper_bgcolor']='rgba(0,0,0,0)'
    figure['layout']['plot_bgcolor']='rgba(0,0,0,0)'
    if bar:
        figure['layout']['barmode'] ='stack'
    

    return figure

In [505]:
sliders = make_sliders()
figure = make_figure_black('Share of gender of mental illness per continent(%)', 
                                                    {'title': 'Share of Female(%)', 'range': [10, 18]},
                                                    {'title': 'Share of Male(%)', 'range': [10, 15]})

In [506]:
fig_gender = make_plot(sliders, figure, get_gender_share_per_year, years, df, locations)

In [507]:
iplot(fig_gender, validate=True)

In [512]:
html_str = plot(fig_gender, include_plotlyjs=False, output_type='div')
save_html(html_str, "share_gender_continent")

#### Bar plot

In [416]:
def per_gender_location(year, df, causes, locations = countries):
    dff = df[(df.year == year) & (df.age == 'All Ages') & (df.location.isin(locations)) & (df.cause.isin(causes))]
    df_g = dff.groupby(['location', 'cause', 'sex'])['val'].mean().unstack().reset_index()

    grs = df_g.groupby('location')
    per_location = pd.DataFrame()
    for loc in locations:
        g = grs.get_group(loc)
        g['Female_perc'] = g.Female / g.Female.sum() * 100
        g['Male_perc'] = g.Male / g.Male.sum() * 100
        per_location = pd.concat([per_location, g])
    return per_location

def make_gender_data(df, cause):
    df_sex = df[df.cause == cause]
    data_dicts = []
    for sex in ['Male', 'Female']:
        data_dict = go.Bar(
            x = df_sex.location.unique().tolist(),
            y = [round(elem, 2) for elem in list(df_sex[sex + '_perc'])],
            name = sex,
            legendgroup = sex,
            text='%'
        )

        data_dicts.append(data_dict)
    return data_dicts

In [508]:
subplot_titles = []
locations = countries.copy()
locations.extend(glob)
causes = level_2.copy()
causes.remove("Other mental disorders")
for i, cause in enumerate(causes):
    subplot_titles.append("{}".format(cause))
print(locations)

['Australia', 'China', 'France', 'Russian Federation', 'United States', 'Global']


In [509]:
fig = tools.make_subplots(rows=3, cols=3, subplot_titles= subplot_titles)
df = per_gender_location(2017, prevalence_number.copy(), causes, locations)

for i, cause in enumerate(causes):
    traces =  make_gender_data(df, cause)
#     print(traces)
    for trace in traces:
        fig.append_trace(trace, int(i/3) + 1, i%3 + 1,)
        fig['layout']['yaxis{}'.format(i+1)].update(title='Share per location (%)')
#         print(i%5, int(i/5))
        
fig['layout'].update(height=1000, width=1200, title= 'Mental Disorder by Gender in 2016 over countries', 
                     barmode = "group", )

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]  [ (1,3) x3,y3 ]
[ (2,1) x4,y4 ]  [ (2,2) x5,y5 ]  [ (2,3) x6,y6 ]
[ (3,1) x7,y7 ]  [ (3,2) x8,y8 ]  [ (3,3) x9,y9 ]





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



In [510]:
fig_gender_per_country = fig
iplot(fig_gender_per_country, validate=False)

In [513]:
html_str = plot(fig_gender_per_country, include_plotlyjs=False, output_type='div')
save_html(html_str, "prevalence_gender_share_per_country")

#### Pie Plot - Global

In [514]:
def per_gender_global(year, df, causes=level_2):
    dff = df[(df.year == year) & (df.age == 'All Ages') & (df.location == "Global") & (df.cause.isin(causes))]
    df_g = dff.groupby(['cause', 'location', 'sex'])['val'].sum().unstack().reset_index()
      
    data_dicts = []
    x = [[0, 0.48], [0.52, 1]]
    for i, sex in enumerate(['Male', 'Female']):
        data_dict = {
          "values": list(df_g[sex]),
          "labels": df_g.cause.unique().tolist(),
          "domain": {"x": x[i]},
          "name": sex,
          "hoverinfo":"label+percent+name",
          "hole": .4,
          "type": "pie",
          "text":[sex],
          "textposition":"inside",
        }
#         print(data_dict['domain'])
        data_dicts.append(data_dict)
    return data_dicts

In [515]:
def make_plot_global(sliders_dict, figure, years, df):
    years = sorted(years)
    for year in years:
        data_dict = per_gender_global(year, df, causes)

        # Only append first year
        if year == years[0]:
            figure['data'] = data_dict

        # Make frames
        frame = {'data': data_dict, 'name': str(year)}
        figure['frames'].append(frame)

        # Slider step
        slider_step = {'args': [
            [str(year)],
            {'frame': {'duration': 300, 'redraw': True},
             'mode': 'immediate',
           'transition': {'duration': 300}}
         ],
         'label': str(year),
         'method': 'animate'}
        sliders_dict['steps'].append(slider_step)

    figure['layout']['sliders'] = [sliders_dict]
    figure['layout']['paper_bgcolor']='rgba(0,0,0,0)'
    figure['layout']['plot_bgcolor']='rgba(0,0,0,0)'
    figure['layout']['annotations'] = [
                {
                    "font": {
                        "size": 15
                    },
                    "showarrow": False,
                    "text": "Male",
                    "x": 0.22,
                    "y": 0.5
                },
                {
                    "font": {
                        "size": 15
                    },
                    "showarrow": False,
                    "text": "Female",
                    "x": 0.79,
                    "y": 0.5
                }
            ]
    

    return figure

In [498]:
sliders = make_sliders()
df = prevalence_number
locations = continents
figure = make_figure_black('Share of gender per Mental Illness Globally (%)')
fig_gender_global = make_plot_global(sliders, figure, years, df, locations)

In [499]:
iplot(fig_gender_global, validate=True)

In [501]:
html_str = plot(fig_gender_global, include_plotlyjs=False, output_type='div')
save_html(html_str, "prevalence_gender_disorder_share_global")