In [None]:
"""
Human Rights First, Asylum Analysis

It was brought up in the initial stakeholder meeting that there was an 
issue with the y-axis increments for some of the presented bar graphs.
At times the y-axis was labeled with a floating point value; since
the bar graphs are documenting counts, it should be labeled as integers.

I reviewed the plotly docs (the graphing library used by prior cohorts);
the current iteration uses plotly.graph_objects.Bar.
This graph has two interesting hyperparameters that may address the problem:
y – Sets the y coordinates.
y0 – Alternate to y. Builds a linear space of y coordinates. Use with dy 
where y0 is the starting coordinate and dy the step.

My intent with this notebook is to test dummy data with these hyperparameters 
to determine whether they have an effect on the graph's output.
I also intend to explore the possible presence of NaN values, which 
are of type float.  If this is causing the problem, I aim to come up with
a solution to address it.
"""

In [None]:
# Current codebase 

def get_stacked_bar_chart(df, feature):
    """Build a stacked bar chart of case outcomes for each value of `feature`.

    Counts the `case_outcome` values within every group of `feature`,
    creates one bar trace per listed outcome that actually appears in the
    data, and returns the resulting figure serialized as JSON.
    """
    # Rows: unique values of `feature`; columns: outcomes; cells: counts,
    # with 0 filled in for combinations that never occur.
    counts = (
        df.groupby(feature)['case_outcome']
        .value_counts()
        .unstack(fill_value=0)
    )
    categories = list(counts.index)

    # Traces are created in this fixed order; outcomes absent from the
    # data are skipped.
    known_outcomes = ('Denied', 'Granted', 'Remanded', 'Sustained', 'Terminated')
    traces = [
        go.Bar(name=label, x=categories, y=counts[label])
        for label in known_outcomes
        if label in counts.columns
    ]

    chart = go.Figure(traces)
    chart.update_layout(barmode='stack')
    return chart.to_json()


In [19]:
# Create dummy data

import numpy as np
import pandas as pd 
import plotly.graph_objects as go


# Small hand-built sample: eight cases, with NaN in `social_group` for the
# last two rows so the missing-value scenario can be exercised.
d = dict(
    gender=[
        'male', 'male', 'female', 'female',
        'female', 'female', 'female', 'male',
    ],
    religion=[
        'christian', 'islam', 'hindu', 'buddhist',
        'sikh', 'jewish', 'islam', 'christian',
    ],
    nationality=[
        'el salvador', 'dominican republic', 'el salvador', 'bangladesh',
        'bangladesh', 'cuba', 'cuba', 'cuba',
    ],
    social_group=[
        'androgynous', 'gay', 'cisgender', 'bisexual',
        'bisexual', 'cisgender', np.nan, np.nan,
    ],
    case_outcome=[
        'granted', 'granted', 'granted', 'denied',
        'denied', 'denied', 'denied', 'denied',
    ],
)

df_dummyData = pd.DataFrame(d)

In [20]:
# Explore dummy data and ensure there are NaN values to work with

df_dummyData

Unnamed: 0,gender,religion,nationality,social_group,case_outcome
0,male,christian,el salvador,androgynous,granted
1,male,islam,dominican republic,gay,granted
2,female,hindu,el salvador,cisgender,granted
3,female,buddhist,bangladesh,bisexual,denied
4,female,sikh,bangladesh,bisexual,denied
5,female,jewish,cuba,cisgender,denied
6,female,islam,cuba,,denied
7,male,christian,cuba,,denied


In [36]:
# Create function to gain understanding of logic in df.groupby

def see_fxn_df(df, feature):
    """Return the outcome-count table produced by the groupby pipeline.

    Rows are the unique values of `feature`, columns are the values of
    `case_outcome`, and each cell holds a count (0 where a combination
    never occurs).
    """
    outcomes_by_group = df.groupby(feature)['case_outcome']
    outcome_counts = outcomes_by_group.value_counts()
    return outcome_counts.unstack(fill_value=0)

In [37]:
see_fxn_df(df_dummyData, 'religion')

case_outcome,denied,granted
religion,Unnamed: 1_level_1,Unnamed: 2_level_1
buddhist,1,0
christian,1,1
hindu,0,1
islam,1,1
jewish,1,0
sikh,1,0


In [40]:
# Work with current code source, but return plot rather than json object

def get_stacked_bar_chart(df, feature):
    """Build a stacked bar chart of case outcomes for each value of `feature`.

    Notebook variant of the chart helper: it returns the plotly figure
    itself (so the cell renders it) instead of a JSON string.

    Parameters
    ----------
    df : DataFrame with a `case_outcome` column and a `feature` column.
    feature : str, name of the column to use for the x-axis.

    Returns
    -------
    plotly.graph_objects.Figure with one stacked bar trace per outcome
    present in the data.
    """

    # Lower-case labels to match the values used in the dummy data.
    outcomes_list = ['denied', 'granted', 'remanded', 'sustained', 'terminated']
    # Rows: unique values of `feature`; columns: outcomes; cells: counts
    # (0 where a combination never occurs).
    df = df.groupby(feature)['case_outcome'].value_counts().unstack(fill_value=0)

    fig_data = []
    for outcome in outcomes_list:
        # Skip outcomes that never appear, otherwise df[outcome] raises KeyError.
        if outcome in df.columns:
            temp = go.Bar(name=outcome,
                            x=list(df.index),
                            y=df[outcome])
            fig_data.append(temp)
    
    fig = go.Figure(fig_data)
    fig.update_layout(barmode='stack')

    return fig

In [41]:
# This plot does not include data with NaN values and uses floats for y-axis

get_stacked_bar_chart(df_dummyData, 'religion')

In [55]:
# This plot contains data with NaN values and also uses floats for y-axis

get_stacked_bar_chart(df_dummyData, 'social_group')

In [62]:
# Adding arguments to add constraints to the y-axis

def get_stacked_bar_chart2(df, feature):
    """Build the stacked outcome bar chart with `y0`/`dy` set on each trace.

    Same as the plain chart helper, except every bar trace is also given
    `y0=0` and `dy=1` to test whether they constrain the y-axis to
    integer steps.

    Parameters
    ----------
    df : DataFrame with a `case_outcome` column and a `feature` column.
    feature : str, name of the column to use for the x-axis.

    Returns
    -------
    plotly.graph_objects.Figure with one stacked bar trace per outcome
    present in the data.
    """

    # Lower-case labels to match the values used in the dummy data.
    outcomes_list = ['denied', 'granted', 'remanded', 'sustained', 'terminated']
    # Rows: unique values of `feature`; columns: outcomes; cells: counts
    # (0 where a combination never occurs).
    df = df.groupby(feature)['case_outcome'].value_counts().unstack(fill_value=0)

    fig_data = []
    for outcome in outcomes_list:
        # Skip outcomes that never appear, otherwise df[outcome] raises KeyError.
        if outcome in df.columns:
            # NOTE(review): plotly documents y0/dy as an alternate to y;
            # because y is also passed here they may have no effect on
            # the rendered axis -- confirm against the plot output.
            temp = go.Bar(name=outcome,
                            x=list(df.index),
                            y=df[outcome], y0=0, dy=1)
            fig_data.append(temp)
    
    fig = go.Figure(fig_data)
    fig.update_layout(barmode='stack')

    return fig

In [63]:
# Same 'religion' plot (no NaN values), now with y0/dy set, to check whether the y-axis labels change

get_stacked_bar_chart2(df_dummyData, 'religion')