In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
from ipywidgets import widgets, interact, interactive, interact_manual, Layout
from numpy import array
import chart_studio.plotly as py
import chart_studio

# Importing data
#
# One CSV per cohort year, 2008 through 2018, is downloaded from the project's
# GitHub repository. Each file is read with its first column as the index.
# The per-year variable names are referenced by later cells, so they are kept
# as individual module-level names.
# NOTE(review): the files look like pre-aggregated counts (later cells read a
# `Frequency` column) -- confirm against the data dictionary.

gpao80agg2008 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2008.csv', index_col=0)
gpao80agg2009 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2009.csv', index_col=0)
gpao80agg2010 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2010.csv', index_col=0)
gpao80agg2011 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2011.csv', index_col=0)
gpao80agg2012 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2012.csv', index_col=0)
gpao80agg2013 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2013.csv', index_col=0)
gpao80agg2014 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2014.csv', index_col=0)
gpao80agg2015 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2015.csv', index_col=0)
gpao80agg2016 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2016.csv', index_col=0)
gpao80agg2017 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2017.csv', index_col=0)
gpao80agg2018 = pd.read_csv(
    'https://raw.githubusercontent.com/Reggie-Gilliard-RANYCS/interactive_graphs/master/data/GPAO80AGG2018.csv', index_col=0)

In [3]:
# Lookup table from cohort year (as a string, "2008" .. "2018") to that year's
# DataFrame, so a frame can be fetched with datadict[str(year)].
datadict = dict(zip(
    (str(year) for year in range(2008, 2019)),
    (
        gpao80agg2008,
        gpao80agg2009,
        gpao80agg2010,
        gpao80agg2011,
        gpao80agg2012,
        gpao80agg2013,
        gpao80agg2014,
        gpao80agg2015,
        gpao80agg2016,
        gpao80agg2017,
        gpao80agg2018,
    ),
))

In [4]:
# Hex colours available to the chart; traces pick entries by position.
colorvalues = [
    "#b86b78",
    "#7e9c90",
    "#d68230",
    "#7c97ab",
    "#7d6890",
]

In [5]:
# Working copy of the 2008 cohort frame. Later cells read it to populate the
# dropdown options and to draw the initial/default chart; copying keeps the
# originally loaded frame untouched.
tdat = gpao80agg2008.copy()

In [6]:
# Drop Downs
#
# Every demographic column gets two lists:
#   * `<x>optionsb` -- only the category values that occur in `tdat`
#   * `<x>options`  -- the same values plus a catch-all entry ("ANY" / "ALL"),
#                      which is also the dropdown's initial selection.

# Gender
genoptionsb = tdat['GENCAT'].unique().tolist()
genoptions = genoptionsb + ["ANY"]
gender = widgets.Dropdown(
    description="Gender:",
    options=genoptions,
    value="ANY",
)

# Race / ethnicity
racoptionsb = tdat['ETHCAT'].unique().tolist()
racoptions = racoptionsb + ['ANY']
race = widgets.Dropdown(
    description="Race/Ethnicity:",
    options=racoptions,
    value='ANY',
)

# English-language-learner status
elloptionsb = tdat["ANYELL0812"].unique().tolist()
elloptions = elloptionsb + ['ALL']
ell = widgets.Dropdown(
    description="ELL Status:",
    options=elloptions,
    value="ALL",
)

# IEP status
iepoptionsb = tdat['anyspd1'].unique().tolist()
iepoptions = iepoptionsb + ['ALL']
iep = widgets.Dropdown(
    description='IEP Status:',
    options=iepoptions,
    value='ALL',
)

# Socio-economic status
disoptionsb = tdat['trulydis_b'].unique().tolist()
disoptions = disoptionsb + ['ALL']
dis = widgets.Dropdown(
    description='SES:',
    options=disoptions,
    value='ALL',
)

# Cohort year selector; its bounds match the years of the loaded files.
cohort = widgets.IntSlider(
    value=2008,
    min=2008,
    max=2018,
    description="9th Grade Cohort",
    style={'description_width': 'initial'},
)

In [7]:
# Demographic selectors
#
# `colnames` lists the data columns in the same order as the labels offered by
# the two selectors below, so a selector's `index` can be used to look up the
# matching column name.
colnames = ['ETHCAT', 'GENCAT', 'ANYELL0812', 'anyspd1', 'trulydis_b']

# Dropdown choosing which characteristic forms the rows of the chart.
demos = widgets.Dropdown(
    description='Rows:',
    options=['Race', 'Gender', 'ELL', 'IEP', 'SES'],
    index=0,
    disabled=False,
)

# Radio buttons choosing which characteristic is used for subsetting.
demographics_toggle2 = widgets.RadioButtons(
    options=['Race', 'Gender', 'ELL', 'IEP', 'SES'],
    index=0,
    disabled=False,
    style={'description_width': 'initial'},
)

In [8]:
#Function to reset the graph, preventing too much slicing

def reset():
    """Restore every control to its default and redraw the default chart.

    The filter dropdowns go back to their catch-all entry ("ANY"/"ALL"), the
    row selector back to 'Race' and the cohort slider back to 2008. The two
    bar traces of the module-level ``fig`` are then recomputed from ``tdat``
    grouped by ``ETHCAT``.

    Observers registered on the widgets are notified as their values change.

    Returns
    -------
    None
    """
    race.value = 'ANY'
    gender.value = 'ANY'
    ell.value = 'ALL'
    dis.value = 'ALL'
    iep.value = 'ALL'
    demos.value = 'Race'
    cohort.value = 2008

    # Each row's Frequency as a percentage of its ETHCAT group's total.
    group_total = tdat.groupby('ETHCAT')['Frequency'].transform('sum')
    shares = tdat.assign(Percent=tdat['Frequency'] / group_total * 100)

    # One table per GPA band, in trace order (fig.data[0] = over, [1] = under).
    # Only the Percent column is summed: summing the whole frame would also
    # aggregate the string category columns, which is wasted work and whose
    # behaviour depends on the pandas version.
    band_tables = []
    for gpa_label in ('GPA Over 80', 'GPA Under 80'):
        band = shares[shares['AVG_GPA3'] == gpa_label]
        totals = band.groupby('ETHCAT', as_index=False)['Percent'].sum()
        totals['Percent'] = totals['Percent'].round(1)
        band_tables.append(totals)

    with fig.batch_update():
        for position, totals in enumerate(band_tables):
            fig.data[position].x = totals['Percent']
            fig.data[position].y = totals['ETHCAT']

In [9]:
def buildfigure(indata):
    """Create the stacked horizontal bar chart and store it in the global ``fig``.

    For every ``ETHCAT`` group in ``indata`` the function computes what
    percentage of the group's total ``Frequency`` falls in each ``AVG_GPA3``
    band ('GPA Over 80' / 'GPA Under 80'), rounded to one decimal, and adds
    one bar trace per band to a new ``go.FigureWidget``.

    Parameters
    ----------
    indata : pandas.DataFrame
        Must contain the columns ``ETHCAT``, ``AVG_GPA3`` and ``Frequency``.
        It is not modified.

    Returns
    -------
    None
        The figure is published through the module-level name ``fig``.
    """
    global fig

    # Each row's Frequency as a percentage of its ETHCAT group's total.
    group_total = indata.groupby('ETHCAT')['Frequency'].transform('sum')
    shares = indata.assign(Percent=indata['Frequency'] / group_total * 100)

    fig = go.FigureWidget()

    # Trace order matters: later updates address the traces by position
    # (0 = over 80, 1 = under 80).
    band_styles = (
        ('GPA Over 80', colorvalues[4]),
        ('GPA Under 80', colorvalues[3]),
    )
    for gpa_label, bar_color in band_styles:
        band = shares[shares['AVG_GPA3'] == gpa_label]
        # Sum only Percent; summing the whole frame would also aggregate the
        # string category columns, which is version-dependent in pandas.
        totals = band.groupby('ETHCAT', as_index=False)['Percent'].sum()
        totals['Percent'] = totals['Percent'].round(1)
        fig.add_bar(name=gpa_label, x=totals['Percent'], y=totals['ETHCAT'],
                    marker=dict(color=bar_color),
                    texttemplate='%{x}%',
                    textposition='inside',
                    orientation='h',
                    opacity=1)

    fig.update_layout(barmode='stack',
                      title='GPA Over 80',
                      xaxis_title='Percentage of Students',
                      margin_b=90,
                      annotations=[dict(xref='paper',
                                        yref='paper',
                                        x=.5, y=-.25,
                                        showarrow=False,
                                        text='Step 1: Choose Rows; Step 2: Choose Characteristic for Subsetting')])
    # A little headroom past 100% so the stacked bars are not clipped.
    fig.update_xaxes(range=(0, 105))

In [10]:
# Build the initial chart from the 2008 working copy; this creates the
# module-level `fig` that the update functions modify in place.
buildfigure(tdat)

In [11]:
# Defining function updating demographics

def update_demographics(Characteristic):
    """Reset the controls and return the widget row for the chosen characteristic.

    Calls ``reset()`` first, then pairs the "Rows" dropdown (``demos``) with
    the filter dropdown that matches the current selection of
    ``demographics_toggle2``. The resulting ``HBox`` is also stored in the
    module-level name ``container``.

    Parameters
    ----------
    Characteristic
        Not read by the function; the selection is taken from
        ``demographics_toggle2.index`` instead.
        NOTE(review): presumably the toggle's value passed in by an
        ``interact``-style caller -- confirm at the call site.

    Returns
    -------
    ipywidgets.HBox
        ``demos`` followed by the selected filter dropdown.
    """
    global container
    reset()
    # Same order as the labels offered by demographics_toggle2.
    demographics = [race, gender, ell, iep, dis]
    selected_filter = demographics[demographics_toggle2.index]
    container = widgets.HBox([demos, selected_filter])
    return container

In [12]:
# Defining function updating bar graph

def updatetrace(change):
    """Recompute both bar traces of ``fig`` from the current widget state.

    Steps:

    1. Take a copy of the frame for the cohort year on the ``cohort`` slider.
    2. If the characteristic selected in ``demographics_toggle2`` has a
       concrete category chosen in its dropdown (anything other than the
       catch-all "ANY"/"ALL" entry), keep only the rows of that category.
    3. Group by the column selected in the ``demos`` ("Rows") dropdown and
       compute, per group, the percentage of ``Frequency`` in each
       ``AVG_GPA3`` band, rounded to one decimal.
    4. Write the results into ``fig.data[0]`` ('GPA Over 80') and
       ``fig.data[1]`` ('GPA Under 80') inside a single batch update.

    Parameters
    ----------
    change
        Change notification supplied by the observed widget. It is not read;
        the state is taken from the widgets directly.

    Returns
    -------
    None
    """
    # Column that forms the rows (y axis) of the chart.
    char = colnames[demos.index]

    temp_df = datadict[str(cohort.value)].copy()

    # toggle label -> (column to filter, dropdown with the chosen value,
    #                  category values that actually occur in the data)
    subset_controls = {
        'Race': ('ETHCAT', race, racoptionsb),
        'Gender': ('GENCAT', gender, genoptionsb),
        'ELL': ('ANYELL0812', ell, elloptionsb),
        'IEP': ('anyspd1', iep, iepoptionsb),
        'SES': ('trulydis_b', dis, disoptionsb),
    }
    selected = subset_controls.get(demographics_toggle2.value)
    if selected is not None:
        column, picker, categories = selected
        # The catch-all entry is not in `categories`, so it leaves the
        # frame unfiltered.
        if picker.value in categories:
            temp_df = temp_df[temp_df[column] == picker.value]

    # Each row's Frequency as a percentage of its group's total.
    group_total = temp_df.groupby(char)['Frequency'].transform('sum')
    temp_df = temp_df.assign(Percent=temp_df['Frequency'] / group_total * 100)

    # One table per GPA band, in trace order. Only Percent is summed: summing
    # the whole frame would also aggregate the string category columns, which
    # is wasted work and version-dependent in pandas.
    band_tables = []
    for gpa_label in ('GPA Over 80', 'GPA Under 80'):
        band = temp_df[temp_df['AVG_GPA3'] == gpa_label]
        totals = band.groupby(char, as_index=False)['Percent'].sum()
        totals['Percent'] = totals['Percent'].round(1)
        band_tables.append(totals)

    with fig.batch_update():
        for position, totals in enumerate(band_tables):
            fig.data[position].x = totals['Percent']
            fig.data[position].y = totals[char]

In [13]:
# Observe

# Redraw the chart whenever the value of any control changes.
for _control in (race, gender, ell, iep, dis, demos, cohort):
    _control.observe(updatetrace, names='value')
del _control