In [1]:
import pandas as pd
import numpy as np
import csv, sys

### Visualization 3
3 plots showing breakdown of graduation by year as an aggregate of institutions (m/f)  
* one: total female and total male by year  
* two: female broken out by enrollment type  
* three: male broken out by enrollment type

In [2]:
# Create dataframe from processed-data: one row per graduation year, holding the
# female/male graduate totals plus the per-category counts read from each csv.

# lists for processed csv use
years = ["2004-2005", "2005-2006", "2006-2007", "2007-2008", "2008-2009","2009-2010", "2010-2011", "2011-2012", "2012-2013", "2013-2014", "2014-2015", "2015-2016"]
colnames= ['Total, Female: Graduated', 'Total, Male: Graduated']

# Row position read from every csv for each column prefix of graphData.
# NOTE(review): assumes all yearly files list their rows in this same order -- confirm.
rowOf = {'Yr2': 0, 'Yr1': 1, 'Enr': 2, 'Oth': 3}

# x value (graduation year) assigned to the first entry of `years`; incremented per file
graphX = 2005

# extract data from processed-data
# Records are collected in a plain list and turned into a frame once at the end:
# DataFrame.append was removed in pandas 2.0, and appending row by row copies the
# whole frame on every iteration.
records = []
for i in years:
    filename = './processed-data/retention/' + i + '.csv'
    data = pd.read_csv(filename, usecols=colnames)
    record = {'Year': graphX,
              'Total_F': data[colnames[0]].sum(),
              'Total_M': data[colnames[1]].sum()}
    for prefix, row in rowOf.items():
        # Columns are looked up by label, not position: usecols does not
        # guarantee the column order of the returned frame.
        record[prefix + '_F'] = data.iloc[row][colnames[0]]
        record[prefix + '_M'] = data.iloc[row][colnames[1]]
    records.append(record)
    graphX = graphX + 1

# dataFrame for number of graduates (explicit columns keep the order stable,
# and give an empty-but-well-formed frame if `years` is empty)
graphData = pd.DataFrame(records, columns=['Year','Total_F', 'Total_M', 'Yr1_F', 'Yr1_M', 'Yr2_F', 'Yr2_M', 'Enr_F', 'Enr_M','Oth_F','Oth_M'])

pd.options.display.float_format = '{:,.0f}'.format


#### Linegraphs

In [3]:
# plot 1: Graduation by year, female vs male

import bokeh
from bokeh.plotting import figure, output_file, show
from bokeh.models import HoverTool
from bokeh.models import Range1d


def totalPlot():
    """Build the plot of total graduates per year, females vs. males.

    Reads the module-level ``graphData`` frame (columns ``Year``, ``Total_F``
    and ``Total_M``) and draws each series as a line with circle markers.

    Returns:
        list: a one-element list ``[figure]``, so the result can be passed
        directly as one row of a ``bokeh.layouts.layout`` grid.
    """
    p1 = figure(plot_width=400, plot_height=400, toolbar_sticky=False, tools="box_zoom,reset")
    p1.title.text = "Graduation Pattern: Females and Males"
    p1.title.text_font_size = "15px"
    p1.x_range = Range1d(2005, 2017)
    p1.outline_line_width = 2
    p1.xaxis.axis_label = "Graduation Year"
    p1.yaxis.axis_label = "Number of Graduates"
    p1.xaxis.minor_tick_line_color = None
    p1.background_fill_color = "whitesmoke"
    p1.background_fill_alpha = 0.5

    # plotting the data
    # Both series use the same line width so neither is visually emphasised
    # in a plot whose purpose is to compare them.
    series = (
        ("Total_F", "red", "females"),
        ("Total_M", "blue", "males"),
    )
    for column, color, label in series:
        p1.line(graphData['Year'], graphData[column], line_width=2, color=color, legend=label)
        p1.circle(graphData['Year'], graphData[column], color=color, size=8, legend=label)

    # Legend styling has to happen after the glyphs are added: figure.legend
    # only covers Legend objects that already exist, so assignments made on a
    # figure without any legend entries have no effect.
    p1.legend.location = "top_left"
    p1.legend.border_line_width = 2
    p1.legend.border_line_color = "grey"
    return [p1]


In [4]:
# 2 interactive plots. one for females, one for males

def _enrollmentPlot(title, suffix):
    """Build one enrollment-type breakdown figure.

    Args:
        title (str): text shown as the figure title.
        suffix (str): column suffix selecting the series in the module-level
            ``graphData`` frame, e.g. ``"F"`` reads ``Total_F``, ``Enr_F``,
            ``Yr1_F``, ``Yr2_F`` and ``Oth_F``.

    Returns:
        The configured bokeh figure, with one line + circle series per column
        and a legend whose entries hide their series when clicked.
    """
    p = figure(plot_width=300, plot_height=300, toolbar_sticky=False, tools="box_zoom,reset")
    p.title.text = title
    p.title.text_font_size = "15px"
    p.x_range = Range1d(2005, 2017)
    p.outline_line_width = 2
    p.xaxis.axis_label = "Graduation Year"
    p.yaxis.axis_label = "Number of Graduates"
    p.xaxis.minor_tick_line_color = None
    p.background_fill_color = "whitesmoke"
    p.background_fill_alpha = 0.5

    # plotting the data: (column prefix, colour, legend label), drawn in this order
    series = (
        ("Total", "black", "total"),
        ("Enr", "#253494", "upon enrollment"),
        ("Yr1", "#2c7fb8", "year 1"),
        ("Yr2", "#41b6c4", "year 2"),
        ("Oth", "#7fcdbb", "other"),
    )
    for prefix, color, label in series:
        column = prefix + "_" + suffix
        # Same line width on every series and on both figures, so the female
        # and male plots are directly comparable.
        p.line(graphData['Year'], graphData[column], line_width=2, color=color, legend=label)
        p.circle(graphData['Year'], graphData[column], color=color, size=8, legend=label)

    # Legend styling has to happen after the glyphs are added: figure.legend
    # only covers Legend objects that already exist, so assignments made on a
    # figure without any legend entries have no effect.
    p.legend.location = "top_left"
    p.legend.click_policy = "hide"
    p.legend.border_line_width = 2
    p.legend.border_line_color = "grey"
    return p


def fmPlot():
    """Build the two enrollment-type breakdown plots (females, then males).

    Returns:
        list: ``[p2, p3]`` -- the female figure followed by the male figure,
        so the result can be passed directly as one row of a
        ``bokeh.layouts.layout`` grid.
    """
    # plot 2: Females by year broken out by enrollment type
    p2 = _enrollmentPlot("Graduation Pattern by Enrollment Type: Females", "F")

    # plot 3: Males by year broken out by enrollment type
    p3 = _enrollmentPlot("Graduation Pattern By Enrollment Type: Males", "M")

    return [p2, p3]


In [5]:
from bokeh.layouts import layout
from bokeh.layouts import gridplot

# Send the rendered document to a standalone HTML file.
output_file("vis3.html")

# Each inner list is one row of the grid: the female/male totals plot first,
# then the two enrollment-type breakdown plots next to each other.
rows = [totalPlot(), fmPlot()]
l = layout(rows, sizing_mode='stretch_both')

# Save vis3.html and display the result.
show(l)