In [40]:
from math import pi

import numpy as np
import pandas as pd
from bokeh.models import Range1d
from bokeh.palettes import Category10
from bokeh.plotting import figure, show
from bokeh.transform import cumsum

In [95]:
# Load the enrollment table; the CSV's unnamed first column holds the
# course codes, so give it a name and use it as the row index.
df = (
    pd.read_csv("SSE_Courses.csv")
    .rename(columns={"Unnamed: 0": "Course"})
    .set_index("Course")
)


In [96]:
# Display the loaded table: one row per course code, one column per semester
df


Unnamed: 0_level_0,Spring '18,Fall '18,Spring '19,Fall '19,Spring '20,Fall '20
Course,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
EM,125,187,228,172,184,197
ES,3,0,80,0,34,63
ISE,0,0,80,0,34,63
SSW,148,219,303,292,342,352
SYS,280,195,312,189,307,371


# EM by Semester (Line)

In [104]:
# Line chart of EM enrollment per semester.

# One integer tick position per semester column, in column order
x = list(range(1, len(df.columns) + 1))
y = list(df.loc['EM'])

# Create a new plot with a title and axis labels
p = figure(title="EM Students by Semester", x_axis_label="Semester", y_axis_label="EM Enrollments")

# Add a line renderer with legend and line thickness
p.line(x, y, legend_label="# of Students", line_width=2, line_color='green')

# Put a tick on every semester and label it with the column name.
# Labels are derived from the dataframe (apostrophe dropped, e.g. "Spring '18"
# -> "Spring 18") so they cannot drift out of sync with the data.
p.xaxis.ticker = x
p.xaxis.major_label_overrides = {
    tick: semester.replace("'", "") for tick, semester in zip(x, df.columns)
}

# Fixed y-axis limits; values outside 50-250 would be clipped from view
p.y_range = Range1d(50, 250)

# Show the results
show(p)

# EM vs SYS (Scatter)

In [105]:
# Scatter plot of EM enrollment (x) against SYS enrollment (y),
# one point per semester column.
p = figure(title="EM vs SYS Students", x_axis_label="EM", y_axis_label="SYS")

# scatter(marker="circle") draws the same glyph as circle(size=...), which
# newer Bokeh releases deprecate when called with a screen-unit size.
p.scatter(list(df.loc['EM']), list(df.loc['SYS']),
          marker="circle", size=10, color="red", alpha=1)

# Fixed y-axis limits
p.y_range = Range1d(0, 500)

# Show the results
show(p)

# Average Enrollment by Program (Bar)

In [120]:
# Bar chart of each course code's mean enrollment across all semesters.

# Row-wise mean: one average per course code, computed in a single call
avg_enrollment = df.mean(axis=1)

# Per-course scalars, kept under their original names
avg_EM, avg_ES, avg_ISE, avg_SSW, avg_SYS = (
    avg_enrollment[code] for code in ('EM', 'ES', 'ISE', 'SSW', 'SYS')
)

# Bar labels and bar heights both come from the same Series, so a label can
# never be paired with another course's average if the row order changes.
courses = list(avg_enrollment.index)
counts = list(avg_enrollment)

p = figure(x_range=courses, title="Average Enrollment by Course",
           toolbar_location=None, tools="", y_axis_label='# of Students', x_axis_label='Course')

p.vbar(x=courses, top=counts, width=0.9)

# No vertical grid lines; bars start from zero
p.xgrid.grid_line_color = None
p.y_range.start = 0

show(p)

# Enrollment for Top 5 Programs of 2020 (Pie)

In [133]:
# Pie chart of each course code's total 2020 enrollment (Spring '20 + Fall '20).

# Sum the two 2020 semester columns for every row in one vectorized step
totals_2020 = df[["Spring '20", "Fall '20"]].sum(axis=1)

# Per-course scalars, kept under their original names
em_2020, es_2020, ise_2020, ssw_2020, sys_2020 = (
    totals_2020[code] for code in ('EM', 'ES', 'ISE', 'SSW', 'SYS')
)

# Mapping of course code -> 2020 total
x = totals_2020.to_dict()

# One row per wedge: course code, value, angular size and fill colour
data = totals_2020.rename_axis('Course').reset_index(name='value')
data['angle'] = data['value']/data['value'].sum() * 2*pi
# NOTE(review): Category10 is keyed by palette size (presumably 3-10 entries),
# so this lookup fails for fewer than 3 or more than 10 rows - confirm if data grows.
data['color'] = Category10[len(x)]

p = figure(title="Total 2020 Enrollment by Course", toolbar_location=None,
           tools="hover", tooltips="@Course: @value", x_range=(-0.5, 1.0))

# Each wedge spans from the running total of the previous angles to its own
p.wedge(x=0, y=1, radius=0.4,
        start_angle=cumsum('angle', include_zero=True), end_angle=cumsum('angle'),
        line_color="white", fill_color='color', legend_field='Course', source=data)

# A pie chart needs no axes or grid
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

show(p)

# Reference (Bokeh gallery pie chart example): https://docs.bokeh.org/en/latest/docs/gallery/pie_chart.html