# In [4]:
import sqlite3
import pandas as pd
import numpy as np

#Compensation Change Import - Start (Bureau of Labor Statistics - Employment Cost Index)
#*Skip the first 15 sheet rows; usecols=4 restricts which columns are parsed
compChange = pd.read_excel("CompensationChange.xlsx", skiprows=15, usecols=4)
#*Annual change per year row = sum of the four quarterly columns, stored as float
compChange['Change_Total'] = (
    compChange['Qtr1']
    + compChange['Qtr2']
    + compChange['Qtr3']
    + compChange['Qtr4']
).astype(float)
#*Average of the annual totals over every year row in the data set
avgCompChange = compChange['Change_Total'].mean()
#Compensation Change Import - End

#Variables Import - Start (Excel file contains sources - includes avg repayment years, avg residency years, unreported salaries, tuition change, loan rates, and discount rate)
stdVar = pd.read_excel("Variables.xlsx")
#*Append the calculated average compensation change (divided by 100) as a new last row
stdVar.loc[stdVar.shape[0]] = [
    'Compensation Change',
    '',
    (avgCompChange / 100),
    '2017',
    '',
    'Employment Cost Index-NAICS',
]
#Variables Import - End

#Undergraduate Cost Import - Start (National Center for Education Statistics - Average Cost of Attendance)
#*Read the sheet without a header row, skipping 6 leading rows and 3 footer rows
undCost = pd.read_excel("UndergraduateCost.xlsx", header=None, skiprows=6, skip_footer=3)
#*Drop column positions 1-16 and 18-20, leaving positions 0 and 17
#*(per the original note: 2015-2016 average associates and undergraduate annual cost for all institutions)
undCost = undCost.drop(undCost.columns[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20]], axis=1)
#*Rename the two remaining columns
undCost.columns = ['Category', 'All_Institutions_2016']
#*Drop rows whose Category text does not contain "Tuition"; rows with a missing Category
#*survive this comparison (NaN == False is False), so they are removed by the dropna that follows
undCost = undCost.drop(undCost[undCost.Category.str.contains("Tuition")==False].index)
undCost = undCost.dropna(subset=['Category'])
#*Discard the last two remaining rows
undCost = undCost[:-2]
#*Relabel the final two rows so later lookups can match on "4-year" / "2-year"
#*NOTE(review): assumes the last two rows are the 4-year and 2-year tuition rows, in that order - confirm against the spreadsheet
undCost.iloc[-2, undCost.columns.get_loc('Category')] = "4-year institutions"
undCost.iloc[-1, undCost.columns.get_loc('Category')] = "2-year institutions"
#Undergraduate Cost Import - End

#Graduate Cost Import - Start (National Center for Education Statistics - Average Cost of Attendance)
#*Read the sheet without a header row, skipping 5 leading rows and 4 footer rows
gradCost = pd.read_excel("GraduateCost.xlsx", header=None, skiprows=5, skip_footer=4)
#*Drop column positions 2-11, leaving positions 0 and 1
#*(per the original note: total annual graduate cost for all institutions)
gradCost = gradCost.drop(gradCost.columns[[2, 3, 4, 5, 6, 7, 8, 9, 10, 11]], axis=1)
#*Rename remaining columns
gradCost.columns = ['Year', 'All_Institutions']
#*Drop rows with no Year label, then discard the last 27 remaining rows
#*NOTE(review): the 27-row trim is tied to the spreadsheet layout - confirm if the source file changes
gradCost = gradCost.dropna(subset=['Year'])
gradCost = gradCost[:-27]

#Graduate Education Cost Change - For Doctoral Data - Start
#*Due to limited data years in doctoral and professional degree annual cost, used graduate cost escalation to apply
#*Each lookup takes the All_Institutions value of the first row whose Year text contains the academic year
eduGrad_2009 = gradCost.at[gradCost[gradCost.Year.str.contains("2008-09")].index[0], 'All_Institutions']
eduGrad_2010 = gradCost.at[gradCost[gradCost.Year.str.contains("2009-10")].index[0], 'All_Institutions']
eduGrad_2011 = gradCost.at[gradCost[gradCost.Year.str.contains("2010-11")].index[0], 'All_Institutions']
eduGrad_2012 = gradCost.at[gradCost[gradCost.Year.str.contains("2011-12")].index[0], 'All_Institutions']
eduGrad_2013 = gradCost.at[gradCost[gradCost.Year.str.contains("2012-13")].index[0], 'All_Institutions']
eduGrad_2014 = gradCost.at[gradCost[gradCost.Year.str.contains("2013-14")].index[0], 'All_Institutions']
eduGrad_2015 = gradCost.at[gradCost[gradCost.Year.str.contains("2014-15")].index[0], 'All_Institutions']
eduGrad_2016 = gradCost.at[gradCost[gradCost.Year.str.contains("2015-16")].index[0], 'All_Institutions']

#*Cumulative escalation factor: product of the seven year-over-year ratios
#*(algebraically this is eduGrad_2016 / eduGrad_2009, since the intermediate years cancel)
eduChange = (eduGrad_2010 / eduGrad_2009) * (eduGrad_2011 / eduGrad_2010) * (eduGrad_2012 / eduGrad_2011) * (eduGrad_2013 / eduGrad_2012) * (eduGrad_2014 / eduGrad_2013) * (eduGrad_2015 / eduGrad_2014) * (eduGrad_2016 / eduGrad_2015)
#Graduate Education Cost Change - For Doctoral Data - End

#*Drop every row whose Year text does not contain "2015-16"
gradCost = gradCost.drop(gradCost[gradCost.Year.str.contains("2015-16")==False].index)
#*Relabel the last remaining row as "2016" so the tuition table can look it up by that label
gradCost.iloc[-1, gradCost.columns.get_loc('Year')] = "2016"
#Graduate Cost Import - End

#Doctoral Cost Import - Start (National Center for Education Statistics - Average Cost of Attendance)
#*Read the sheet without a header row, skipping 5 leading rows and 6 footer rows
docCost = pd.read_excel("DoctoralCost.xlsx", header=None, skiprows=5, skip_footer=6)
#*Drop the column at position 2 (per the original note: keep total annual cost for professional specializations)
docCost = docCost.drop(docCost.columns[[2]], axis=1)
#*Rename remaining columns
docCost.columns = ['Year','Average','Chiropractic', 'Dentistry','Medicine','Optometry','Osteopathic_Medicine','Pharmacy','Podiatry','Veterinary','Law','Theology']
#*Drop rows whose Year text does not contain "2008-09" (the last available year per the original note);
#*rows with a missing Year survive this comparison and are removed by the dropna below
docCost = docCost.drop(docCost[docCost.Year.str.contains("2008-09")==False].index)
#*Drop rows with no Year label, then discard the last two remaining rows
#*NOTE(review): the insert of a single 'Year' value below requires exactly one row to remain - confirm against the spreadsheet
docCost = docCost.dropna(subset=['Year'])
docCost = docCost[:-2]

#Doctoral Education Cost Change - Using Graduate Education Change - Start
#*Apply the graduate cost escalation factor (eduChange, computed in the graduate cost section) because doctoral and professional degree cost data covers limited years
#*Selecting only the cost columns drops 'Year'; every cost is multiplied by eduChange
docCost = docCost[['Average','Chiropractic', 'Dentistry','Medicine','Optometry','Osteopathic_Medicine','Pharmacy','Podiatry','Veterinary','Law','Theology']].multiply(eduChange, axis="index")
#*Re-add the Year column as the first column, labelled "2016"
docCost.insert(loc=0, column='Year', value=['2016'])
#Doctoral Education Cost Change - Using Graduate Education Change - End

#Doctoral Cost Import - End

#Tuition Table - Start
#*Consolidated tuition table keyed to the Employment Projection "Education Needed" labels (Bureau of Labor Statistics - Employment Projections)
tuiTable = pd.DataFrame(columns=['Degree','Type','Education_Years','Annual_Cost'])

#*Source costs, each looked up once
twoYearCost = undCost.at[undCost[undCost.Category.str.contains("2-year")].index[0], 'All_Institutions_2016']
fourYearCost = undCost.at[undCost[undCost.Category.str.contains("4-year")].index[0], 'All_Institutions_2016']
mastersCost = gradCost.at[gradCost[gradCost.Year.str.contains("2016")].index[0], 'All_Institutions']
docRow2016 = docCost[docCost.Year.str.contains("2016")].index[0]

#*Rows in table order; the position in this list becomes the row index that Education_FK refers to
tuitionRows = [
    ['No formal educational credential', 'All', 0, 0],
    ['High school diploma or equivalent', 'All', 0, 0],
    ['Some college, no degree', 'All', 1, twoYearCost],
    ['Postsecondary nondegree award', 'All', 1, twoYearCost],
    ["Associate's degree", 'All', 2, twoYearCost],
    ["Bachelor's degree", 'All', 4, fourYearCost],
    ["Master's degree", 'All', 6, mastersCost],
]
#*Doctoral/professional rows: (Type label, education years, docCost column supplying the annual cost)
#*Advanced Dentistry reuses the Dentistry cost with two additional education years
doctoralTypes = [
    ('Average', 8, 'Average'),
    ('Chiropractic', 8, 'Chiropractic'),
    ('Dentistry', 8, 'Dentistry'),
    ('Advanced Dentistry', 10, 'Dentistry'),
    ('Medicine', 8, 'Medicine'),
    ('Optometry', 8, 'Optometry'),
    ('Pharmacy', 8, 'Pharmacy'),
    ('Podiatry', 8, 'Podiatry'),
    ('Veterinary', 8, 'Veterinary'),
    ('Law', 8, 'Law'),
]
for typeLabel, eduYears, costColumn in doctoralTypes:
    tuitionRows.append(['Doctoral or professional degree', typeLabel, eduYears, docCost.at[docRow2016, costColumn]])

#*Append one row at a time so the table is built exactly as before (index 0..16)
for rowNumber, rowValues in enumerate(tuitionRows):
    tuiTable.loc[rowNumber] = rowValues
#Tuition Table - End

#Function: For those positions that did not have a median salary listed, used specific salaries found on Bureau of Labor Statistics career profiles. Variables file has direct website links where these were found
def assign_misSal(row):
    """Return a usable median wage for one employment-projection row.

    Parameters:
        row: mapping/Series with 'Median_Wage' and 'Employment_Title'.

    Returns:
        * If the wage text contains '>=$208,000': the 'Value' of the first
          stdVar row whose 'Category' contains the employment title, escalated
          by (1 + avgCompChange/100) from that row's 'Year' to 2016.
        * If the wage text contains an em dash: the sentinel 99 (the import
          step drops rows carrying this sentinel).
        * Otherwise: the original 'Median_Wage' value, unchanged.

    Raises:
        IndexError: when a top-coded wage has no matching stdVar row.

    Reads the module-level stdVar and avgCompChange.
    """
    wageText = str(row['Median_Wage'])
    if '>=$208,000' in wageText:
        title = row['Employment_Title']
        # Match the title literally: occupation titles are plain text, and treating
        # them as regular expressions breaks on characters such as "(" or "+".
        matches = stdVar[stdVar.Category.str.contains(title, na=False, regex=False)].index
        if len(matches) == 0:
            raise IndexError("No salary entry in stdVar.Category matches employment title: " + str(title))
        profile = matches[0]
        yearsToEscalate = 2016 - stdVar.at[profile, 'Year']
        return stdVar.at[profile, 'Value'] * ((1 + (avgCompChange/100))**yearsToEscalate)
    elif '—' in wageText:
        return 99
    else:
        return row['Median_Wage']


#Employment Projection Import - Start (Bureau of Labor Statistics - Employment Projections)
#*Read the sheet without a header row, skipping 3 leading rows and 4 footer rows
empProj = pd.read_excel("EmploymentProjections.xlsx", header=None, skiprows=3, skip_footer=4)
#*Rename columns
empProj.columns = ['Employment_Title', 'Matrix_Code', 'Occupation', 'Employment_2016', 'Employment_2026', 'Employment_Change_Number', 'Employment_Change_Percent', 'Self_Employed_Percent', 'Openings_2016_2026', 'Median_Wage', 'Education_Needed', 'Work_Experience', 'Job_Training']
#*Remove all summary lines
empProj = empProj.drop(empProj[empProj.Occupation.str.contains("Summary")].index)
#*Apply specific average median salaries not identified in data set, listed these in Variables excel file along with BLS career profile link
empProj = empProj.assign(Median_Wage=empProj.apply(assign_misSal, axis=1))
#*Remove lines that had no average median salary available that could not be reasonably estimated (entertainers, etc.); assign_misSal marks them with the sentinel 99
empProj = empProj.drop(empProj[empProj.Median_Wage==99].index)
#*Remove rows with a missing median wage; dropna returns a new frame, so the result must be assigned back
empProj = empProj.dropna(subset=['Median_Wage'])
#Employment Projection Import - End

#Function: Populate Education_FK column in Time Analysis table below from Tuition table, returns index for joining. Uses keywords to identify doctoral and professional degree specializations
def assign_edu_fk (row):
    """Return the positional index of the tuition-table row that applies to ``row``.

    Non-doctoral degrees are located by their 'Education_Needed' label in the
    tuition table's 'Degree' column. Doctoral/professional degrees are located
    by 'Type', chosen from the first keyword group found in the lower-cased
    'Employment_Title'; titles matching no group use the 'Average' row.

    Reads the module-level tuiTable.
    """
    doctoral = 'Doctoral or professional degree'
    degree = row['Education_Needed']
    if degree != doctoral:
        return tuiTable.set_index('Degree').index.get_loc(degree)

    # Checked top to bottom; the first group with a keyword in the title wins.
    keyword_groups = (
        (('chiropractor',), 'Chiropractic'),
        (('optometrist',), 'Optometry'),
        (('pharmacist',), 'Pharmacy'),
        (('podiatrist',), 'Podiatry'),
        (('veterinarian',), 'Veterinary'),
        (('orthodontist', 'prosthodontist'), 'Advanced Dentistry'),
        (('dentist',), 'Dentistry'),
        (('lawyer', 'judge', 'judicial'), 'Law'),
        (('practitioner', 'pediatrician', 'psychiatrist', 'surgeon', 'psychologist',
          'anesthesiologist', 'internist', 'gynecologist', 'audiologist',
          'medical scientist', 'physical therapist'), 'Medicine'),
    )
    title = str(row['Employment_Title']).lower()
    tuition_type = 'Average'
    for keywords, candidate in keyword_groups:
        if any(word in title for word in keywords):
            tuition_type = candidate
            break
    return tuiTable.set_index('Type').index.get_loc(tuition_type)

#Function: Identify those professional degree positions that require a residency, returns residency years
def assign_resYears (row, rYears):
    """Return ``rYears`` for doctoral/professional rows whose job training is an internship/residency, else 0."""
    needsResidency = (
        row['Education_Needed'] == "Doctoral or professional degree"
        and row['Job_Training'] == 'Internship/residency'
    )
    return rYears if needsResidency else 0

#Function: Calculates expected inflows based on excluding education and residency years, returns expected salary based on assumption that median average salary in Bureau of Labor Statistics Employment Projection file is achieved at the midpoint of working years
def assign_inflow (row, tYear, careerYears, resSal):
    """Return the cash inflow for analysis year ``tYear`` of one occupation row.

    Parameters:
        row: mapping/Series with 'Education_Years', 'Residency' and 'Median_Wage'.
        tYear: 1-based year offset within the analysis horizon.
        careerYears: total number of years in the horizon.
        resSal: residency salary paid during residency years.

    Returns:
        0.0 while still in education; ``resSal`` escalated by ``tYear`` years of
        compensation growth during residency; otherwise 'Median_Wage' escalated
        to the midpoint working year and then scaled to ``tYear``.

    Reads the module-level avgCompChange (a percentage).
    """
    workingYears = int(careerYears - row['Education_Years'] - row['Residency'])
    schoolYears = int(row['Education_Years'])
    trainingYears = int(row['Residency'])
    halfWorkingYears = int(workingYears/2)

    if tYear <= schoolYears:
        return 0.0

    growth = (1 + (avgCompChange/100))
    inResidency = (trainingYears > 0) and (tYear <= (schoolYears + trainingYears))
    if inResidency:
        return (resSal * (growth**(tYear)))

    # Escalate the median wage to the midpoint working year, then scale from the
    # midpoint to tYear. The two exponents differ by exactly tYear, so the
    # result equals Median_Wage * growth**tYear up to floating-point rounding.
    wageAtMidpoint = row['Median_Wage'] * (growth**(careerYears-workingYears+halfWorkingYears+1))
    midpointToYear = growth**(halfWorkingYears+1-(tYear-(careerYears-workingYears)))
    return (wageAtMidpoint / midpointToYear)

#Helper: Total tuition for school years (startYear+1) through endYear, with year k costing annualCost * (1 + tuitionChange)**k
def _escalated_tuition(annualCost, startYear, endYear, tuitionChange):
    """Return the sum of ``annualCost`` escalated once per school year; 0 when the range is empty."""
    escalationFactor = sum((1 + tuitionChange)**(year + 1) for year in range(startYear, endYear))
    return escalationFactor * annualCost


#Helper: Annual loan payment from the present-value-of-annuity factor with a daily periodic rate
def _annual_loan_payment(principal, annualRate, repayYears):
    """Return the per-period payment on ``principal`` multiplied by 365.

    Uses a periodic rate of ``annualRate / 365`` over ``365 * repayYears`` periods.
    """
    dailyRate = annualRate / 365
    annuityFactor = (1 - ((1 + dailyRate)**(-(365*repayYears)))) / dailyRate
    return (principal / annuityFactor) * 365


#Function: Calculates expected outflows based on excluding education years, returns expected payment given average repayment years and interest rates
def assign_outflow (row, tYear, careerYears, undTuition, tuitionChange, undRepay, grdRepay, undRate, grdRate):
    """Return the (negative) student-loan payment for analysis year ``tYear``.

    Parameters:
        row: mapping/Series with 'Education_Years', 'Education_FK' and 'Annual_Cost'.
        tYear: 1-based year offset within the analysis horizon.
        careerYears: accepted for signature compatibility; not used here.
        undTuition: annual undergraduate tuition applied to the first 4 school
            years of graduate-level rows.
        tuitionChange: annual tuition escalation as a fraction.
        undRepay, grdRepay: repayment periods in years.
        undRate, grdRate: annual loan interest rates as fractions.

    Returns:
        0.0 during education, when no education is required, or once the
        repayment period has ended; otherwise the annual payment as a negative
        number.
    """
    educationYears = int(row['Education_Years'])

    #No outflow if year is less than total education years or education years are 0
    if tYear <= educationYears or educationYears == 0:
        return 0.0

    #If position requires less than masters degree (based on index in Tuition table), all school years are charged at the row's 'Annual_Cost'
    if row['Education_FK'] in [2, 3, 4, 5]:
        if tYear > (educationYears + undRepay):
            return 0.0
        totalTuition = _escalated_tuition(row['Annual_Cost'], 0, educationYears, tuitionChange)
        return -_annual_loan_payment(totalTuition, undRate, undRepay)

    #If position requires masters degree or higher, the first 4 school years are charged at the undergraduate tuition and the remaining years at the row's 'Annual_Cost'
    if tYear > (educationYears + grdRepay):
        return 0.0
    undergradYears = 4
    totalUndTuition = _escalated_tuition(undTuition, 0, undergradYears, tuitionChange)
    totalGrdTuition = _escalated_tuition(row['Annual_Cost'], undergradYears, educationYears, tuitionChange)
    #Both loans are repaid over the graduate repayment period, each at its own interest rate
    pmtUndAnnual = _annual_loan_payment(totalUndTuition, undRate, grdRepay)
    pmtGrdAnnual = _annual_loan_payment(totalGrdTuition, grdRate, grdRepay)
    return -(pmtUndAnnual + pmtGrdAnnual)

#Function: Net present value calculation by year based on summing cash inflow and outflow for the same future year and discounting based on discount rate (30 year Treasury rate)
def netPresentValue (row, tYear, discountRate):
    """Return the discounted net cash flow of ``row`` for analysis year ``tYear``.

    The year's 'CashInflow_<year>' and 'CashOutflow_<year>' values (year is
    2016 + tYear) are added and divided by (1 + discountRate)**(tYear - 1),
    so the first analysis year is not discounted.
    """
    yearLabel = str(2016 + tYear)
    netCash = row['CashInflow_' + yearLabel] + row['CashOutflow_' + yearLabel]
    discountFactor = (1 + discountRate)**(tYear-1)
    return (netCash / discountFactor)

#Function: Totals all net present values for all career years
def sum_NetPresentValue (row, careerYears):
    """Return the sum of 'NetPresentValue_<year>' over years 2017 .. 2016 + careerYears.

    ``careerYears`` may be a float; years are visited while the 1-based offset
    is <= careerYears. Returns 0 when no year qualifies.
    """
    def yearlyValues():
        offset = 1
        while offset <= careerYears:
            yield row['NetPresentValue_' + str(2016 + offset)]
            offset += 1

    return sum(yearlyValues())
    
#Time Analysis - Start
#*Helper: 'Value' of the first stdVar row whose 'Variable' text contains the given label
def _stdValue(label):
    return stdVar.at[stdVar[stdVar.Variable.str.contains(label)].index[0], 'Value']

#*Undergraduate tuition as of 2016
undTuition = undCost.at[undCost[undCost.Category.str.contains("4-year")].index[0], 'All_Institutions_2016']
#*Average number of medical or advanced dentistry residency years
#*NOTE(review): "Residency" is also a substring of "Average Residency Salary"; this relies on the years row coming first in the Variables file - confirm
resYears = _stdValue("Residency").astype(int)
#*Average residency salary as of 2016
resSal = _stdValue("Average Residency Salary")
#*Average annual tuition change/escalation
tuitionChange = _stdValue("Tuition Change")
#*Average loan repayment years (undergraduate, then graduate)
undRepay = _stdValue("Undergraduate Repayment")
grdRepay = _stdValue("Graduate Repayment")
#*Average loan interest rates (undergraduate, then graduate)
undRate = _stdValue("Undergraduate Loan Rate")
grdRate = _stdValue("Graduate Loan Rate")
#*Assumed time horizon for analysis - 45 years
careerYears = _stdValue("Career Years")
#*Assumed discount rate - 30 year Treasury rate
discountRate = _stdValue("Discount Rate")
#*Desired decimal places
desred_decimals = 2

#Create new subset of employment projection data, keeping needed columns
timeAnalysis = empProj[['Employment_Title','Employment_2016', 'Employment_2026', 'Openings_2016_2026', 'Median_Wage', 'Education_Needed', 'Work_Experience', 'Job_Training']].copy()
#*Format median salary to desired decimal places
timeAnalysis['Median_Wage'] = timeAnalysis['Median_Wage'].apply(lambda x: round(x,desred_decimals))
#*Assign education index ID (row position in the tuition table) using assign_edu_fk
timeAnalysis = timeAnalysis.assign(Education_FK=timeAnalysis.apply (lambda row: assign_edu_fk (row),axis=1))
#*Assign residency years using assign_resYears
timeAnalysis = timeAnalysis.assign(Residency=timeAnalysis.apply (lambda row: assign_resYears (row, resYears),axis=1))
#*Join employment projection data with tuition table to pull in annual degree cost and total education years
timeAnalysis = timeAnalysis.merge(tuiTable[['Education_Years','Annual_Cost']], left_on=['Education_FK'], right_index=True)

#*Iterate through each year in time analysis to calculate cash inflow and outflow, as well as net present value
#*Each year adds three columns, suffixed with the calendar year (2016 + tYear), each rounded to the desired decimals
tYear = 1
while tYear <= (careerYears):
    yearLabel = str(2016 + tYear)
    inflowCol = 'CashInflow_' + yearLabel
    outflowCol = 'CashOutflow_' + yearLabel
    npvCol = 'NetPresentValue_' + yearLabel

    timeAnalysis[inflowCol] = timeAnalysis.apply (lambda row: assign_inflow (row,tYear,careerYears,resSal),axis=1)
    timeAnalysis[inflowCol] = timeAnalysis[inflowCol].apply(lambda x: round(x,desred_decimals))

    timeAnalysis[outflowCol] = timeAnalysis.apply (lambda row: assign_outflow (row,tYear,careerYears,undTuition,tuitionChange,undRepay,grdRepay,undRate,grdRate),axis=1)
    timeAnalysis[outflowCol] = timeAnalysis[outflowCol].apply(lambda x: round(x,desred_decimals))

    #*Net present value reads the rounded inflow/outflow columns written just above
    timeAnalysis[npvCol] = timeAnalysis.apply (lambda row: netPresentValue (row,tYear,discountRate),axis=1)
    timeAnalysis[npvCol] = timeAnalysis[npvCol].apply(lambda x: round(x,desred_decimals))
    tYear+=1

#*Total net present value for all years
timeAnalysis = timeAnalysis.assign(Total_NetPresentValue=timeAnalysis.apply (lambda row: sum_NetPresentValue (row,careerYears),axis=1))
#*Format total net present value to desired decimal places
timeAnalysis['Total_NetPresentValue'] = timeAnalysis['Total_NetPresentValue'].apply(lambda x: round(x,desred_decimals))

#*Remove unneeded columns for data visualization
#*axis is passed by keyword: newer pandas releases no longer accept it positionally
timeAnalysis = timeAnalysis.drop(['Work_Experience', 'Job_Training', 'Annual_Cost'], axis=1)
#Time Analysis - End


#SQLITE Database setup - Start (Create tables for time series analysis, tuition table, and calculation variables)
conn = sqlite3.connect('employment_analysis.db')
try:
    #*Each table is replaced if it already exists, so re-running the analysis refreshes the database
    timeAnalysis.to_sql('timeAnalysis', conn, if_exists='replace')
    tuiTable.to_sql('tuitionCost', conn, if_exists='replace')
    stdVar.to_sql('calculationVariables', conn, if_exists='replace')
finally:
    #*Always release the database file, even if one of the writes fails
    conn.close()
#SQLITE Database setup - End


# [notebook cell output] '0.22.0'  (presumably the pandas version in use - unconfirmed)

# In [1]:
from bokeh.io import show, output_file
from bokeh.layouts import row, widgetbox
from bokeh.models import CustomJS, ColumnDataSource
from bokeh.models.widgets import RangeSlider
from bokeh.plotting import figure
from bokeh.models import HoverTool, NumeralTickFormatter

#List of standard tools to include
TOOLS = "pan,wheel_zoom,box_zoom,reset,hover,save"

#Create pandas dataframe to select needed columns for SQL
#*The connection is only needed for this query, so it is closed immediately afterwards (also on error)
conn = sqlite3.connect('employment_analysis.db')
try:
    barChartDataAll = pd.read_sql(""" SELECT Employment_Title, Total_NetPresentValue, Education_Needed, Education_Years, Residency FROM timeAnalysis ORDER BY Total_NetPresentValue DESC""", con=conn)
finally:
    conn.close()

#To achieve bar chart format, utilize index column to create x-values to update easily from slider
#*Reset index
barChartDataAll = barChartDataAll.reset_index(drop=True)
#*Create new column from the reset index, offset by 0.5 so each bar is centered between integer x positions
barChartDataAll['graph_Ind'] = (barChartDataAll.index * 1.0) + 0.5
#*Count number of rows
data_lengthAll = barChartDataAll.shape[0]
#*Determine the maximum net present value for the y axis (with 1,000,000 of headroom)
y_maxAll = barChartDataAll['Total_NetPresentValue'].values.max() + 1000000

#Column Data source set to SQL pull for pandas dataframe
sourceAll = ColumnDataSource(barChartDataAll)

#Set up figure
pAll = figure(plot_height=500, plot_width=900, toolbar_location="right", x_axis_label='Position Type (Hover Over)', y_axis_label='Net Present Value (in 2017 Dollars)', title="Net Present Value Over Entire Career (Ranked By Position Type)", tools=TOOLS)
#*Set x and y ranges based on row count (x range) and net present value max (y range)
pAll.y_range.start=0
pAll.y_range.end=y_maxAll
pAll.yaxis.formatter = NumeralTickFormatter(format="0,0")
pAll.x_range.start=0
pAll.x_range.end=data_lengthAll
pAll.title.align = "center"
#*Set plot type as bar chart, assign x axis to be modified index and y axis to be net present value
pAll.vbar(x='graph_Ind', top='Total_NetPresentValue', width=0.8, source=sourceAll)

#Set up hover
hoverAll = pAll.select_one(HoverTool)
hoverAll.tooltips = [('Position', '@Employment_Title'),
                      ('Net Present Value', '@Total_NetPresentValue{0,0}'),
                      ('Degree', '@Education_Needed'),
                      ('Education Years', '@Education_Years'),
                      ('Residency', '@Residency')
                     ]

#Chart formatting to remove unneeded grid lines and tick marks
pAll.xgrid.grid_line_color = None
pAll.xaxis.major_tick_line_color = None
pAll.xaxis.minor_tick_line_color = None
pAll.xaxis.major_label_text_font_size = '0pt'
pAll.x_range.range_padding = 0.1

#Range slider callback function to limit x-axis as selected
callbackRangeAll = CustomJS(args=dict(xr=pAll.x_range), code="""
    var a = cb_obj.value;
    xr.start = a[0];
    xr.end = a[1];
""")

#Set up range slider starting at 0 and ending at the count of all rows in the data set
range_sldr_All = RangeSlider(start=0, end=data_lengthAll, value=(0,data_lengthAll), step=10, title="Positions to display (By Descending Value)")
range_sldr_All.js_on_change('value', callbackRangeAll)

#Set up layout format with plot and range slider
layout = row(pAll,widgetbox(range_sldr_All))

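# [notebook cell output] NameError: name 'sqlite3' is not defined  (presumably recorded when this cell was run before the cell that imports sqlite3)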

# In [7]:
from bokeh.embed import components

#Set up components for embedding chart in html document
#*components() returns the chart's script markup and its placeholder div markup
script, div = components(layout)

#HTML document text including necessary stylesheets, scripts, and comments to display
#*The template has exactly two "{}" placeholders: the first (in <head>) receives the script, the second (in <body>) receives the div
#*NOTE(review): the CDN links are pinned to bokeh 0.12.14 - presumably this must match the installed bokeh version; confirm
html = """<!DOCTYPE html>
<html lang="en">
    <head>
        <meta charset="utf-8">
        <title>Bokeh Net Present Value Analysis</title>

        <link rel="stylesheet" href="https://cdn.pydata.org/bokeh/release/bokeh-0.12.14.min.css" type="text/css" />
        <link rel="stylesheet" href="https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.14.min.css" type="text/css" />
        <script type="text/javascript" src="https://cdn.pydata.org/bokeh/release/bokeh-0.12.14.min.js"></script>
        <script type="text/javascript" src="https://cdn.pydata.org/bokeh/release/bokeh-widgets-0.12.14.min.js"></script>
        <!-- COPY/PASTE SCRIPT HERE -->
        {}

    </head>
    <body style="padding:20px;">
        <!-- INSERT DIVS HERE -->
        {}
        <div style="padding:20px; margin:20px; border-style:solid; border-width:thin;">
        <p><b>Data Assumptions, Sources, and Calculations:</b></p>
           <ul>
               <li type="square">Position information, education required, and median 2016 salaries from <a href="https://data.bls.gov/projections/occupationProj">Bureau of Labor Statistics - Employment Projections</a></li>
               <li type="square">Undergraduate tuition cost from <a href="https://nces.ed.gov/programs/digest/d16/tables/dt16_330.40.asp?current=yes">National Center for Education Statistics - Average Cost of Attenance</a></li>
               <li type="square">Graduate tuition cost from <a href="https://nces.ed.gov/programs/digest/d16/tables/dt16_330.50.asp?current=yes">National Center for Education Statistics - Average Cost of Attenance</a></li>
               <li type="square">Doctoral and Professional Degree tuition cost from <a href="https://nces.ed.gov/programs/digest/d10/tables/dt10_348.asp">National Center for Education Statistics - Average Cost of Attenance</a></li>
               <li type="square">To adjust for data availabilty by year and salary increases, the 10 year average annual compensation change was from <a href="https://data.bls.gov/timeseries/CIS1010000000000Q">Bureau of Labor Statistics - Employment Cost Index</a></li>
               <li type="square">To adjust for data availabilty by year and tuition changes, the average tuition change was from <a href="https://nces.ed.gov/programs/digest/d16/tables/dt16_330.40.asp?current=yes">National Center for Education Statistics - 5 Year Constant Dollar Change</a></li>
               <li type="square">Undergraduate and Graduate average loan repayments in years were from <a href="https://studentaid.ed.gov/sa/repay-loans/understand/plans">US Department of Education - Federal Student Aid Office</a></li>
               <li type="square">Undergraduate and Graduate average annual interest rates were from <a href="https://studentaid.ed.gov/sa/types/loans/interest-rates">US Department of Education - Federal Student Aid Office</a></li>
               <li type="square">Discount rate used for net present value calculations was the 30 Year Treasury Rate from <a href="https://www.treasury.gov/resource-center/data-chart-center/interest-rates/Pages/TextView.aspx?data=yield">US Treasury - Daily Yield Curve Rates as of 3/27/18</a></li>
               <li type="square">Average medical and advanced dentistry residency years and salary were from <a href="https://www.medscape.com/features/slideshow/public/residents-salary-and-debt-report-2016#page=2">MEDSCAPE - Resident Salary Report</a></li>
               <li type="square">Assumption: Only education costs and expected salary were considered. Living expenses, books, supplies, additional fees, etc. were not considered in this analysis</li>    
               <li type="square">Assumption: Time series analysis was set at 45 years total, divided between applicable position education years and working years</li>
               <li type="square">Assumption: Education attendance was assumed to be full-time and all education costs were assumed to be borrowed and paid back based on average undergraduate and graduate repayment years</li>
               <li type="square">Assumption: Median career salary was expected at the midpoint of working years (median salary was escalated appropriately based on average compensataion change)</li>
               <li type="square">Calculation: Present Value of Annuity formula was used to determine total annual student loan payment after all education years (with tuition escalation)</li>
               <li type="square">Calculation: Net Present Value formula was used to net any annual student loan payments with projected annual salary (with salary escalation)</li>
           </ul>
        </div>       
        
    </body>
</html>""".format(script, div)

#Open and write the HTML file
#*The file is opened in "w" mode, so an existing Employment_NPV_Analysis.html is overwritten
#*NOTE(review): no encoding is passed to open(), so the platform default is used while the page declares utf-8 - confirm this is safe for non-ASCII titles
with open("Employment_NPV_Analysis.html", "w") as f:
    f.write(html)