In [None]:
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS

### Common Abbreviations Used in this Notebook:

- CIRR: Council on Integrity in Results Reporting
- THEC: Tennessee Higher Education Commission

# Bringing in NSS data
Taken from the Tennessee Higher Education Commission (THEC), NSS's data report was in the form of a three-page PDF. I split this PDF into three one-page PDFs, converted each to an Excel sheet, then saved each as a CSV file. Each page had the same row structure with distinct columns (each column corresponding to a different program offered by NSS), so I was able to merge all three dataframes on the row identifier column, creating a single dataframe holding all NSS-related data.

For the extent of my capstone, I will only be using the "nss_3" dataframe, as I will only be utilizing the final column. However, I am leaving in the larger, merged data set for the purpose of further EDA on other NSS programs.

In [None]:
# Load the three single-page NSS exports, one CSV per page of the original report.
nss_1, nss_2, nss_3 = (
    pd.read_csv(page_csv)
    for page_csv in (
        '../data/upncoming/NSS_1.csv',
        '../data/upncoming/NSS_2.csv',
        '../data/upncoming/NSS_3.csv',
    )
)

In [None]:
# Join the three page-level frames on their shared row-identifier column
# ('Unnamed: 0'), giving one wide table with every NSS program as a column.
nss_data = nss_1.merge(nss_2, on='Unnamed: 0').merge(nss_3, on='Unnamed: 0')
nss_data

# Up-And-Coming Tech Cities

I selected several programs that appeared to be comparable to NSS in the sense that they are committed to a similar goal of bolstering the growing tech industries in their cities, which were/are not tech hubs. Though there was an abundance of these programs, I limited my selections to programs that were CIRR-certified. The reason for doing this was that all CIRR-certified data sets are formatted the same way, although this format is not shared with NSS's data, due to NSS not being CIRR-certified.

This means that the bulk of the labor required to perform any meaningful analysis will be bridging the CIRR-certified data formatting with THEC's data formatting.

## Tech Elevator (Detroit)

In [None]:
# Tech Elevator (Detroit) data; later cells read its 'Unnamed: 1' column by row label.
tech_elevator= pd.read_csv('../data/upncoming/tech_elevator_detroit.csv')

## Hack Reactor (Austin)

In [None]:
# Hack Reactor (Austin) data; later cells read its 'Unnamed: 1' column by row label.
hack_reactor = pd.read_csv('../data/upncoming/hack_reactor_austin.csv')

## Project Shift (Durham)

In [None]:
# Project Shift (Durham) data; later cells read its 'Unnamed: 1' column by row label.
project_shift = pd.read_csv('../data/upncoming/project_shift_durham.csv')

## Hack Upstate (Syracuse)

In [None]:
# Hack Upstate (Syracuse) data; later cells read its 'Unnamed: 1' column by row label.
hack_upstate = pd.read_csv('../data/upncoming/hack_upstate_syracuse.csv')

## Codeup (San Antonio)

In [None]:
# Codeup (San Antonio) data; later cells read its 'Unnamed: 1' column by row label.
codeup_SA = pd.read_csv('../data/upncoming/codeup_san_antonio.csv')

# Established Tech Hubs

Additionally, I wanted to compare these full stack bootcamps in up-and-coming tech hub cities with full stack bootcamps in cities that are already massive tech hubs. I expect certain items from these bootcamps, such as starting salaries and time between graduation and job placement, to vary from the former category, as job opportunities are likely more abundant.

## Code Platoon (Chicago)

In [None]:
# Code Platoon (Chicago) data; later cells read its 'Unnamed: 1' column by row label.
code_platoon = pd.read_csv('../data/techcenters/code_platoon_chicago.csv')

## Codesmith (Los Angeles)

In [None]:
# Codesmith (Los Angeles) data; later cells read its 'Unnamed: 1' column by row label.
codesmith = pd.read_csv('../data/techcenters/codesmith_la.csv')

## Epicodus (Seattle)

In [None]:
# Epicodus (Seattle) data; later cells read its 'Unnamed: 1' column by row label.
epicodus = pd.read_csv('../data/techcenters/epicodus_seattle.csv')

## Grace Hopper Full Stack Academy (NYC) 

In [None]:
# Grace Hopper / Fullstack Academy (NYC) data; later cells read its 'Unnamed: 1' column by row label.
fsa_gh = pd.read_csv('../data/techcenters/fullstack_academy_grace_hopper_nyc.csv')

## Launch Academy (Boston)

In [None]:
# Launch Academy (Boston) data; later cells read its 'Unnamed: 1' column by row label.
launch_academy = pd.read_csv('../data/techcenters/launch_academy_boston.csv')

# EXPLORATORY DATA ANALYSIS

### Notes for Accessing Data:
df['Unnamed: x'].loc[n] To access point data in columns
- x = [1,2], x ∈ ℤ
- n = [0,40], n ∈ ℤ

### Up and Coming Dataframe Names:
- tech_elevator
- hack_reactor
- project_shift
- hack_upstate
- codeup_SA

### NSS Dataframe Name:
- nss_data

### Tech Hub Dataframe Names:
- code_platoon
- codesmith
- epicodus
- fsa_gh
- launch_academy

## Graduation Rates

In [None]:
# For every CIRR-style frame, row 0 of 'Unnamed: 1' is used as the school name
# and row 10 as the graduation rate. The rates are still raw strings here.
grad_rates_upncoming = pd.DataFrame({
    'School Name': [
        report['Unnamed: 1'].loc[0]
        for report in (tech_elevator, hack_reactor, project_shift, hack_upstate, codeup_SA)
    ],
    'Graduation Rates': [
        report['Unnamed: 1'].loc[10]
        for report in (tech_elevator, hack_reactor, project_shift, hack_upstate, codeup_SA)
    ],
})

grad_rates_techcenters = pd.DataFrame({
    'School Name': [
        report['Unnamed: 1'].loc[0]
        for report in (code_platoon, codesmith, epicodus, fsa_gh, launch_academy)
    ],
    'Graduation Rates': [
        report['Unnamed: 1'].loc[10]
        for report in (code_platoon, codesmith, epicodus, fsa_gh, launch_academy)
    ],
})

# NSS uses a different layout: its rate is read from the bootcamp column of nss_3.
grad_rates_nss = pd.DataFrame({
    'School Name': ['Nashville Software School'],
    'Graduation Rates': [nss_3['Web Developer Bootcamp, Certificate'].loc[10]],
})

In [None]:
# Strip the percent sign and convert each group's rates from strings to floats.
for rates in (grad_rates_upncoming, grad_rates_nss, grad_rates_techcenters):
    rates['Graduation Rates'] = rates['Graduation Rates'].str.replace('%', '').astype(float)

In [None]:
# NOTE(review): disabled draft of a per-group mean table; `group_averages` is not
# referenced anywhere else in the visible notebook.
#group_averages = pd.DataFrame({'Group' : ['Up and Coming Group Average', 'Tech Hub Group Average'],
#                              'Average' : [grad_rates_upncoming['Graduation Rates'].mean(),
#                                           grad_rates_techcenters['Graduation Rates'].mean()]})

In [None]:
# Draw one horizontal bar series per group so each gets its own legend entry.
for rates, bar_color, group_label in (
    (grad_rates_upncoming, '#d683a6', 'Up and Coming Cities'),
    (grad_rates_nss, '#747e79', 'NSS'),
    (grad_rates_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(rates['School Name'], rates['Graduation Rates'], color=bar_color, label=group_label)

plt.title('Graduation Rates Among Programs (Full Stack)')
plt.xlabel('Graduation Rate (%)')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/grad_rates.png', bbox_inches="tight")

## Employment (in-field) Rates

In [None]:
# The in-field employment rate sits at row 15 of the CIRR-style frames
# and at row 16 of the NSS frame.
employment_upncoming = pd.DataFrame({
    'School Name': [
        report['Unnamed: 1'].loc[0]
        for report in (tech_elevator, hack_reactor, project_shift, hack_upstate, codeup_SA)
    ],
    'Employment Rates': [
        report['Unnamed: 1'].loc[15]
        for report in (tech_elevator, hack_reactor, project_shift, hack_upstate, codeup_SA)
    ],
})

employment_techcenters = pd.DataFrame({
    'School Name': [
        report['Unnamed: 1'].loc[0]
        for report in (code_platoon, codesmith, epicodus, fsa_gh, launch_academy)
    ],
    'Employment Rates': [
        report['Unnamed: 1'].loc[15]
        for report in (code_platoon, codesmith, epicodus, fsa_gh, launch_academy)
    ],
})

employment_nss = pd.DataFrame({
    'School Name': ['Nashville Software School'],
    'Employment Rates': [nss_3['Web Developer Bootcamp, Certificate'].loc[16]],
})

In [None]:
# Inspect the employment rates before they are converted from strings to floats.
employment_upncoming

In [None]:
# Strip the percent sign and convert each group's rates from strings to floats.
for rates in (employment_upncoming, employment_nss, employment_techcenters):
    rates['Employment Rates'] = rates['Employment Rates'].str.replace('%', '').astype(float)

In [None]:
# Draw one horizontal bar series per group so each gets its own legend entry.
for rates, bar_color, group_label in (
    (employment_upncoming, '#d683a6', 'Up and Coming Cities'),
    (employment_nss, '#747e79', 'NSS'),
    (employment_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(rates['School Name'], rates['Employment Rates'], color=bar_color, label=group_label)

plt.title('In-Field Employment Rates Among Programs (Full Stack)')
plt.xlabel('Employment Rate (%)')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/employ_rates.png', bbox_inches="tight")

### Web Scraping (Monthly Cost of Living by State)

In [None]:
# Page holding the monthly cost-of-living table for US states.
URL = 'https://livingcost.org/cost/united-states'

# requests has no default timeout, so set one to avoid hanging the kernel,
# and raise on a 4xx/5xx reply instead of silently parsing an error page.
response = requests.get(URL, timeout=30)
response.raise_for_status()

In [None]:
# Show the HTTP status code returned for the cost-of-living request.
response.status_code

In [None]:
# Name the parser explicitly: without it BeautifulSoup picks whichever parser is
# installed (and warns), so results could differ between environments.
soup = BS(response.text, 'html.parser')

In [None]:
# Locate the cost-of-living table and fail with a clear message if the page layout changed.
col_table = soup.find('table', class_='table table-sm table-striped text-center')
if col_table is None:
    raise ValueError('Cost-of-living table not found in the scraped page')

# read_html expects a file-like object; passing a literal HTML string is deprecated.
COL_list = pd.read_html(StringIO(str(col_table)))

# Work on a new frame rather than mutating COL_list[0] in place.
COL = COL_list[0].drop(columns=['Rank'])

# regex=False makes '$' a literal character on every pandas version
# (as a regex it would be the end-of-string anchor).
monthly_cost = COL['Cost of living'].str.replace('$', '', regex=False).astype(int)

# Convert the monthly figure to an annual one.
COL['Cost of living'] = monthly_cost * 12

# Index by state name so later cells can look costs up with .loc[<state>].
COL = COL.set_index('State')

## EXTRA STUFF??

## Salary?

### Notes for Accessing Data:
df['Unnamed: x'].loc[n] To access point data in columns

### Up and Coming Dataframe Names:
- tech_elevator (DETROIT)
- hack_reactor (AUSTIN)
- project_shift (DURHAM)
- hack_upstate (SYRACUSE)
- codeup_SA (SAN ANTONIO)

### NSS Dataframe Name:
- nss_data

### Tech Hub Dataframe Names:
- code_platoon (CHICAGO)
- codesmith (LOS ANGELES)
- epicodus (SEATTLE)
- fsa_gh (NYC)
- launch_academy (BOSTON)

In [None]:
# Row 27 of each CIRR-style frame is read as the median income (still a string here).
# State labels are spelled the way they appear in COL's index
# (e.g. 'Texas State', 'New York State') so they can be looked up with .loc.
income_upncoming = pd.DataFrame({
    'School Name': [
        report['Unnamed: 1'].loc[0]
        for report in (tech_elevator, hack_reactor, project_shift, hack_upstate, codeup_SA)
    ],
    'State': ['Michigan', 'Texas State', 'North Carolina', 'New York State', 'Texas State'],
    'Median Income': [
        report['Unnamed: 1'].loc[27]
        for report in (tech_elevator, hack_reactor, project_shift, hack_upstate, codeup_SA)
    ],
})

income_techcenters = pd.DataFrame({
    'School Name': [
        report['Unnamed: 1'].loc[0]
        for report in (code_platoon, codesmith, epicodus, fsa_gh, launch_academy)
    ],
    'State': ['Illinois', 'California', 'Washington', 'New York State', 'Massachusetts'],
    'Median Income': [
        report['Unnamed: 1'].loc[27]
        for report in (code_platoon, codesmith, epicodus, fsa_gh, launch_academy)
    ],
})

# NOTE(review): the NSS median income is hard-coded as 60000 rather than read
# from the NSS data - confirm the source of this figure.
income_nss = pd.DataFrame({
    'School Name': ['Nashville Software School'],
    'State': ['Tennessee'],
    'Median Income': [60000],
    'Normalized': [60000 / COL['Cost of living'].loc['Tennessee']],
})

In [None]:
# Inspect the single-row NSS income table.
income_nss

In [None]:
# Turn currency strings such as "$65,000" into floats.
# regex=False makes '$' a literal character on every pandas version
# (as a regex it would be the end-of-string anchor).
for income in (income_upncoming, income_techcenters):
    income['Median Income'] = (
        income['Median Income']
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
    )

In [None]:
# Inspect the up-and-coming incomes after numeric conversion.
income_upncoming

In [None]:
# Divide each program's median income by its state's annual cost of living.
normal_income_upncoming = [
    median / COL['Cost of living'].loc[state]
    for median, state in zip(income_upncoming['Median Income'], income_upncoming['State'])
]

normal_income_techcenters = [
    median / COL['Cost of living'].loc[state]
    for median, state in zip(income_techcenters['Median Income'], income_techcenters['State'])
]

In [None]:
# Attach the income / cost-of-living ratios computed above as a 'Normalized'
# column, matching the column income_nss already has.
income_upncoming['Normalized'] = normal_income_upncoming
income_techcenters['Normalized'] = normal_income_techcenters
# Display the tech-hub table as the cell output.
income_techcenters

In [None]:
# Inspect the up-and-coming table with the 'Normalized' column added.
income_upncoming

In [None]:
# Draw one horizontal bar series per group so each gets its own legend entry.
for income, bar_color, group_label in (
    (income_upncoming, '#d683a6', 'Up and Coming Cities'),
    (income_nss, '#747e79', 'NSS'),
    (income_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(income['School Name'], income['Median Income'], color=bar_color, label=group_label)

plt.title('Median Salary Among Programs (Full Stack)')
plt.xlabel('Median Salary ($)')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/med_sal_rates.png', bbox_inches="tight")

In [None]:
# Same layout as the raw-salary chart, but plotting salary / annual state cost of living.
for income, bar_color, group_label in (
    (income_upncoming, '#d683a6', 'Up and Coming Cities'),
    (income_nss, '#747e79', 'NSS'),
    (income_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(income['School Name'], income['Normalized'], color=bar_color, label=group_label)

plt.title('Median Salary Among Programs (Full Stack), Normalized by State Cost of Living')
plt.xlabel('Median Salary / State Cost of Living')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/med_sal_rates_normed.png', bbox_inches="tight")

## Tuition and Duration

- Setting up new dataframe containing all this information, since I had to manually collect it from each program's webpage, and not from a previously constructed dataset.

In [None]:
# Drop the " State" suffix (e.g. "New York State" -> "New York") so state names
# line up with programs_geo, then expose them as a regular 'States' column.
# After this cell COL has a default integer index, so the earlier
# COL[...].loc[<state name>] lookups no longer work if re-run.
COL = (
    COL
    .rename(index=lambda state_name: state_name.replace(' State', ''))
    .rename_axis('States')
    .reset_index()
)

In [None]:
# Basic geographical information for each program.
# Row order matters: later cells slice by position
# (rows 0-4 up-and-coming cities, row 5 NSS, rows 6-10 established tech hubs).
programs_geo = pd.DataFrame({
    'Program': ['Tech Elevator', 'Hack Reactor', 'Project Shift', 'Hack Upstate', 'Codeup',
                'Nashville Software School', 'Code Platoon', 'Codesmith', 'Epicodus',
                'Full Stack Academy', 'Launch Academy'],
    'City': ['Detroit', 'Austin', 'Durham', 'Syracuse', 'San Antonio', 'Nashville', 'Chicago',
             'Los Angeles', 'Seattle', 'New York City', 'Boston'],
    'State': ['Michigan', 'Texas', 'North Carolina', 'New York', 'Texas', 'Tennessee', 'Illinois',
              'California', 'Washington', 'New York', 'Massachusetts'],
})


#Reading in collected data on tuition and duration
# Joined to programs_geo on the 'Program' column in the merge cell.
# Presumably supplies the 'Tuition', 'Duration' and 'Cost per Week' columns plotted later - TODO confirm.

tuition_data = pd.read_csv('../data/tuition_duration.csv')

In [None]:
# Attach each program's state cost of living, then its tuition/duration data.
# Left joins keep every program in programs_geo; the helper 'States' key is
# dropped once the cost-of-living join is done.
programs_df = (
    programs_geo
    .merge(COL, how='left', left_on='State', right_on='States')
    .merge(tuition_data, how='left', left_on='Program', right_on='Program')
    .drop(['States'], axis=1)
)
programs_df

In [None]:
# Split programs_df into the three comparison groups by row label:
# rows 0-4 up-and-coming cities, row 5 NSS, rows 6-10 established tech hubs.
# Assumes the merges kept one row per program in programs_geo order - TODO confirm.
tudur_upncoming = programs_df.loc[0:4]
# .loc[[5]] returns a one-row DataFrame with column dtypes intact;
# pd.DataFrame(row).T would coerce every column to object dtype.
tudur_nss = programs_df.loc[[5]]
tudur_techcenters = programs_df.loc[6:10]

In [None]:
# Use the same group colours as the other figures; with every series drawn in
# black the three legend entries could not be told apart.
for group, bar_color, group_label in (
    (tudur_upncoming, '#d683a6', 'Up and Coming Cities'),
    (tudur_nss, '#747e79', 'NSS'),
    (tudur_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(group['City'], group['Cost of living'], color=bar_color, label=group_label)

# Bars are labelled by city, but the values are the state-level annual cost of living.
plt.title('Cost of Living by State')
plt.xlabel('Cost of living ($/year)')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# Saving is left disabled: this path is the one the tuition figure writes to.
#plt.savefig('../data/figures/cost_tuition.png', bbox_inches="tight")

In [None]:
# Draw one horizontal bar series per group so each gets its own legend entry.
for group, bar_color, group_label in (
    (tudur_upncoming, '#d683a6', 'Up and Coming Cities'),
    (tudur_nss, '#747e79', 'NSS'),
    (tudur_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(group['Program'], group['Tuition'], color=bar_color, label=group_label)

plt.title('Cost of Tuition by Bootcamp')
plt.xlabel('Cost of Tuition ($)')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/cost_tuition.png', bbox_inches="tight")

In [None]:
# Draw one horizontal bar series per group so each gets its own legend entry.
for group, bar_color, group_label in (
    (tudur_upncoming, '#d683a6', 'Up and Coming Cities'),
    (tudur_nss, '#747e79', 'NSS'),
    (tudur_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(group['Program'], group['Duration'], color=bar_color, label=group_label)

plt.title('Duration of Bootcamp')
plt.xlabel('Duration (Weeks)')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/duration_bootcamp.png', bbox_inches="tight")

In [None]:
# Draw one horizontal bar series per group so each gets its own legend entry.
for group, bar_color, group_label in (
    (tudur_upncoming, '#d683a6', 'Up and Coming Cities'),
    (tudur_nss, '#747e79', 'NSS'),
    (tudur_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(group['Program'], group['Cost per Week'], color=bar_color, label=group_label)

plt.title('Cost per Week by Bootcamp')
plt.xlabel('Cost per Week ($/week)')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/cost_per_week.png', bbox_inches="tight")

In [None]:
# Stack the three income tables (up-and-coming, NSS, tech hubs) and keep only
# the school name and median income.
med_incomes = (
    pd.concat([income_upncoming, income_nss, income_techcenters])
    .drop(['State', 'Normalized'], axis=1)
    .rename(columns={'Median Income': 'Salary per Week'})
)

# Annual median income -> weekly salary.
med_incomes['Salary per Week'] = med_incomes['Salary per Week'] / 52

# Shorten the school names that differ from the 'Program' labels in
# programs_geo so the two tables can be joined by name.
med_incomes['School Name'] = med_incomes['School Name'].replace({
    "Hack Upstate's Careers in Code": 'Hack Upstate',
    'Codeup, LLC': 'Codeup',
    'Fullstack Academy, Inc.': 'Full Stack Academy',
})
med_incomes

In [None]:
# Re-display programs_df for reference before the weekly salaries are merged in.
programs_df

In [None]:
# Bring each program's weekly salary alongside its weekly cost; the duplicate
# name column from med_incomes is dropped after the join.
cost_by_salary = (
    programs_df
    .merge(med_incomes, how='left', left_on='Program', right_on='School Name')
    .drop(['School Name'], axis=1)
)

# Despite its name, this column is weekly salary divided by weekly cost
# (values above 1 mean a week of salary exceeds a week of tuition).
cost_by_salary['Cost per Salary'] = (
    cost_by_salary['Salary per Week'] / cost_by_salary['Cost per Week']
)
cost_by_salary

In [None]:
# Split cost_by_salary into the three comparison groups by row label:
# rows 0-4 up-and-coming cities, row 5 NSS, rows 6-10 established tech hubs.
# Assumes the merge kept one row per program in programs_geo order - TODO confirm.
cps_upncoming = cost_by_salary.loc[0:4]
# .loc[[5]] returns a one-row DataFrame with column dtypes intact;
# pd.DataFrame(row).T would coerce every column to object dtype.
cps_nss = cost_by_salary.loc[[5]]
cps_techcenters = cost_by_salary.loc[6:10]

In [None]:
# Draw one horizontal bar series per group so each gets its own legend entry.
for group, bar_color, group_label in (
    (cps_upncoming, '#d683a6', 'Up and Coming Cities'),
    (cps_nss, '#747e79', 'NSS'),
    (cps_techcenters, '#83d6b3', 'Established Tech Hub Cities'),
):
    plt.barh(group['Program'], group['Cost per Salary'], color=bar_color, label=group_label)

# Reference line at a ratio of 1, where weekly salary equals weekly cost.
plt.axvline(x=1, color='black', linestyle='--')
plt.title('Weekly Cost vs Weekly Payout by Bootcamp')
plt.xlabel('Salary by Week / Cost by Week')
# Anchor the legend just outside the right edge of the axes.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))

plt.savefig('../data/figures/cost_per_salary.png', bbox_inches="tight")