In [1]:
# Dependencies
import pandas as pd

In [2]:
# load CSV
file = "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [3]:
# Read with pandas--low_memory required to suppress errors about mixed data types
df = pd.read_csv(file, encoding = 'iso-8859-1', low_memory=False)
df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampMonthsAgo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,...,ResourceSoloLearn,ResourceStackOverflow,ResourceTreehouse,ResourceUdacity,ResourceUdemy,ResourceW3Schools,ResourceYouTube,SchoolDegree,SchoolMajor,StudentDebtOwe
0,28.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,20000.0
1,22.0,0.0,,,,,,,,,...,,,,,1.0,,,"some college credit, no degree",,
2,19.0,0.0,,,,,,,,,...,,,,,,,,high school diploma or equivalent (GED),,
3,26.0,0.0,,,,,,,,,...,,,,,,,,bachelor's degree,Cinematography And Film,7000.0
4,20.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,


In [4]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111
reduced = df.iloc[:,[0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]]

In [5]:
# Change reading 0 and 1 to No and Yes, respectively
reduced = reduced.replace({0.0: "No", 1.0:"Yes"})

In [6]:
# Extract rows for only those who attended a bootcamp
attended = reduced.loc[reduced["AttendedBootcamp"] == "Yes"]
attended.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
93,32.0,Yes,Yes,No,No,Codify Academy,,No,,"between 100,000 and 1 million",...,United States of America,"arts, entertainment, sports, or media",Self-employed business owner,male,,20,67000.0,,bachelor's degree,Biology
97,26.0,Yes,Yes,Yes,No,DaVinci Coders,45000.0,No,,more than 1 million,...,United States of America,software development,Employed for wages,male,No,10,40000.0,,master's degree (non-professional),Music
130,41.0,Yes,Yes,Yes,Yes,Coder Foundry,75000.0,Yes,3.0,"less than 100,000",...,United States of America,software development,Employed for wages,male,Yes,30,75000.0,,"some college credit, no degree",
159,26.0,Yes,Yes,No,No,General Assembly,,No,,"between 100,000 and 1 million",...,United States of America,,Not working and not looking for work,female,,30,,Full-Stack Web Developer,"some college credit, no degree",
188,24.0,Yes,No,,Yes,,,No,,"between 100,000 and 1 million",...,Canada,,Not working but looking for work,female,,60,,,"some college credit, no degree",


In [7]:
# Create DataFrame of the different boot camps that had a significant number of attendees
highest_attend = attended["BootcampName"].value_counts()
highest_attend_df = pd.DataFrame(highest_attend)
highest_attend_df = highest_attend_df.rename(
    columns={"BootcampName": "Attendees"})
highest_attend_df.head(10)

Unnamed: 0,Attendees
General Assembly,90
Flatiron School,54
Dev Bootcamp,48
The Iron Yard,40
Prime Digital Academy,30
Hack Reactor,29
Turing,27
Hackbright Academy,22
App Academy,22
Bloc.io,21


In [8]:
# Count how many attendees of each bootcamp would recommend the bootcamp
#recommended = highest_attend_df.loc[highest_attend_df["BootcampRecommend"] == "Yes"]
#recommended.count()

recommended = attended.loc[attended["BootcampRecommend"] == "Yes"]
highest_recs = recommended["BootcampName"].value_counts()
highest_recs_df = pd.DataFrame(highest_recs)
highest_recs_df = highest_recs_df.rename(
    columns={"BootcampName": "Recommended"})
highest_recs_df.head(10)

Unnamed: 0,Recommended
General Assembly,70
Flatiron School,50
Dev Bootcamp,41
The Iron Yard,31
Hack Reactor,27
Turing,26
Prime Digital Academy,25
App Academy,20
Hackbright Academy,19
MakerSquare,18


In [9]:
# Merge the two created data frames on the name of tbe bootcamp
recommendedloc = attended.loc[attended["BootcampRecommend"] == "Yes"]

#Group Grade dataframes according to school
recommended = recommendedloc.groupby("BootcampName")
attended = attended.groupby("BootcampName")
#Find average math score for each grouped grade
highest_recs = recommended["BootcampName"].value_counts()
highest_attend = attended["BootcampName"].value_counts()
#Create Dataframe and rename columns using dictionaries
merged_df = pd.DataFrame({"Recommended":highest_recs, "Attended":highest_attend})
merged_df = merged_df.sort_values(by='Recommended', ascending=False)
merged_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Recommended,Attended
BootcampName,BootcampName,Unnamed: 2_level_1,Unnamed: 3_level_1
General Assembly,General Assembly,70.0,90
Flatiron School,Flatiron School,50.0,54
Dev Bootcamp,Dev Bootcamp,41.0,48
The Iron Yard,The Iron Yard,31.0,40
Hack Reactor,Hack Reactor,27.0,29
Turing,Turing,26.0,27
Prime Digital Academy,Prime Digital Academy,25.0,30
App Academy,App Academy,20.0,22
Hackbright Academy,Hackbright Academy,19.0,22
MakerSquare,MakerSquare,18.0,20


In [10]:
# Calculate percentage of eac bootcamp's students who are recommenders
rec_percentage = (highest_recs / highest_attend)*100
remerged_df = pd.DataFrame({"Recommended":highest_recs, "Attended":highest_attend, "Recommendation %":rec_percentage})
remerged_df = remerged_df.sort_values(by='Recommended', ascending=False)
remerged_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Recommended,Attended,Recommendation %
BootcampName,BootcampName,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
General Assembly,General Assembly,70.0,90,77.777778
Flatiron School,Flatiron School,50.0,54,92.592593
Dev Bootcamp,Dev Bootcamp,41.0,48,85.416667
The Iron Yard,The Iron Yard,31.0,40,77.5
Hack Reactor,Hack Reactor,27.0,29,93.103448
Turing,Turing,26.0,27,96.296296
Prime Digital Academy,Prime Digital Academy,25.0,30,83.333333
App Academy,App Academy,20.0,22,90.909091
Hackbright Academy,Hackbright Academy,19.0,22,86.363636
MakerSquare,MakerSquare,18.0,20,90.0


In [11]:
# Export to excel and remove index
