In [1]:
# Dependencies
import pandas as pd

In [2]:
# load CSV
csv_path = "Resources/2016-FCC-New-Coders-Survey-Data.csv"

In [3]:
# Read with pandas--low_memory required to suppress errors about mixed data types
dirty_df = pd.read_csv(csv_path, low_memory = False)
dirty_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampMonthsAgo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,...,ResourceSoloLearn,ResourceStackOverflow,ResourceTreehouse,ResourceUdacity,ResourceUdemy,ResourceW3Schools,ResourceYouTube,SchoolDegree,SchoolMajor,StudentDebtOwe
0,28.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,20000.0
1,22.0,0.0,,,,,,,,,...,,,,,1.0,,,"some college credit, no degree",,
2,19.0,0.0,,,,,,,,,...,,,,,,,,high school diploma or equivalent (GED),,
3,26.0,0.0,,,,,,,,,...,,,,,,,,bachelor's degree,Cinematography And Film,7000.0
4,20.0,0.0,,,,,,,,,...,,,,,,,,"some college credit, no degree",,


In [4]:
# Take only columns 0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111
narrow_df = dirty_df.iloc[:, [0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 29, 30, 32, 36, 37, 45, 48, 56, 110, 111]]
narrow_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [5]:
# Change reading 0 and 1 to No and Yes, respectively
boolean_df = narrow_df.replace({0 : "No", 1 : "Yes"})
boolean_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,office and administrative support,Employed for wages,male,No,30,32000.0,,"some college credit, no degree",
1,22.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,food and beverage,Employed for wages,male,,30,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,No,,,,,,,,more than 1 million,...,United States of America,finance,Employed for wages,male,,20,48000.0,,high school diploma or equivalent (GED),
3,26.0,No,,,,,,,,more than 1 million,...,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,No,,,,,,,,"between 100,000 and 1 million",...,United States of America,education,Employed for wages,female,,25,6000.0,Full-Stack Web Developer,"some college credit, no degree",


In [6]:
# Extract rows for only those who attended a bootcamp
attended_df = boolean_df.loc[boolean_df["AttendedBootcamp"] == "Yes", :]
attended_df.head()

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampName,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,...,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
93,32.0,Yes,Yes,No,No,Codify Academy,,No,,"between 100,000 and 1 million",...,United States of America,"arts, entertainment, sports, or media",Self-employed business owner,male,,20,67000.0,,bachelor's degree,Biology
97,26.0,Yes,Yes,Yes,No,DaVinci Coders,45000.0,No,,more than 1 million,...,United States of America,software development,Employed for wages,male,No,10,40000.0,,master's degree (non-professional),Music
130,41.0,Yes,Yes,Yes,Yes,Coder Foundry,75000.0,Yes,3.0,"less than 100,000",...,United States of America,software development,Employed for wages,male,Yes,30,75000.0,,"some college credit, no degree",
159,26.0,Yes,Yes,No,No,General Assembly,,No,,"between 100,000 and 1 million",...,United States of America,,Not working and not looking for work,female,,30,,Full-Stack Web Developer,"some college credit, no degree",
188,24.0,Yes,No,,Yes,,,No,,"between 100,000 and 1 million",...,Canada,,Not working but looking for work,female,,60,,,"some college credit, no degree",


In [7]:
# Create DataFrame of the different boot camps that had a significant number of attendees
attendees_per_bootcamp = pd.DataFrame(attended_df["BootcampName"].value_counts())
#attendees_per_bootcamp.head()

# INDEX MUST be RESET in order to filter or merge this DataFrame
attendees_per_bootcamp.reset_index(inplace = True)

# rename the two columns:
   # one with the bootcamp name, and
   # one with the number of respondents who attended each bootcamp

attendees_per_bootcamp.columns = ["BootcampName", "Attendee Count"]
attendees_per_bootcamp.head()

Unnamed: 0,BootcampName,Attendee Count
0,General Assembly,90
1,Flatiron School,54
2,Dev Bootcamp,48
3,The Iron Yard,40
4,Prime Digital Academy,30


In [8]:
# Count how many attendees of each bootcamp would recommend the bootcamp
recommended_df = attended_df.loc[attended_df["BootcampRecommend"] == "Yes", :]
#recommended_df.head()

recommend_bootcamp = pd.DataFrame(recommended_df["BootcampName"].value_counts())
#recommend_bootcamp.head()

# again: INDEX MUST be RESET in order to filter or merge this DataFrame
recommend_bootcamp = recommend_bootcamp.reset_index()

recommend_bootcamp.columns = ['BootcampName', 'Number of Recommenders']
recommend_bootcamp.head()

Unnamed: 0,BootcampName,Number of Recommenders
0,General Assembly,70
1,Flatiron School,50
2,Dev Bootcamp,41
3,The Iron Yard,31
4,Hack Reactor,27


In [9]:
# MERGE the two created data frames on the name of the bootcamp

merged_df = pd.merge(attendees_per_bootcamp, recommend_bootcamp, on="BootcampName")
merged_df.head()

Unnamed: 0,BootcampName,Attendee Count,Number of Recommenders
0,General Assembly,90,70
1,Flatiron School,54,50
2,Dev Bootcamp,48,41
3,The Iron Yard,40,31
4,Prime Digital Academy,30,25


In [10]:
# Calculate percentage of each bootcamp's students who are recommenders

merged_df["% Recommended"] = merged_df["Number of Recommenders"]/ merged_df["Attendee Count"] *100
merged_df = merged_df.sort_values(by="% Recommended", ascending=False).round(2)

merged_df["% Recommended"] = merged_df["% Recommended"].map("{:,.2f}%".format)
merged_df

Unnamed: 0,BootcampName,Attendee Count,Number of Recommenders,% Recommended
115,Code Institute,1,1,100.00%
101,Science to Data Science,1,1,100.00%
100,Austin Coding Academy,1,1,100.00%
99,codeU,1,1,100.00%
98,CodeCraft School,1,1,100.00%
97,Founders & Coders,1,1,100.00%
96,Stackademy,1,1,100.00%
95,Coder's Lab,1,1,100.00%
44,Academia de Código,4,4,100.00%
94,Betamore,1,1,100.00%


In [11]:
# Export to excel and remove index
merged_df.to_excel("output/Bootcamppart2.xlsx", index=False)