In [1]:
# Dependencies
import pandas as pd
import numpy as np
import statistics

In [2]:
# Read the 2016 FCC New Coders Survey CSV, keeping only the 20 columns
# (selected by their position in the file) that this analysis works with.
survey_csv_path = 'raw_data/2016-FCC-New-Coders-Survey-Data.csv'
kept_column_positions = [
    0, 1, 2, 3, 4,
    7, 8, 9, 10, 11,
    29, 30, 32, 36, 37,
    45, 48, 56, 110, 111,
]
bc_df = pd.read_csv(survey_csv_path, usecols=kept_column_positions)

In [3]:
# Preview the first ten rows of the loaded survey columns.
bc_df.head(n=10)

Unnamed: 0,Age,AttendedBootcamp,BootcampFinish,BootcampFullJobAfter,BootcampLoanYesNo,BootcampPostSalary,BootcampRecommend,ChildrenNumber,CityPopulation,CodeEventBootcamp,CountryLive,EmploymentField,EmploymentStatus,Gender,HasChildren,HoursLearning,Income,JobRoleInterest,SchoolDegree,SchoolMajor
0,28.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,office and administrative support,Employed for wages,male,0.0,30.0,32000.0,,"some college credit, no degree",
1,22.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,food and beverage,Employed for wages,male,,30.0,15000.0,Front-End Web Developer,"some college credit, no degree",
2,19.0,0.0,,,,,,,more than 1 million,,United States of America,finance,Employed for wages,male,,20.0,48000.0,,high school diploma or equivalent (GED),
3,26.0,0.0,,,,,,,more than 1 million,,United States of America,"arts, entertainment, sports, or media",Employed for wages,female,,20.0,43000.0,Front-End Web Developer,bachelor's degree,Cinematography And Film
4,20.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,education,Employed for wages,female,,25.0,6000.0,Full-Stack Web Developer,"some college credit, no degree",
5,34.0,0.0,,,,,,,more than 1 million,,United States of America,finance,Self-employed freelancer,male,,5.0,40000.0,,bachelor's degree,English
6,23.0,0.0,,,,,,,more than 1 million,,Singapore,software development,Employed for wages,male,,8.0,32000.0,,bachelor's degree,Computer Science
7,35.0,0.0,,,,,,,"between 100,000 and 1 million",,United Kingdom,office and administrative support,Employed for wages,male,0.0,,40000.0,,master's degree (non-professional),Education
8,33.0,0.0,,,,,,,"between 100,000 and 1 million",,United States of America,software development,Employed for wages,male,,5.0,80000.0,,bachelor's degree,Business Administration
9,33.0,0.0,,,,,,,more than 1 million,,United States of America,,Employed for wages,male,,20.0,80000.0,,master's degree (non-professional),Business Administration


In [4]:
# Recode the 0/1 survey flags as "No"/"Yes".
# Only the yes/no flag columns are touched: a frame-wide replace would also
# turn genuine numeric zeros and ones (e.g. an Income of 0 or a
# ChildrenNumber of 1) into strings.
# NOTE(review): AttendedBootcamp and HasChildren visibly hold 0.0 in the
# preview; the four Bootcamp* columns are presumed to be 0/1 flags from their
# names — confirm, and add any other flag column (CodeEventBootcamp?) here.
flag_columns = [
    "AttendedBootcamp",
    "BootcampFinish",
    "BootcampFullJobAfter",
    "BootcampLoanYesNo",
    "BootcampRecommend",
    "HasChildren",
]
bc_df[flag_columns] = bc_df[flag_columns].replace([0.0, 1.0], ["No", "Yes"])

In [5]:
# Total number of survey respondents = number of rows in the loaded frame.
# len() is used rather than max(index + 1) so the count does not rely on the
# frame carrying a default 0..n-1 index, and an empty frame yields 0 instead
# of raising.
total_respondents = len(bc_df)

total_respondents

15620

In [6]:
# Keep only the respondents whose AttendedBootcamp value is 'Yes'.
attended_mask = bc_df["AttendedBootcamp"] == 'Yes'
bc_attendees_df = bc_df.loc[attended_mask]

In [7]:
# Row count of the attendee frame = number of bootcamp attendees.
bc_count = len(bc_attendees_df)


In [8]:
# Mean age of the bootcamp attendees, ignoring respondents with no recorded Age.
# dropna() replaces the hand-written `not np.isnan(i) == True` filter.
age_list = bc_attendees_df["Age"].dropna().tolist()
avg_age = statistics.mean(age_list)

In [9]:
# Helpers that count how many rows carry a given value (or any of several
# values) in one column; they default to the bootcamp-attendee frame.
def trait_count(col, trait, df=None):
    """Count the rows whose value in ``col`` equals ``trait``.

    Args:
        col: Name of the column to inspect.
        trait: Value compared with ``==`` against every entry of ``col``.
        df: DataFrame to search. When omitted, the notebook-level
            ``bc_attendees_df`` is used.

    Returns:
        int: Number of matching rows. NaN entries never compare equal, so
        they are never counted.
    """
    if df is None:
        # Reading a module-level name needs no ``global`` declaration.
        df = bc_attendees_df
    # Summing the boolean mask counts the True entries without building a list.
    return int((df[col] == trait).sum())


def mult_trait(trait_list, col, df=None):
    """Sum ``trait_count`` over every value in ``trait_list``.

    Args:
        trait_list: Iterable of values to look for in ``col``.
        col: Name of the column to inspect.
        df: DataFrame to search; forwarded to ``trait_count``.

    Returns:
        int: Combined number of rows matching any listed value; 0 for an
        empty ``trait_list``.
    """
    return sum(trait_count(col, trait, df) for trait in trait_list)

# Attendees whose Gender is exactly 'male' / 'female'.
male, female = (trait_count('Gender', label) for label in ('male', 'female'))
# Everyone else among the attendees: any other Gender value, including rows
# where Gender is missing.
non_gender_specific = bc_count - (male + female)

In [10]:
# Degree levels that count as holding a college degree.
# The labels are spelled out instead of being picked by position from
# list(set(...)): set iteration order for strings changes between kernel
# sessions, so positional indices can select different labels on each restart.
# NOTE(review): presumably the five positions were meant to be these five
# degree levels; "associate's degree", "professional degree (MBA, MD, JD, etc.)"
# and "Ph.D." are presumed spellings (not visible in the preview of bc_df).
# Confirm the labels and that the count still matches the recorded 692.
degrees = [
    "associate's degree",
    "bachelor's degree",
    "master's degree (non-professional)",
    "professional degree (MBA, MD, JD, etc.)",
    "Ph.D.",
]
# Fail loudly on a misspelled label rather than silently counting zero for it.
unknown_degrees = set(degrees) - set(bc_df["SchoolDegree"].dropna())
assert not unknown_degrees, "labels not found in SchoolDegree: {}".format(sorted(unknown_degrees))
degree_holders = mult_trait(degrees, 'SchoolDegree')
degree_holders

692

In [11]:
# Share of all survey respondents who attended a bootcamp, formatted as a
# percentage string with two decimals.
# (The former `prec=2` keyword was never referenced by the format string and
# was silently ignored, so it has been dropped.)
pct_attended = "{0:,.2f}%".format((bc_count / total_respondents) * 100)


In [12]:
# Share of bootcamp attendees who hold a college degree, formatted as a
# percentage string with two decimals.
# (The former `prec=2` keyword was never referenced by the format string and
# was silently ignored, so it has been dropped.)
pct_degree = "{0:,.2f}%".format((degree_holders / bc_count) * 100)


In [13]:
# Share of all survey respondents who both attended a bootcamp and hold a
# college degree, formatted as a percentage string with two decimals.
# (The former `prec=2` keyword was never referenced by the format string and
# was silently ignored, so it has been dropped.)
bootcamp_and_degree = "{0:,.2f}%".format((degree_holders / total_respondents) * 100)

In [14]:
# Mean post-bootcamp salary of the attendees, ignoring missing answers, shown
# as a dollar string with thousands separators and two decimals.
# dropna() replaces the hand-written `not np.isnan(i) == True` filter, and the
# `prec=2` keyword (silently ignored by str.format) has been dropped.
salary_list = bc_attendees_df["BootcampPostSalary"].dropna().tolist()
avg_salary = "${0:,.2f}".format(statistics.mean(salary_list))
avg_salary

'$63,740.51'

In [15]:
# Collect the headline figures as label -> value pairs, then wrap each value
# in a one-element list so the result is a single-row DataFrame.
summary_values = {
    "Total Surveyed": total_respondents,
    "Total Bootcamp attendees": bc_count,
    "Percent Attended Bootcamp": pct_attended,
    "Mean Age": avg_age,
    "Degree Holder": degree_holders,
    "Percent with Degrees and Bootcamp": bootcamp_and_degree,
    "Male": male,
    "Female": female,
    "Non Gender Specific": non_gender_specific,
    "Average Post Bootcamp Salary": avg_salary,
}
stats = pd.DataFrame({label: [value] for label, value in summary_values.items()})

In [17]:
# Write the one-row summary to disk, leaving out the DataFrame index column.
output_csv_path = 'Bootcamp_Statistics.csv'
stats.to_csv(output_csv_path, index=False)