In [36]:
import pandas as pd

In [37]:
# Location of the Kickstarter export (relative path, resolved against the working directory)
csv_path = "Resources/KickstarterData.csv"

# Load the full CSV into a DataFrame that the following cells build on
kick_df = pd.read_csv(filepath_or_buffer=csv_path)

In [38]:
# Display every column label of the raw frame as a plain Python list for quick lookup
kick_df.columns.tolist()

['id',
 'photo',
 'name',
 'blurb',
 'goal',
 'pledged',
 'state',
 'slug',
 'disable_communication',
 'country',
 'currency',
 'currency_symbol',
 'currency_trailing_code',
 'deadline',
 'state_changed_at',
 'created_at',
 'launched_at',
 'staff_pick',
 'is_starrable',
 'backers_count',
 'static_usd_rate',
 'usd_pledged',
 'creator',
 'location',
 'category',
 'profile',
 'spotlight',
 'urls',
 'source_url',
 'friends',
 'is_starred',
 'is_backing',
 'permissions']

In [39]:
# Narrow the raw frame to the eight columns of interest, in this order.
# Selecting with a list of labels raises KeyError if any of them is missing.
loc_cleaned = kick_df[
    [
        "name",
        "goal",
        "pledged",
        "state",
        "country",
        "staff_pick",
        "backers_count",
        "spotlight",
    ]
]
loc_cleaned.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [40]:
# Keep only campaigns with a strictly positive pledged amount;
# rows where "pledged" is zero (or missing) fail the comparison and are dropped
pledgedprojects = loc_cleaned[loc_cleaned["pledged"].gt(0)]
pledgedprojects.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
1,MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,2500.0,2936.0,successful,GB,True,15,True
2,RUN,1000.0,1200.0,successful,GB,False,30,True
3,9th International Meeting of Youth Theatre sap...,2000.0,2135.0,successful,IT,False,24,True
4,Get Conti to the Ed Fringe!,1000.0,1250.0,successful,GB,False,28,True


In [41]:
# Restrict the funded campaigns to those whose country code is exactly "US".
# .copy() makes hosted_in_us an independent frame, so adding a column to it
# later does not raise pandas' SettingWithCopyWarning.
hosted_in_us = pledgedprojects[pledgedprojects["country"].eq("US")].copy()
hosted_in_us.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True
14,Pride Con,15000.0,15110.0,successful,US,False,60,True
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True


In [42]:
# Add the average pledge per backer: total pledged divided element-wise by backers_count
hosted_in_us["average_donation"] = hosted_in_us["pledged"].div(hosted_in_us["backers_count"])
hosted_in_us.head()

Unnamed: 0,name,goal,pledged,state,country,staff_pick,backers_count,spotlight,average_donation
0,The Class Act Players Theatre Company Presents...,1500.0,2925.0,successful,US,False,17,True,172.058824
8,Forefront Festival 2015,7200.0,7230.0,successful,US,False,68,True,106.323529
11,Hamlet the Hip-Hopera,9747.0,10103.0,successful,US,True,132,True,76.537879
14,Pride Con,15000.0,15110.0,successful,US,False,60,True,251.833333
15,En Garde Arts Emerging Artists Festival BOSSS,10000.0,10306.0,successful,US,True,80,True,128.825


In [43]:
# First convert the "average_donation", "goal", and "pledged" columns to float,
# then format them with a dollar sign, comma separators, and two decimal places

In [44]:
# Calculate the total number of backers for all US projects

In [45]:
# Calculate the average number of backers for all US projects

In [46]:
# Collect only those US campaigns that have been picked as a "Staff Pick"

In [47]:
# Group by the state of the campaigns and see if staff picks matter (Seems to matter quite a bit)