In [1]:
import pandas as pd
import os

In [2]:
data_file = os.path.join("Resources", "KickstarterData.csv")
df = pd.read_csv(data_file)
df.head()

Unnamed: 0,id,photo,name,blurb,goal,pledged,state,slug,disable_communication,country,...,location,category,profile,spotlight,urls,source_url,friends,is_starred,is_backing,permissions
0,1645666704,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",The Class Act Players Theatre Company Presents...,The Class Act Players put on another one of th...,1500.0,2925.0,successful,the-class-act-players-theatre-company-presents...,False,US,...,"{""country"":""US"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
1,874638240,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",MR INCREDIBLE by Camilla Whitehill - VAULT Fes...,"A brand new play about love and entitlement, b...",2500.0,2936.0,successful,mr-incredible-by-camilla-whitehill-vault-festival,False,GB,...,"{""country"":""GB"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
2,247074984,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",RUN,Yonni's pissed off in a world filled with scho...,1000.0,1200.0,successful,run-10,False,GB,...,"{""country"":""GB"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
3,1941196813,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",9th International Meeting of Youth Theatre sap...,27. April bis 1. Mai 2016 in Brixen/Südtirol/I...,2000.0,2135.0,successful,9th-international-meeting-of-youth-theatre-sap...,False,IT,...,"{""country"":""IT"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,
4,421961595,"{""small"":""https://ksr-ugc.imgix.net/assets/012...",Get Conti to the Ed Fringe!,The Italia Conti 2nd years are going to Ed Fri...,1000.0,1250.0,successful,get-conti-to-the-ed-fringe,False,GB,...,"{""country"":""GB"",""urls"":{""web"":{""discover"":""htt...","{""urls"":{""web"":{""discover"":""http://www.kicksta...","{""background_image_opacity"":0.8,""should_show_f...",True,"{""web"":{""project"":""https://www.kickstarter.com...",https://www.kickstarter.com/discover/categorie...,,,,


In [3]:
# Get a list of all of our columns for easy reference
print(df.columns)

Index(['id', 'photo', 'name', 'blurb', 'goal', 'pledged', 'state', 'slug',
       'disable_communication', 'country', 'currency', 'currency_symbol',
       'currency_trailing_code', 'deadline', 'state_changed_at', 'created_at',
       'launched_at', 'staff_pick', 'is_starrable', 'backers_count',
       'static_usd_rate', 'usd_pledged', 'creator', 'location', 'category',
       'profile', 'spotlight', 'urls', 'source_url', 'friends', 'is_starred',
       'is_backing', 'permissions'],
      dtype='object')


In [4]:
# create a new DataFrame with "name", "backers_count", "pledged", "state", "country", "staff_pick"
labels = ["name", "backers_count", "pledged", "state", "country", "staff_pick"]
kickstarter_df = df.loc[:,labels]

In [5]:
#list the dtypes in our new dataframe
kickstarter_df.dtypes

name              object
backers_count      int64
pledged          float64
state             object
country           object
staff_pick          bool
dtype: object

In [6]:
# Create a new column that finds the average amount pledged to a project
# average_donation = pledged / backers_count
kickstarter_df["average_donation"] = kickstarter_df['pledged']/kickstarter_df['backers_count']
kickstarter_df.dtypes

name                 object
backers_count         int64
pledged             float64
state                object
country              object
staff_pick             bool
average_donation    float64
dtype: object

In [7]:
# print the number of rows. Should be 4120
#len(kickstarter_df.index)
kickstarter_df.shape
#kickstarter_df.count()

(4120, 7)

In [8]:
# Sort out any projects that made no money at all
filtered_kickstarter = kickstarter_df[kickstarter_df["pledged"] > 0.0]

In [9]:
# print the number of rows again. Should be 3307
filtered_kickstarter.shape

(3307, 7)

In [11]:
# Collect only those projects that were hosted in the US
us_kickstarter = filtered_kickstarter[filtered_kickstarter['country'] == "US"]
us_kickstarter.head()

Unnamed: 0,name,backers_count,pledged,state,country,staff_pick,average_donation
0,The Class Act Players Theatre Company Presents...,17,2925.0,successful,US,False,172.058824
8,Forefront Festival 2015,68,7230.0,successful,US,False,106.323529
11,Hamlet the Hip-Hopera,132,10103.0,successful,US,True,76.537879
14,Pride Con,60,15110.0,successful,US,False,251.833333
15,En Garde Arts Emerging Artists Festival BOSSS,80,10306.0,successful,US,True,128.825


In [12]:
# Format our "average_donation", and "pledged" columns
# to go to two decimal places, include a dollar sign, and use comma notation
us_kickstarter["average_donation"] = us_kickstarter["average_donation"].map("${:.2f}".format)
us_kickstarter["pledged"] = us_kickstarter["pledged"].map("${:.2f}".format)
us_kickstarter.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


Unnamed: 0,name,backers_count,pledged,state,country,staff_pick,average_donation
0,The Class Act Players Theatre Company Presents...,17,$2925.00,successful,US,False,$172.06
8,Forefront Festival 2015,68,$7230.00,successful,US,False,$106.32
11,Hamlet the Hip-Hopera,132,$10103.00,successful,US,True,$76.54
14,Pride Con,60,$15110.00,successful,US,False,$251.83
15,En Garde Arts Emerging Artists Festival BOSSS,80,$10306.00,successful,US,True,$128.82


In [13]:
# Calculate the total number of backers for all US Projects. Should be 89273
print(us_kickstarter['backers_count'].sum())

89273


In [14]:
#Calculate the average number of backers for all US Projects. Should be 41.93
print(us_kickstarter['backers_count'].sum()/len(us_kickstarter))

41.931892907468296


In [20]:
# Collect only those US campaigns that have 
# been picked as a "Staff Pick" == True
# create a new dataFrame with this condition
staff_pick_US = us_kickstarter.loc[us_kickstarter["staff_pick"] == True]
staff_pick_US.head()

Unnamed: 0,name,backers_count,pledged,state,country,staff_pick,average_donation
11,Hamlet the Hip-Hopera,132,$10103.00,successful,US,True,$76.54
15,En Garde Arts Emerging Artists Festival BOSSS,80,$10306.00,successful,US,True,$128.82
39,"""Poor People"" at FringeNYC 2015",34,$5682.00,successful,US,True,$167.12
44,Queen Mab's Steampunk and Fairie Street Festival,62,$3363.00,successful,US,True,$54.24
45,RAFT: a new play by Emily Kitchens,120,$7826.00,successful,US,True,$65.22


In [21]:
# Group this "Staff Pick" DataFrame by the 'state' of the campaigns 
# Count them and see if staff picks matter (Seems to matter quite a bit)

state_groups = staff_pick_US.groupby("state")
state_groups["name"].count()

state
canceled        6
failed         21
live            2
successful    145
Name: name, dtype: int64