In [2]:
import pandas as pd

In [3]:
# The path to our CSV file

# Read our Crowdfunding data into pandas
df = pd.read_csv('Resources/CrowdfundingData.csv')

In [5]:
# Get a list of all of our columns for easy reference
df.columns

Index(['id', 'name', 'blurb', 'goal', 'pledged', 'outcome', 'backers_count',
       'country', 'currency', 'launched_at', 'deadline', 'staff_pick',
       'spotlight', 'category'],
      dtype='object')

In [7]:
# Extract "name", "goal", "pledged", "outcome", "country", "staff_pick",
# "backers_count", and "spotlight"
df = df[["name", "goal", "pledged", "outcome", "country", "staff_pick", "backers_count", "spotlight"]]
df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
0,"Baldwin, Riley and Jackson",100,0,failed,CA,False,0,False
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False


In [10]:
# Remove projects that made no money at all
df = df[df['pledged'] != 0]

In [11]:
# Collect only those projects that were hosted in the US
# Create a list of the columns
# Create a new df for "US" with the columns above. 
df_us = df[df['country'] == 'US']

In [17]:
# Create a new column that finds the average amount pledged to a project
df.groupby(['name'])['pledged'].mean()

name
Acevedo-Huffman                 4460.0
Acosta PLC                    143910.0
Acosta, Mullins and Morris     62127.0
Adams Group                   195750.0
Adams, Walker and Wong          6514.0
                                ...   
Young and Sons                  4596.0
Young, Gilbert and Escobar     57034.0
Young, Hart and Ryan             903.0
Young, Ramsey and Powell        9317.0
Yu and Sons                     5569.0
Name: pledged, Length: 972, dtype: float64

In [28]:
# First convert "average_donation", "goal", and "pledged" columns to float
# Then Format to go to two decimal places, include a dollar sign, and use comma notation
# df['average_donation'] = df['average_donation'].astype(float)
df['goal'] = df['goal'].astype(float).round(2)
df['pledged'] = df['pledged'].astype(float).round(2)

In [30]:
# Calculate the total number of backers for all US projects
df_us["backers_count"].sum()

545510

In [33]:
# Calculate the average number of backers for all US projects
df_us.groupby(['name'])['backers_count'].mean()
df_us['backers_count'].mean()

715.8923884514436

In [34]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
df_special = df_us[df_us['staff_pick']]

In [35]:
# Group by the outcome of the campaigns and see if staff picks matter (Seems to matter quite a bit)
df_us.groupby('staff_pick')['outcome'].count()

staff_pick
False    723
True      39
Name: outcome, dtype: int64