In [2]:
import pandas as pd
from pathlib import Path

In [3]:
# The path to our CSV file
crowdfunding_data = Path("Resources/Crowdfunding_data.csv")

# Read our Crowdfunding data into pandas
crowdfunding_df = pd.read_csv(crowdfunding_data)
crowdfunding_df.head()

Unnamed: 0,id,name,blurb,goal,pledged,outcome,backers_count,country,currency,launched_at,deadline,staff_pick,spotlight,category
0,0,"Baldwin, Riley and Jackson",Pre-emptive tertiary standardization,100,0,failed,0,CA,CAD,1448690400,1450159200,False,False,food/food trucks
1,1,Odom Inc,Managed bottom-line architecture,1400,14560,successful,158,US,USD,1408424400,1408597200,False,True,music/rock
2,2,"Melton, Robinson and Fritz",Function-based leadingedge pricing structure,108400,142523,successful,1425,AU,AUD,1384668000,1384840800,False,False,technology/web
3,3,"Mcdonald, Gonzalez and Ross",Vision-oriented fresh-thinking conglomeration,4200,2477,failed,24,US,USD,1565499600,1568955600,False,False,music/rock
4,4,Larson-Little,Proactive foreground core,7600,5265,failed,53,US,USD,1547964000,1548309600,False,False,theater/plays


In [5]:
# Get a list of all of our columns for easy reference
crowdfunding_df.columns

Index(['id', 'name', 'blurb', 'goal', 'pledged', 'outcome', 'backers_count',
       'country', 'currency', 'launched_at', 'deadline', 'staff_pick',
       'spotlight', 'category'],
      dtype='object')

In [7]:
# Extract "name", "goal", "pledged", "outcome", "country", "staff_pick",
# "backers_count", and "spotlight"
reduced_crowdfunding_df = crowdfunding_df [['name', 'goal', 'pledged', 'outcome', 'country',  'staff_pick', 'backers_count', 'spotlight']] 

In [8]:
reduced_crowdfunding_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
0,"Baldwin, Riley and Jackson",100,0,failed,CA,False,0,False
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
...,...,...,...,...,...,...,...,...
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
997,Ball LLC,7600,4603,canceled,IT,False,139,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [21]:
# Remove projects that made no money at all
reduced_crowdfunding_df = reduced_crowdfunding_df.loc[ (reduced_crowdfunding_df["pledged"] > 0)] 

In [22]:
reduced_crowdfunding_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
2,"Melton, Robinson and Fritz",108400,142523,successful,AU,False,1425,False
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
5,Harris Group,7600,13195,successful,DK,False,174,False
...,...,...,...,...,...,...,...,...
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True
996,Butler LLC,6600,4814,failed,US,False,112,False
997,Ball LLC,7600,4603,canceled,IT,False,139,False
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True


In [25]:
# Collect only those projects that were hosted in the US.

# Create a list of the columns

#  Create a new df for "US" with the columns. 
hosted_in_us_df = reduced_crowdfunding_df.loc[reduced_crowdfunding_df["country"] == "US"]
hosted_in_us_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight
1,Odom Inc,1400,14560,successful,US,False,158,True
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False
4,Larson-Little,7600,5265,failed,US,False,53,False
9,"Rangel, Holt and Jones",6200,3208,failed,US,False,44,False
10,Green Ltd,5200,13838,successful,US,False,220,False


In [26]:
hosted_in_us_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 762 entries, 1 to 999
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   name           762 non-null    object
 1   goal           762 non-null    int64 
 2   pledged        762 non-null    int64 
 3   outcome        762 non-null    object
 4   country        762 non-null    object
 5   staff_pick     762 non-null    bool  
 6   backers_count  762 non-null    int64 
 7   spotlight      762 non-null    bool  
dtypes: bool(2), int64(3), object(3)
memory usage: 43.2+ KB


In [29]:
# Create a new column that finds the average amount pledged to a project
hosted_in_us_df['average_donation'] = hosted_in_us_df['pledged'] / hosted_in_us_df['backers_count']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hosted_in_us_df['average_donation'] = hosted_in_us_df['pledged'] / hosted_in_us_df['backers_count']


In [30]:
hosted_in_us_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
1,Odom Inc,1400,14560,successful,US,False,158,True,92.151899
3,"Mcdonald, Gonzalez and Ross",4200,2477,failed,US,False,24,False,103.208333
4,Larson-Little,7600,5265,failed,US,False,53,False,99.339623
9,"Rangel, Holt and Jones",6200,3208,failed,US,False,44,False,72.909091
10,Green Ltd,5200,13838,successful,US,False,220,False,62.900000
...,...,...,...,...,...,...,...,...,...
994,"Leach, Rich and Price",141100,74073,failed,US,False,842,True,87.972684
995,Manning-Hamilton,97300,153216,successful,US,False,2043,True,74.995595
996,Butler LLC,6600,4814,failed,US,False,112,False,42.982143
998,"Taylor, Santiago and Flores",66600,37823,failed,US,False,374,True,101.131016


In [31]:
hosted_in_us_df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 762 entries, 1 to 999
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              762 non-null    object 
 1   goal              762 non-null    int64  
 2   pledged           762 non-null    int64  
 3   outcome           762 non-null    object 
 4   country           762 non-null    object 
 5   staff_pick        762 non-null    bool   
 6   backers_count     762 non-null    int64  
 7   spotlight         762 non-null    bool   
 8   average_donation  762 non-null    float64
dtypes: bool(2), float64(1), int64(3), object(3)
memory usage: 49.1+ KB


In [32]:
# Calculate the total number of backers for all US projects
hosted_in_us_df['backers_count'].sum()

545510

In [33]:
# Calculate the average number of backers for all US projects
hosted_in_us_df['backers_count'].mean()

715.8923884514436

In [38]:
hosted_in_us_df['backers_count'].std()

1157.6870474514533

In [41]:
# Collect only those US campaigns that have been picked as a "Staff Pick"
picked_by_staff_df = hosted_in_us_df.loc[hosted_in_us_df["staff_pick"]]
picked_by_staff_df

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
76,"Martin, Conway and Larsen",122900,95993,failed,US,True,1684,True,57.002969
86,Davis-Smith,7400,12405,successful,US,True,203,False,61.108374
193,"Calhoun, Rogers and Long",6600,3012,failed,US,True,65,False,46.338462
205,Weaver-Marquez,1300,5614,successful,US,True,80,False,70.175
220,Owens-Le,7900,667,failed,US,True,17,False,39.235294
221,Huff LLC,121500,119830,failed,US,True,2179,False,54.993116
225,Fox-Quinn,67800,176398,successful,US,True,5880,False,29.99966
259,Watkins Ltd,1800,10755,successful,US,True,138,False,77.934783
291,"Bell, Grimes and Kerr",1800,8219,successful,US,True,107,False,76.813084
384,"Baker, Collins and Smith",114400,196779,successful,US,True,4799,True,41.004168


In [47]:
picked_by_staff_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 39 entries, 76 to 980
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   name              39 non-null     object 
 1   goal              39 non-null     int64  
 2   pledged           39 non-null     int64  
 3   outcome           39 non-null     object 
 4   country           39 non-null     object 
 5   staff_pick        39 non-null     bool   
 6   backers_count     39 non-null     int64  
 7   spotlight         39 non-null     bool   
 8   average_donation  39 non-null     float64
dtypes: bool(2), float64(1), int64(3), object(3)
memory usage: 2.5+ KB


In [50]:
picked_by_staff_df.reset_index(inplace=True)

In [51]:
picked_by_staff_df

Unnamed: 0,level_0,index,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
0,0,76,"Martin, Conway and Larsen",122900,95993,failed,US,True,1684,True,57.002969
1,1,86,Davis-Smith,7400,12405,successful,US,True,203,False,61.108374
2,2,193,"Calhoun, Rogers and Long",6600,3012,failed,US,True,65,False,46.338462
3,3,205,Weaver-Marquez,1300,5614,successful,US,True,80,False,70.175
4,4,220,Owens-Le,7900,667,failed,US,True,17,False,39.235294
5,5,221,Huff LLC,121500,119830,failed,US,True,2179,False,54.993116
6,6,225,Fox-Quinn,67800,176398,successful,US,True,5880,False,29.99966
7,7,259,Watkins Ltd,1800,10755,successful,US,True,138,False,77.934783
8,8,291,"Bell, Grimes and Kerr",1800,8219,successful,US,True,107,False,76.813084
9,9,384,"Baker, Collins and Smith",114400,196779,successful,US,True,4799,True,41.004168


In [53]:
picked_by_staff_df = picked_by_staff_df.drop(columns=['level_0','index'])

In [55]:
picked_by_staff_df.head()

Unnamed: 0,name,goal,pledged,outcome,country,staff_pick,backers_count,spotlight,average_donation
0,"Martin, Conway and Larsen",122900,95993,failed,US,True,1684,True,57.002969
1,Davis-Smith,7400,12405,successful,US,True,203,False,61.108374
2,"Calhoun, Rogers and Long",6600,3012,failed,US,True,65,False,46.338462
3,Weaver-Marquez,1300,5614,successful,US,True,80,False,70.175
4,Owens-Le,7900,667,failed,US,True,17,False,39.235294


In [46]:
# Group by the outcome of the campaigns and see if staff picks matter (Seems to matter quite a bit)
picked_by_staff_df['outcome'].value_counts()

outcome
successful    23
failed        13
canceled       3
Name: count, dtype: int64

In [57]:
outcome_groups = picked_by_staff_df.groupby("outcome")

In [66]:
outcome_groups['pledged'].std()

outcome
canceled      28939.243120
failed        42592.502557
successful    63157.550323
Name: pledged, dtype: float64