# Kickstarter Cleanup

#### Import dependencies

In [None]:
import pandas as pd

#### Load data file

In [None]:
# Location of the raw Kickstarter CSV (a path relative to the working directory)
file = "Resources/KickstarterData.csv"

# Parse the CSV into a DataFrame and preview its first five rows
df = pd.read_csv(filepath_or_buffer=file)
df.head(n=5)

#### Display DataFrame's shape

In [None]:
# (number of rows, number of columns) of the freshly loaded data
df.shape

#### Get a list of all of our columns for easy reference

In [None]:
# Column labels of the raw data, handy when choosing which columns to keep
df.columns

#### Create a new DataFrame limited to just these columns: "name", "goal", "pledged", "state", "country", "staff_pick", "backers_count", and "spotlight"

In [None]:
# Columns carried forward into the cleaned-up DataFrame
wanted_columns = [
    "name",
    "goal",
    "pledged",
    "state",
    "country",
    "staff_pick",
    "backers_count",
    "spotlight",
]

# All rows, restricted to the columns listed above
reduced_kickstarter_df = df.loc[:, wanted_columns]
reduced_kickstarter_df.head()

#### Remove projects that made no money at all

In [None]:
# Boolean mask: True for rows whose pledged amount is greater than zero.
# (Plain bracket indexing with this mask would select the same rows as .loc.)
has_pledges = reduced_kickstarter_df["pledged"] > 0
reduced_kickstarter_df = reduced_kickstarter_df.loc[has_pledges]
reduced_kickstarter_df.head()

#### Collect only those projects that were hosted in the US

In [None]:
# Keep only the rows whose country code is "US"
# (plain bracket indexing with the same mask would work as well as .loc).
# .copy() makes hosted_in_us an independent DataFrame, so the column
# assignments performed on it in later cells do not raise
# SettingWithCopyWarning.
is_us = reduced_kickstarter_df["country"] == "US"
hosted_in_us = reduced_kickstarter_df.loc[is_us].copy()
hosted_in_us.head()

#### Create a new column that holds the average amount pledged per backer for each project



In [None]:
# Per-row average donation: amount pledged divided by the number of backers
pledged_per_backer = hosted_in_us["pledged"].div(hosted_in_us["backers_count"])
hosted_in_us["average_donation"] = pledged_per_backer
hosted_in_us.head()

#### (1) Convert the "average_donation", "goal", and "pledged" columns to float. (2) Format them as currency: a leading dollar sign, comma thousands separators, and two decimal places

In [None]:
# Columns to render as currency strings such as "$1,234.50"
currency_columns = ["average_donation", "goal", "pledged"]

# Cast each column to float, then format it as a string.
# assign() builds a new DataFrame with these columns replaced, rather than
# writing strings into the existing numeric columns via .loc[:, col] = ...;
# that in-place form triggers pandas' incompatible-dtype deprecation and,
# on a filtered frame, SettingWithCopyWarning.
# NOTE: after this cell these three columns hold text, not numbers.
hosted_in_us = hosted_in_us.assign(
    **{
        column: hosted_in_us[column].astype(float).map("${:,.2f}".format)
        for column in currency_columns
    }
)

hosted_in_us.head()

#### Calculate the total number of backers for all US projects

In [None]:
# Sum of the backers_count column over every row of hosted_in_us
total_us_backers = hosted_in_us["backers_count"].sum()
total_us_backers

#### Calculate the average number of backers for all US projects

In [None]:
# Mean of the backers_count column over every row of hosted_in_us
mean_us_backers = hosted_in_us["backers_count"].mean()
mean_us_backers

#### Collect only those US campaigns that have been picked as a "Staff Pick"

In [None]:
# Mask of rows whose staff_pick value compares equal to True
is_staff_pick = hosted_in_us["staff_pick"].eq(True)
picked_by_staff = hosted_in_us.loc[is_staff_pick]
picked_by_staff.head()

#### Group the staff-picked campaigns by their "state" column and count them to see whether being a staff pick matters (it seems to matter quite a bit)

In [None]:
# Bucket the staff-picked campaigns by the value in their "state" column
state_groups = picked_by_staff.groupby(by="state")

# Number of non-null campaign names in each bucket
campaigns_per_state = state_groups["name"].count()
campaigns_per_state