In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import random
import csv

In [None]:
# Load the first 100 Kickstarter projects (rows) from 'kickstarter_data.csv'
# into a pandas DataFrame.
kickstarter_df = pd.read_csv(
    'kickstarter_data.csv',
    nrows=100,
)

In [None]:
# The DataFrame is the core data structure in pandas: a labelled table whose
# columns can each hold a different type of value.
# Leaving it as the last expression of a cell makes the notebook display it.
kickstarter_df

In [None]:
# We can get summary statistics for the columns of the DataFrame.
# By default describe() reports count, mean, std, min, quartiles and max
# for the numeric columns.
kickstarter_df.describe()

In [None]:
# We can sort the rows by the values in a column: here by 'backers',
# largest first. sort_values returns a new, sorted DataFrame;
# kickstarter_df itself is left unchanged.
kickstarter_df.sort_values(by='backers', ascending=False)

In [None]:
# We can keep only the rows that meet a condition by indexing with a
# boolean mask: here, every project whose state is 'successful'.
kickstarter_df.loc[kickstarter_df['state'] == 'successful']

In [None]:
# Several criteria? No problem: combine the boolean masks with `&`
# (each comparison needs its own parentheses).
# Here: all failed Kickstarters in USD that raised more than $1000.
kickstarter_df.loc[
    (kickstarter_df['state'] == 'failed')
    & (kickstarter_df['pledged'] > 1000.0)
    & (kickstarter_df['currency'] == 'USD')
]

In [None]:
# Using DataFrames, we can do some pretty cool stuff with data visualization.
# Scatter plot of the amount pledged (x) against the funding goal (y),
# with one colour per project state.

# NOTE: rcParams is global, so this also changes the default size of every
# figure created later in the notebook, not just this one.
plt.rcParams['figure.figsize'] = [20, 15]

# Colour used for each project state, for both the markers and the legend.
colors = {
    'successful': 'green',
    'canceled': 'gold',
    'failed': 'red',
}
# States to draw, in drawing order (later states are painted on top).
# Rows whose state is not listed here are not plotted.
status = ['successful', 'canceled', 'failed']

# Legend text for each state, in the order the entries appear in the legend.
legend_labels = {
    'failed': 'Failed',
    'canceled': 'Cancelled',
    'successful': 'Successful',
}

fig, ax = plt.subplots()

for state in status:
    state_df = kickstarter_df[kickstarter_df.state == state]
    ax.scatter(state_df['pledged'], state_df['goal'], c=colors[state], s=100)

ax.set_xlabel("Pledged")
ax.set_ylabel("Goal")
ax.set_title("Kickstarter projects: pledged vs. goal by state")

# Build the legend from the same colour mapping as the markers so the two
# cannot drift apart.
legend_handles = [
    mpatches.Patch(color=colors[state], label=label)
    for state, label in legend_labels.items()
]
ax.legend(handles=legend_handles, prop={'size': 30})

plt.show()