In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [2]:
# Read the crowdfunding CSV from its remote URL into a DataFrame
df = pd.read_csv(
    filepath_or_buffer="https://static.bc-edx.com/ai/ail-v-1-0/m14/datasets/crowdfunding-data.csv"
)
# Preview the first rows to confirm the load worked
df.head()

Unnamed: 0,goal,pledged,backers_count,country,staff_pick,spotlight,category,days_active,outcome
0,100,0,0,3,0,0,0,17,0
1,1400,14560,158,0,0,1,1,27,1
2,108400,142523,1425,4,0,0,2,20,1
3,4200,2477,24,0,0,0,1,40,0
4,7600,5265,53,0,0,0,3,4,0


In [3]:
# Derive "pledged_per_backer": amount pledged divided by number of backers.
# Rows with zero pledged and zero backers evaluate to NaN (0 / 0).
df['pledged_per_backer'] = df['pledged'].div(df['backers_count'])

df.head()

Unnamed: 0,goal,pledged,backers_count,country,staff_pick,spotlight,category,days_active,outcome,pledged_per_backer
0,100,0,0,3,0,0,0,17,0,
1,1400,14560,158,0,0,1,1,27,1,92.151899
2,108400,142523,1425,4,0,0,2,20,1,100.01614
3,4200,2477,24,0,0,0,1,40,0,103.208333
4,7600,5265,53,0,0,0,3,4,0,99.339623


In [4]:
# Replace missing pledged_per_backer values with zero
df['pledged_per_backer'] = df['pledged_per_backer'].mask(df['pledged_per_backer'].isna(), 0)
df.head()

Unnamed: 0,goal,pledged,backers_count,country,staff_pick,spotlight,category,days_active,outcome,pledged_per_backer
0,100,0,0,3,0,0,0,17,0,0.0
1,1400,14560,158,0,0,1,1,27,1,92.151899
2,108400,142523,1425,4,0,0,2,20,1,100.01614
3,4200,2477,24,0,0,0,1,40,0,103.208333
4,7600,5265,53,0,0,0,3,4,0,99.339623


In [5]:
# Derive "backers_per_day": number of backers divided by days the campaign was active
df['backers_per_day'] = df['backers_count'].div(df['days_active'])

df.head()

Unnamed: 0,goal,pledged,backers_count,country,staff_pick,spotlight,category,days_active,outcome,pledged_per_backer,backers_per_day
0,100,0,0,3,0,0,0,17,0,0.0,0.0
1,1400,14560,158,0,0,1,1,27,1,92.151899,5.851852
2,108400,142523,1425,4,0,0,2,20,1,100.01614,71.25
3,4200,2477,24,0,0,0,1,40,0,103.208333,0.6
4,7600,5265,53,0,0,0,3,4,0,99.339623,13.25


In [6]:
# Create a days_to_goal column
def days_to_goal(row, no_progress_value=10000):
    """Estimate how many days a campaign needs to reach its funding goal.

    The daily pledge rate is ``pledged_per_backer * backers_per_day``; the
    estimate is the amount still outstanding (``goal - pledged``) divided by
    that rate.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``df.apply(..., axis=1)``)
        Must provide the keys 'goal', 'pledged', 'pledged_per_backer' and
        'backers_per_day'.
    no_progress_value : number, default 10000
        Sentinel returned when the daily pledge rate is zero or missing,
        because the division cannot be performed in that case.

    Returns
    -------
    number
        Estimated days to reach the goal. The value is negative when the
        pledged amount already exceeds the goal.
    """
    amount_remaining = row['goal'] - row['pledged']
    pledged_per_day = row['pledged_per_backer'] * row['backers_per_day']
    # We can't divide by zero, and a NaN rate (e.g. from an unfilled 0/0
    # ratio) compares unequal to zero, so it must be checked explicitly;
    # otherwise NaN would silently propagate into the feature column.
    if pd.isna(pledged_per_day) or pledged_per_day == 0:
        return no_progress_value
    return amount_remaining / pledged_per_day

# Evaluate days_to_goal once per row and store the result as a new column
df['days_to_goal'] = df.apply(days_to_goal, axis='columns')
df.head()

Unnamed: 0,goal,pledged,backers_count,country,staff_pick,spotlight,category,days_active,outcome,pledged_per_backer,backers_per_day,days_to_goal
0,100,0,0,3,0,0,0,17,0,0.0,0.0,10000.0
1,1400,14560,158,0,0,1,1,27,1,92.151899,5.851852,-24.403846
2,108400,142523,1425,4,0,0,2,20,1,100.01614,71.25,-4.78842
3,4200,2477,24,0,0,0,1,40,0,103.208333,0.6,27.823981
4,7600,5265,53,0,0,0,3,4,0,99.339623,13.25,1.773979
