In [1]:
import altair as alt
import pandas as pd

In [3]:
new_df = pd.read_csv('cleaned_kick.csv')

In [5]:
new_df

Unnamed: 0,project id,name,location,status,goal,pledged,funded(%),funded date,backers,levels,reward levels,duration
0,126581,Educational Online Trading Card Game,"Maplewood, NJ",failed,4000,20,0.01%,2010-08-02 00:00:00,2,5,"$1,$5,$10,$25,$50",47.18
1,3656699,"Time, Space and Bullets! board game","San Rafael, CA",failed,7500,0,0.00%,2010-11-01 00:00:00,0,5,"$15,$25,$50,$100,$500",60.00
2,3702708,Titans of Industry Board Game,"Chicago, IL",successful,30000,30388,1.01%,2012-05-21 00:00:00,244,11,"$10,$25,$55,$80,$100,$150,$225,$250,$500,$2,00...",44.42
3,4326268,Free RPG Day Pathfinder Adventure from Gaming ...,"Grand Rapids, MI",successful,1000,2665,2.66%,2012-04-16 00:00:00,120,5,"$6,$15,$28,$34,$95",18.10
4,6296368,The Bigfoot Project,"Chicago, IL",failed,2000,278,0.14%,2012-05-01 00:00:00,11,8,"$1,$5,$15,$30,$50,$75,$150,$400",55.04
...,...,...,...,...,...,...,...,...,...,...,...,...
537,2120375111,The New Forest Arimaa Set - Play Arimaa on Han...,"Williamsburg, VA",successful,2850,6563,2.30%,2012-05-31 00:00:00,34,15,"$1,$2,$5,$10,$20,$25,$50,$75,$100,$200,$300,$5...",45.00
538,2125571001,Solar Circuit Racing,"Seattle, WA",successful,12000,12843,1.07%,2011-11-08 00:00:00,152,10,"$1,$10,$20,$30,$50,$75,$100,$250,$500,$2,000",60.04
539,2132127788,THROWDOWN! Card Game - Robot Cowboy Samurai Ed...,"Spotsylvania, VA",live,20000,1421,0.07%,2012-07-02 00:00:00,34,16,"$5,$10,$11,$20,$22,$30,$33,$50,$100,$225,$350,...",52.99
540,2143119805,Disaster Looms!,"Seattle, WA",live,25000,28435,1.14%,2012-06-18 00:00:00,327,24,"$1,$10,$20,$30,$40,$50,$65,$90,$100,$100,$115,...",52.00


In [9]:
# a. Mean total pledge
mean_pledged = new_df['pledged'].mean()
print(f"The mean total pledge is: {mean_pledged}")

The mean total pledge is: 13258.142066420663


In [39]:
# b. Histogram of backers
chart_backers = alt.Chart(new_df).mark_bar().encode(
    alt.X("backers", bin=True),
    y='count()',
    tooltip=['count()']
).properties(
    title='Distribution of the Number of Backers'
)
chart_backers.display() 

# Interpretation of the histogram:
# The histogram shows a skewed distribution with a large number of projects having a small number of backers.
# There are a few projects with a very high number of backers, which are outliers.
# This suggests that while some projects attract many backers, most projects have a more modest number.

In [43]:
# c. Success rate of projects
success_rate = (new_df['status'] == 'successful').mean()
print(f"The success rate of projects is: {success_rate:.2f}")

The success rate of projects is: 0.47


In [53]:
 #d. Success rate across US states
# Assuming 'location' contains 'City, State' and we can extract the state.
new_df['state'] = new_df['location'].str.split(',').str[1].str.strip()

us_states_df = new_df[new_df['state'].isin(['AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY', 'DC'])]

success_rate_by_state = us_states_df.groupby('state')['status'].apply(lambda x: (x == 'successful').mean()).reset_index(name='success_rate')
print("\nSuccess rate by US State:")
print(success_rate_by_state.sort_values(by='success_rate', ascending=False))



Success rate by US State:
   state  success_rate
39    TN      1.000000
7     DC      1.000000
1     AL      1.000000
16    KS      1.000000
42    VA      0.769231
30    NM      0.750000
31    NV      0.666667
26    NC      0.666667
6     CT      0.666667
19    MA      0.647059
40    TX      0.629630
41    UT      0.620690
15    IN      0.588235
36    PA      0.555556
44    WI      0.533333
10    GA      0.533333
14    IL      0.518519
2     AR      0.500000
11    HI      0.500000
35    OR      0.500000
21    ME      0.500000
34    OK      0.500000
23    MN      0.461538
4     CA      0.447368
32    NY      0.441176
43    WA      0.407407
29    NJ      0.400000
5     CO      0.384615
3     AZ      0.375000
22    MI      0.333333
17    KY      0.333333
20    MD      0.285714
24    MO      0.272727
33    OH      0.266667
9     FL      0.263158
38    SC      0.250000
37    RI      0.000000
0     AK      0.000000
28    NH      0.000000
27    NE      0.000000
25    MS      0.000000
18    L

In [55]:
# e. Differences between successful and failed projects
successful_projects = new_df[new_df['status'] == 'successful']
failed_projects = new_df[new_df['status'] == 'failed']

print("\nSummary statistics for successful projects:")
print(successful_projects[['goal', 'pledged', 'backers', 'levels', 'duration']].describe())

print("\nSummary statistics for failed projects:")
print(failed_projects[['goal', 'pledged', 'backers', 'levels', 'duration']].describe())


Summary statistics for successful projects:
               goal        pledged      backers      levels    duration
count    256.000000     256.000000   256.000000  256.000000  256.000000
mean    6549.039062   24368.714844   363.878906   10.757812   39.213008
std     6069.095175   78483.784435   600.178631    5.381517   13.947015
min       84.000000     101.000000     7.000000    2.000000   11.680000
25%     2150.000000    4414.750000    94.750000    7.000000   30.000000
50%     5000.000000   10189.000000   185.000000   10.000000   34.480000
75%     9562.500000   20517.250000   379.250000   13.000000   45.447500
max    35000.000000  923680.000000  5512.000000   40.000000   90.040000

Summary statistics for failed projects:
                goal       pledged     backers      levels    duration
count     204.000000    204.000000  204.000000  204.000000  204.000000
mean    11627.676471   1062.161765   22.352941    7.843137   43.316471
std     14093.892818   1608.401433   31.468695    3.9

In [57]:
 ##Bar chart of average backers by status
chart_avg_backers_by_status = alt.Chart(new_df.groupby('status')['backers'].mean().reset_index()).mark_bar().encode(
    x='status',
    y='backers',
    tooltip=['status', 'backers']
).properties(
    title='Average Number of Backers by Project Status'
)
chart_avg_backers_by_status.display() # Use chart_avg_backers_by_status.show() in a script

ðŸŽ¯ Key Actions for a Successful Kickstarter Campaign:
1) Set a Realistic Goal:
Successful campaigns tend to set lower, more achievable goals â€” which builds momentum and attracts backers early on.
2) Prioritize Backer Outreach:
Success is strongly linked to the number of backers. Focus on broad marketing and community-building to drive engagement.
3) Understand Your Locationâ€™s Impact:
Some states show higher success rates. Research similar campaigns in your region to see if a local or national focus makes more sense.
4) Design Strong Reward Levels:
While not the biggest differentiator in your data, varied and attractive reward tiers are a known factor in encouraging pledges.
5) Choose the Right Duration:
Successful campaigns give themselves enough time to gain traction â€” not too short to get missed, not too long to lose urgency.
6) Aim to Overdeliver on Your Goal:
Successful projects typically exceed their funding goals. Set an achievable goal, but plan your strategy to surpass it.
