In [205]:
# Import the required libraries and dependencies
import os
import pandas as pd
import plotly.express as px
import hvplot.pandas
from pathlib import Path
from dotenv import load_dotenv
import sqlalchemy

True

## Enable Mapbox API access token

In [220]:
# Pull the variables defined in the .env file into the process environment
load_dotenv()

# Look up the Mapbox token stored under the MAPBOX_API_ACCESS_TOKEN variable
mapbox_api_access_token = os.environ.get('MAPBOX_API_ACCESS_TOKEN')

# Sanity check: a token that was found is a str, a missing one shows NoneType
display(type(mapbox_api_access_token))

# Hand the token to Plotly Express
px.set_mapbox_access_token(mapbox_api_access_token)

str

## Import the data 

In [221]:
# Read the large, recent Kickstarter export
kickstarter_large_csv_path = Path('./Resources/kickstarter_data/ks-projects-201801.csv')
kickstarter_large_recent_df = pd.read_csv(kickstarter_large_csv_path)

# The imported column names can carry stray spaces (e.g. at the end of a name).
# Every space is removed from each name, not only trailing ones.
kickstarter_large_recent_df = kickstarter_large_recent_df.rename(
    columns=lambda column_name: column_name.replace(' ', '')
)

# Use the project ID as the row index
kickstarter_large_recent_df = kickstarter_large_recent_df.set_index('ID')

# Preview the first two rows
kickstarter_large_recent_df.head(2)

Unnamed: 0_level_0,name,category,main_category,currency,deadline,goal,launched,pledged,state,backers,country,usdpledged,usd_pledged_real,usd_goal_real
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1000002330,The Songs of Adelaide & Abullah,Poetry,Publishing,GBP,2015-10-09,1000.0,2015-08-11 12:12:28,0.0,failed,0,GB,0.0,0.0,1533.95
1000003930,Greeting From Earth: ZGAC Arts Capsule For ET,Narrative Film,Film & Video,USD,2017-11-01,30000.0,2017-09-02 04:43:57,2421.0,failed,15,US,100.0,2421.0,30000.0


In [222]:
# Read the small "most backed" Kickstarter export. Its first column arrives
# as 'Unnamed: 0'; name it 'ID' and use it as the row index.
kickstarter_small_csv_path = Path('./Resources/kickstarter_data/most_backed.csv')
kickstarter_small_most_backed_df = (
    pd.read_csv(kickstarter_small_csv_path)
    .rename(columns={'Unnamed: 0': 'ID'})
    .set_index('ID')
)

# Preview the first two rows
kickstarter_small_most_backed_df.head(2)

Unnamed: 0_level_0,amt.pledged,blurb,by,category,currency,goal,location,num.backers,num.backers.tier,pledge.tier,title,url
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,8782571.0,\nThis is a card game for people who are into ...,Elan Lee,Tabletop Games,usd,10000.0,"Los Angeles, CA",219382,"[15505, 202934, 200, 5]","[20.0, 35.0, 100.0, 500.0]",Exploding Kittens,/projects/elanlee/exploding-kittens
1,6465690.0,"\nAn unusually addicting, high-quality desk to...",Matthew and Mark McLachlan,Product Design,usd,15000.0,"Denver, CO",154926,"[788, 250, 43073, 21796, 41727, 21627, 12215, ...","[1.0, 14.0, 19.0, 19.0, 35.0, 35.0, 79.0, 79.0...",Fidget Cube: A Vinyl Desk Toy,/projects/antsylabs/fidget-cube-a-vinyl-desk-toy


In [223]:
# Read the Indiegogo export, expose its 'project_id' column under the name
# 'ID' (matching the Kickstarter frames) and use it as the row index.
indiegogo_csv_path = Path('./Resources/indiegogo_data/indiegogo.csv')
indiegogo_df = (
    pd.read_csv(indiegogo_csv_path)
    .rename(columns={'project_id': 'ID'})
    .set_index('ID')
)

# Preview the first two rows
indiegogo_df.head(2)

Unnamed: 0_level_0,currency,category,year_end,month_end,day_end,time_end,amount_raised,funded_percent,in_demand,year_launch,...,apr,may,jun,jul,aug,sep,oct,nov,dec,tperiod
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3936,USD,Transportation,2010,5,12,23:59:00,840,16.80%,False,2010,...,1,0,0,0,0,0,0,0,0,1
5109,USD,Human Rights,2010,7,2,23:59:00,250,20.83%,False,2010,...,0,0,1,0,0,0,0,0,0,2


## Rename cols

In [224]:
# Rename the Indiegogo and small Kickstarter columns to the names used by the
# large Kickstarter frame, so later cells can address all three frames with
# the same column names.
indiegogo_column_names = {
    'category': 'main_category',
    'title': 'name',
    'amount_raised_usd': 'usd_pledged_real',
    'goal_usd': 'usd_goal_real',
}
kickstarter_small_column_names = {
    'amt.pledged': 'usd_pledged_real',
    'category': 'main_category',
    'goal': 'usd_goal_real',
    'num.backers': 'backers',
    'title': 'name',
}

indiegogo_df = indiegogo_df.rename(columns=indiegogo_column_names)

# The small Kickstarter frame must be renamed here as well: later cells read
# 'main_category', 'name', 'backers', 'usd_goal_real' and 'usd_pledged_real'
# from it and fail with a KeyError on a fresh kernel otherwise.
# rename() ignores keys that are absent, so re-running this cell is harmless.
kickstarter_small_most_backed_df = kickstarter_small_most_backed_df.rename(
    columns=kickstarter_small_column_names
)

# View head of both renamed frames
display(indiegogo_df.head(2))
display(kickstarter_small_most_backed_df.head(2))

Unnamed: 0_level_0,currency,main_category,year_end,month_end,day_end,time_end,amount_raised,funded_percent,in_demand,year_launch,...,apr,may,jun,jul,aug,sep,oct,nov,dec,tperiod
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3936,USD,Transportation,2010,5,12,23:59:00,840,16.80%,False,2010,...,1,0,0,0,0,0,0,0,0,1
5109,USD,Human Rights,2010,7,2,23:59:00,250,20.83%,False,2010,...,0,0,1,0,0,0,0,0,0,2


Unnamed: 0_level_0,usd_pledged_real,blurb,by,main_category,currency,usd_goal_real,location,backers,num.backers.tier,pledge.tier,name,url
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,8782571.0,\nThis is a card game for people who are into ...,Elan Lee,Tabletop Games,usd,10000.0,"Los Angeles, CA",219382,"[15505, 202934, 200, 5]","[20.0, 35.0, 100.0, 500.0]",Exploding Kittens,/projects/elanlee/exploding-kittens
1,6465690.0,"\nAn unusually addicting, high-quality desk to...",Matthew and Mark McLachlan,Product Design,usd,15000.0,"Denver, CO",154926,"[788, 250, 43073, 21796, 41727, 21627, 12215, ...","[1.0, 14.0, 19.0, 19.0, 35.0, 35.0, 79.0, 79.0...",Fidget Cube: A Vinyl Desk Toy,/projects/antsylabs/fidget-cube-a-vinyl-desk-toy


## Clean large kickstarter df

In [225]:
# Parse the launch and deadline columns as datetimes
kickstarter_large_recent_df['launched'] = pd.to_datetime(kickstarter_large_recent_df['launched'])
kickstarter_large_recent_df['deadline'] = pd.to_datetime(kickstarter_large_recent_df['deadline'])

# Only failed and successful projects are kept, to match the indiegogo df
failure_success = ['failed', 'successful']
state_codes = {'failed': 0, 'successful': 1}

# Re-run guard: once 'state' holds the 0/1 codes there are no text labels left
# to convert, so the filter/encode step is skipped.
if kickstarter_large_recent_df['state'].isin(failure_success).any():
    # .copy() makes the filtered frame independent, so the assignment below
    # writes to it directly instead of relying on chained in-place mutation
    # (which warns, and is a no-op under pandas Copy-on-Write).
    kickstarter_large_recent_df = kickstarter_large_recent_df[
        kickstarter_large_recent_df['state'].isin(failure_success)
    ].copy()
    # Encode successful -> 1 and failed -> 0 (int64) to match indiegogo
    kickstarter_large_recent_df['state'] = (
        kickstarter_large_recent_df['state'].map(state_codes).astype('int64')
    )

# Rows to discard:
#  * projects marked successful although they have no backers
#  * rows whose country is the placeholder 'N,0"' and whose currency is EUR;
#    EUR is shared by several countries, so the country cannot be recovered
unknown_country = kickstarter_large_recent_df['country'] == 'N,0"'
successful_without_backers = (
    (kickstarter_large_recent_df['state'] == 1)
    & (kickstarter_large_recent_df['backers'] == 0)
)
unresolvable_euro_rows = (kickstarter_large_recent_df['currency'] == 'EUR') & unknown_country

# .copy() again so that later column assignments act on an independent frame
kickstarter_large_recent_df = kickstarter_large_recent_df.loc[
    ~(successful_without_backers | unresolvable_euro_rows)
].copy()

# Build a currency -> country lookup from the rows with a known country; it is
# used to fix the remaining 'N,0"' countries. When a currency appears with
# several countries, the last pair encountered wins.
country_currency_df = (
    kickstarter_large_recent_df
    .loc[kickstarter_large_recent_df['country'] != 'N,0"', ['country', 'currency']]
    .drop_duplicates()
    .set_index('currency')
)
currency_country_dict = country_currency_df['country'].to_dict()

# Resolve the 'N,0"' country placeholder through the currency lookup
def replace_N0(country, currency):
    """Return `country`, unless it is the placeholder 'N,0"'.

    For the placeholder, the country stored for `currency` in the
    module-level `currency_country_dict` is returned instead; a currency
    that is missing from that dict raises KeyError.
    """
    return currency_country_dict[currency] if country == 'N,0"' else country

# Resolve every 'N,0"' country through currency_country_dict, one
# (country, currency) pair per row
kickstarter_large_recent_df['country'] = [
    replace_N0(country, currency)
    for country, currency in zip(
        kickstarter_large_recent_df['country'],
        kickstarter_large_recent_df['currency'],
    )
]

# List the distinct categories, currencies and countries so they can be
# compared with the indiegogo values and aligned later
display(
    kickstarter_large_recent_df['main_category'].unique(),
    kickstarter_large_recent_df['currency'].unique(),
    kickstarter_large_recent_df['country'].unique(),
)

# Preview the first two rows
kickstarter_large_recent_df.head(2)


array(['Publishing', 'Film & Video', 'Music', 'Food', 'Crafts', 'Games',
       'Design', 'Comics', 'Fashion', 'Theater', 'Art', 'Photography',
       'Technology', 'Dance', 'Journalism'], dtype=object)

array(['GBP', 'USD', 'CAD', 'AUD', 'NOK', 'EUR', 'MXN', 'SEK', 'NZD',
       'CHF', 'DKK', 'HKD', 'SGD', 'JPY'], dtype=object)

array(['GB', 'US', 'CA', 'AU', 'NO', 'IT', 'DE', 'IE', 'MX', 'ES', 'SE',
       'FR', 'NZ', 'CH', 'AT', 'BE', 'DK', 'HK', 'NL', 'LU', 'SG', 'JP'],
      dtype=object)

Unnamed: 0_level_0,name,category,main_category,currency,deadline,goal,launched,pledged,state,backers,country,usdpledged,usd_pledged_real,usd_goal_real
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1000002330,The Songs of Adelaide & Abullah,Poetry,Publishing,GBP,2015-10-09,1000.0,2015-08-11 12:12:28,0.0,0,0,GB,0.0,0.0,1533.95
1000003930,Greeting From Earth: ZGAC Arts Capsule For ET,Narrative Film,Film & Video,USD,2017-11-01,30000.0,2017-09-02 04:43:57,2421.0,0,15,US,100.0,2421.0,30000.0


## Clean indiegogo df

In [226]:
# Build full timestamps by joining each date column with its matching time
# column ('date_launch' + 'time_launch', 'date_end' + 'time_end')
for timestamp_column, suffix in (('launched', 'launch'), ('deadline', 'end')):
    indiegogo_df[timestamp_column] = pd.to_datetime(
        indiegogo_df[f'date_{suffix}'] + ' ' + indiegogo_df[f'time_{suffix}']
    )

# List the distinct categories and currencies so they can be compared with
# the kickstarter values and aligned later
display(
    indiegogo_df['main_category'].unique(),
    indiegogo_df['currency'].unique(),
)

# Preview the first two rows
indiegogo_df.head(2)

array(['Transportation', 'Human Rights', 'Photography', 'Camera Gear',
       'Wellness', 'Productivity', 'Audio', 'Tabletop Games',
       'Energy & Green Tech', 'Podcasts, Blogs & Vlogs', 'Culture',
       'Fashion & Wearables', 'Local Businesses', 'Phones & Accessories',
       'Dance & Theater', 'Environment', 'Video Games', 'Home', 'Music',
       'Health & Fitness', 'Education', 'Art', 'Writing & Publishing',
       'Comics', 'Food & Beverages', 'Travel & Outdoors', 'Film',
       'Web Series & TV Shows'], dtype=object)

array(['USD', 'EUR', 'CAD', 'GBP', 'AUD', 'HKD', 'SEK', 'CHF', 'SGD',
       'DKK', 'NOK'], dtype=object)

Unnamed: 0_level_0,currency,main_category,year_end,month_end,day_end,time_end,amount_raised,funded_percent,in_demand,year_launch,...,jun,jul,aug,sep,oct,nov,dec,tperiod,launched,deadline
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3936,USD,Transportation,2010,5,12,23:59:00,840,16.80%,False,2010,...,0,0,0,0,0,0,0,1,2010-04-21 22:38:42,2010-05-12 23:59:00
5109,USD,Human Rights,2010,7,2,23:59:00,250,20.83%,False,2010,...,1,0,0,0,0,0,0,2,2010-06-10 17:47:35,2010-07-02 23:59:00


## Clean small kickstarter df

In [227]:
# Keep only the projects whose currency is 'usd'
usd_rows = kickstarter_small_most_backed_df['currency'] == 'usd'
kickstarter_small_most_backed_df = kickstarter_small_most_backed_df.loc[usd_rows]

# Show the distinct categories and currencies (to compare with indiegogo and
# align later), followed by the first two rows
display(
    kickstarter_small_most_backed_df['main_category'].unique(),
    kickstarter_small_most_backed_df['currency'].unique(),
    kickstarter_small_most_backed_df.head(2),
)

array(['Tabletop Games', 'Product Design', 'Web', 'Narrative Film',
       'Video Games', 'Gaming Hardware', 'Television', 'Hardware',
       'Music', 'Space Exploration', 'Radio &amp; Podcasts', 'Flight',
       'Technology', 'Sound', 'Animation', 'Games', 'Action', 'Gadgets',
       'Documentary', '3D Printing', 'DIY Electronics', 'Publishing',
       'Fashion', 'Comics', 'Typography', "Children's Books", 'Apparel',
       'Food', 'Crafts', 'Camera Equipment', 'Comedy', 'Design',
       'Graphic Design', 'Apps', 'Hip-Hop', 'World Music',
       'Film &amp; Video', 'Drinks', 'Wearables', 'Accessories',
       'Footwear', 'Science Fiction', 'Nonfiction', 'Robots', 'Software',
       'Chiptune', 'Webseries', 'Webcomics', 'Rock', 'Playing Cards',
       'Cookbooks', 'Fantasy', 'Photography', 'Anthologies', 'Faith',
       'Mixed Media', 'Art', 'Metal', 'Small Batch', 'Classical Music',
       'Architecture', 'Pop', 'Dance', 'Electronic Music', 'Art Books',
       'Graphic Novels', 'Music

array(['usd'], dtype=object)

Unnamed: 0_level_0,usd_pledged_real,blurb,by,main_category,currency,usd_goal_real,location,backers,num.backers.tier,pledge.tier,name,url
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,8782571.0,\nThis is a card game for people who are into ...,Elan Lee,Tabletop Games,usd,10000.0,"Los Angeles, CA",219382,"[15505, 202934, 200, 5]","[20.0, 35.0, 100.0, 500.0]",Exploding Kittens,/projects/elanlee/exploding-kittens
1,6465690.0,"\nAn unusually addicting, high-quality desk to...",Matthew and Mark McLachlan,Product Design,usd,15000.0,"Denver, CO",154926,"[788, 250, 43073, 21796, 41727, 21627, 12215, ...","[1.0, 14.0, 19.0, 19.0, 35.0, 35.0, 79.0, 79.0...",Fidget Cube: A Vinyl Desk Toy,/projects/antsylabs/fidget-cube-a-vinyl-desk-toy


## Create dfs from selected columns

In [228]:
# Columns to discard from each source frame
kickstarter_cols_drop = ['category', 'goal', 'pledged', 'usdpledged']
kickstarter_small_cols_drop = ['blurb', 'by', 'url']
indiegogo_cols_drop = [
    # date parts, raw amount and descriptive fields
    'year_end', 'month_end', 'day_end', 'amount_raised', 'in_demand',
    'year_launch', 'month_launch', 'day_launch', 'tagline', 'url',
    # per-region columns
    'australia', 'canada', 'switzerland', 'denmark', 'western_europe',
    'great_britain', 'hong_kong', 'norway', 'sweden', 'singapore',
    'united_states',
    # per-category columns
    'education', 'productivity', 'energy_greentech', 'wellness', 'comics',
    'fashion_wearables', 'video_games', 'photography', 'tv_shows',
    'dance_theater', 'phones_accessories', 'audio', 'film', 'transportation',
    'art', 'environment', 'writing_publishing', 'music', 'travel_outdoors',
    'health_fitness', 'tabletop_games', 'home', 'local_business',
    'food_beverage', 'culture', 'human_rights', 'podcasts_vlogs',
    'camera_gear',
    # raw date/time strings (already merged into 'launched' / 'deadline')
    'time_end', 'time_launch', 'date_launch', 'date_end',
    # per-month columns and period counter
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    'tperiod',
]

# Large kickstarter frame without the discarded columns
kickstarter_selected_cols_large_recent_df = kickstarter_large_recent_df.drop(
    columns=kickstarter_cols_drop
)
display(kickstarter_selected_cols_large_recent_df.head(2))

# Small kickstarter frame without the discarded columns
kickstarter_selected_cols_small_most_backed_df = kickstarter_small_most_backed_df.drop(
    columns=kickstarter_small_cols_drop
)
display(kickstarter_selected_cols_small_most_backed_df.head(2))

# Indiegogo frame without the discarded columns
indiegogo_selected_cols_df = indiegogo_df.drop(columns=indiegogo_cols_drop)
display(indiegogo_selected_cols_df.head(2))

Unnamed: 0_level_0,name,main_category,currency,deadline,launched,state,backers,country,usd_pledged_real,usd_goal_real
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1000002330,The Songs of Adelaide & Abullah,Publishing,GBP,2015-10-09,2015-08-11 12:12:28,0,0,GB,0.0,1533.95
1000003930,Greeting From Earth: ZGAC Arts Capsule For ET,Film & Video,USD,2017-11-01,2017-09-02 04:43:57,0,15,US,2421.0,30000.0


Unnamed: 0_level_0,usd_pledged_real,main_category,currency,usd_goal_real,location,backers,num.backers.tier,pledge.tier,name
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,8782571.0,Tabletop Games,usd,10000.0,"Los Angeles, CA",219382,"[15505, 202934, 200, 5]","[20.0, 35.0, 100.0, 500.0]",Exploding Kittens
1,6465690.0,Product Design,usd,15000.0,"Denver, CO",154926,"[788, 250, 43073, 21796, 41727, 21627, 12215, ...","[1.0, 14.0, 19.0, 19.0, 35.0, 35.0, 79.0, 79.0...",Fidget Cube: A Vinyl Desk Toy


Unnamed: 0_level_0,currency,main_category,funded_percent,name,state,usd_pledged_real,usd_goal_real,launched,deadline
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
3936,USD,Transportation,16.80%,Join the Electric Revolution!!!,0,840.0,5000.0,2010-04-21 22:38:42,2010-05-12 23:59:00
5109,USD,Human Rights,20.83%,Relief Trip to Haiti,0,250.0,1200.0,2010-06-10 17:47:35,2010-07-02 23:59:00


## Add new kickstarter cols

In [229]:
# Derive the per-project metrics in one chained step. Each entry may use the
# columns created by the entries above it.
# NOTE(review): a duration of 0 days, or 0 backers with a non-zero pledge,
# makes the corresponding ratio infinite; only 0/0 (NaN) is replaced by 0 for
# pledged_per_person.
kickstarter_selected_cols_large_recent_df = (
    kickstarter_selected_cols_large_recent_df
    .assign(
        # Whole days between launch and deadline
        duration=lambda ks: (ks['deadline'] - ks['launched']).dt.days,
        # Goal and pledged amounts spread over the campaign length
        daily_goal=lambda ks: (ks['usd_goal_real'] / ks['duration']).round(2),
        daily_pledged=lambda ks: (ks['usd_pledged_real'] / ks['duration']).round(2),
        # Pledged amount as a fraction of the goal
        funded_percent=lambda ks: (ks['usd_pledged_real'] / ks['usd_goal_real']).round(4),
        # Backers gained per day on average
        avg_backer_per_day=lambda ks: (ks['backers'] / ks['duration']).round(2),
        # Average pledge per backer, with missing values set to 0
        pledged_per_person=lambda ks: (
            (ks['usd_pledged_real'] / ks['backers']).round(2).fillna(0)
        ),
    )
    # Make sure the ratio columns are stored as floats
    .astype({"funded_percent": 'float', "avg_backer_per_day": 'float', "pledged_per_person": 'float'})
)

# Preview the first two rows
display(kickstarter_selected_cols_large_recent_df.head(2))

Unnamed: 0_level_0,name,main_category,currency,deadline,launched,state,backers,country,usd_pledged_real,usd_goal_real,duration,daily_goal,daily_pledged,funded_percent,avg_backer_per_day,pledged_per_person
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
1000002330,The Songs of Adelaide & Abullah,Publishing,GBP,2015-10-09,2015-08-11 12:12:28,0,0,GB,0.0,1533.95,58,26.45,0.0,0.0,0.0,0.0
1000003930,Greeting From Earth: ZGAC Arts Capsule For ET,Film & Video,USD,2017-11-01,2017-09-02 04:43:57,0,15,US,2421.0,30000.0,59,508.47,41.03,0.0807,0.25,161.4


## Add new indiegogo cols

In [230]:
# Derive the per-project metrics in one chained step. Each entry may use the
# columns created by the entries above it.
# NOTE(review): a duration of 0 days makes the daily ratios infinite.
indiegogo_selected_cols_df = (
    indiegogo_selected_cols_df
    .assign(
        # Whole days between launch and deadline
        duration=lambda igg: (igg['deadline'] - igg['launched']).dt.days,
        # Goal and pledged amounts spread over the campaign length
        daily_goal=lambda igg: (igg['usd_goal_real'] / igg['duration']).round(2),
        daily_pledged=lambda igg: (igg['usd_pledged_real'] / igg['duration']).round(2),
        # Pledged amount as a fraction of the goal; overwrites the existing
        # 'funded_percent' column in place
        funded_percent=lambda igg: (igg['usd_pledged_real'] / igg['usd_goal_real']).round(4),
    )
    # Make sure the ratio column is stored as a float
    .astype({"funded_percent": 'float'})
)

# Preview the first two rows
display(indiegogo_selected_cols_df.head(2))

Unnamed: 0_level_0,currency,main_category,funded_percent,name,state,usd_pledged_real,usd_goal_real,launched,deadline,duration,daily_goal,daily_pledged
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3936,USD,Transportation,0.168,Join the Electric Revolution!!!,0,840.0,5000.0,2010-04-21 22:38:42,2010-05-12 23:59:00,21,238.1,40.0
5109,USD,Human Rights,0.2083,Relief Trip to Haiti,0,250.0,1200.0,2010-06-10 17:47:35,2010-07-02 23:59:00,22,54.55,11.36


## Reorder cols

In [235]:
# Column orders: the columns common to the large kickstarter and indiegogo
# frames come first, followed by the kickstarter-only extras
base_order = [
    'name', 'main_category', 'currency', 'usd_goal_real', 'usd_pledged_real',
    'deadline', 'launched', 'state', 'funded_percent', 'duration',
    'daily_goal', 'daily_pledged',
]
small_kickstarter_order = [
    'name', 'main_category', 'currency', 'usd_goal_real', 'usd_pledged_real',
    'location', 'backers', 'num.backers.tier', 'pledge.tier',
]
kickstarter_order = base_order + ['country', 'avg_backer_per_day', 'pledged_per_person']
indiegogo_order = base_order

# Put the columns of each frame into the chosen order
kickstarter_selected_cols_large_recent_df = kickstarter_selected_cols_large_recent_df.loc[:, kickstarter_order]
kickstarter_selected_cols_small_most_backed_df = kickstarter_selected_cols_small_most_backed_df.loc[:, small_kickstarter_order]
indiegogo_selected_cols_df = indiegogo_selected_cols_df.loc[:, indiegogo_order]

# Preview the first two rows of each reordered frame
display(
    kickstarter_selected_cols_large_recent_df.head(2),
    kickstarter_selected_cols_small_most_backed_df.head(2),
    indiegogo_selected_cols_df.head(2),
)

Unnamed: 0_level_0,name,main_category,currency,usd_goal_real,usd_pledged_real,deadline,launched,state,funded_percent,duration,daily_goal,daily_pledged,country,avg_backer_per_day,pledged_per_person
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1000002330,The Songs of Adelaide & Abullah,Publishing,GBP,1533.95,0.0,2015-10-09,2015-08-11 12:12:28,0,0.0,58,26.45,0.0,GB,0.0,0.0
1000003930,Greeting From Earth: ZGAC Arts Capsule For ET,Film & Video,USD,30000.0,2421.0,2017-11-01,2017-09-02 04:43:57,0,0.0807,59,508.47,41.03,US,0.25,161.4


Unnamed: 0_level_0,name,main_category,currency,usd_goal_real,usd_pledged_real,location,backers,num.backers.tier,pledge.tier
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0,Exploding Kittens,Tabletop Games,usd,10000.0,8782571.0,"Los Angeles, CA",219382,"[15505, 202934, 200, 5]","[20.0, 35.0, 100.0, 500.0]"
1,Fidget Cube: A Vinyl Desk Toy,Product Design,usd,15000.0,6465690.0,"Denver, CO",154926,"[788, 250, 43073, 21796, 41727, 21627, 12215, ...","[1.0, 14.0, 19.0, 19.0, 35.0, 35.0, 79.0, 79.0..."


Unnamed: 0_level_0,name,main_category,currency,usd_goal_real,usd_pledged_real,deadline,launched,state,funded_percent,duration,daily_goal,daily_pledged
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3936,Join the Electric Revolution!!!,Transportation,USD,5000.0,840.0,2010-05-12 23:59:00,2010-04-21 22:38:42,0,0.168,21,238.1,40.0
5109,Relief Trip to Haiti,Human Rights,USD,1200.0,250.0,2010-07-02 23:59:00,2010-06-10 17:47:35,0,0.2083,22,54.55,11.36


## Add SQL database and simple functions to load and access tables in that database

In [130]:
# Connection string for a temporary SQLite database: no file path is given,
# so nothing is stored under a named file (presumably an in-memory database
# that lasts only for this kernel session — TODO confirm). A file name can be
# added to the string later to give the database a name.
database_connection_string = "sqlite:///"

# Engine shared by the table helper functions in this cell
engine = sqlalchemy.create_engine(database_connection_string)

# Save a DataFrame as a table in the database
def new_table(data, table_name):
    """Write `data` to the table `table_name`, replacing an existing table.

    data: the DataFrame to save; its index is written along with the columns.
    table_name: name of the new table, given as a string value.

    The connection comes from the module-level `engine` established earlier;
    it is not a parameter of this function.
    """
    data.to_sql(name=f"{table_name}", con=engine, if_exists="replace", index=True)


# Read a complete table from the database; assign the result to a new
# DataFrame variable. The table name must be given as a string value.
def load_full_table(table_name):
    """Return the whole table `table_name` as a DataFrame.

    The table is read through the module-level `engine`.
    """
    return pd.read_sql_table(table_name=f"{table_name}", con=engine)

# Store the three prepared frames, each under its own table name
for table_name, frame in (
    ("kickstarter_large", kickstarter_selected_cols_large_recent_df),
    ("kickstarter_small", kickstarter_selected_cols_small_most_backed_df),
    ("indie_gogo", indiegogo_selected_cols_df),
):
    new_table(frame, table_name)
