In [9]:
import os
import time
import pandas as pd
from datetime import datetime


# Columns of the raw export that are dropped once the derived columns exist.
_UNUSED_COLUMNS = [
    'blurb', 'category', 'converted_pledged_amount', 'country', 'created_at',
    'currency', 'currency_symbol', 'currency_trailing_code',
    'current_currency', 'deadline', 'disable_communication', 'friends',
    'fx_rate', 'is_backing', 'is_starrable', 'is_starred', 'launched_at',
    'location', 'name', 'permissions', 'photo', 'slug', 'source_url',
    'spotlight', 'staff_pick', 'state', 'state_changed_at', 'static_usd_rate',
    'unread_messages_count', 'unseen_activity_count', 'usd_pledged',
    'usd_type',
]


def cleanData(data, scrapeDate):
    """Filter one raw scrape and derive the columns used for analysis.

    Rows whose ``deadline`` is not strictly before 12:00 UTC on the scrape
    date are discarded. ``goal`` and ``pledged`` are multiplied by
    ``fx_rate``, four derived columns are added (``result``,
    ``total failure``, ``duration`` in days, ``U.S.``), the columns listed in
    ``_UNUSED_COLUMNS`` are dropped and ``backers_count`` is renamed to
    ``no_backers``.

    Args:
        data: DataFrame holding one scrape file. It must contain ``id``,
            ``backers_count``, ``goal``, ``pledged`` and every column in
            ``_UNUSED_COLUMNS``.
        scrapeDate: Sequence ``[year, month, day]`` of the scrape.

    Returns:
        A new DataFrame indexed by ``id``; the caller's frame is not modified.

    Raises:
        KeyError: If a required column is missing.
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    # Build the cut-off as a UTC-aware datetime so the resulting epoch value
    # is the same on every machine (time.mktime would apply the local zone).
    scrapeDateUNIX = datetime(
        scrapeDate[0], scrapeDate[1], scrapeDate[2], 12, 0, 0,
        tzinfo=timezone.utc,
    ).timestamp()

    # Remove projects still ongoing at the time of scraping.
    # set_index returns a fresh frame, so the assignments below never write
    # into a view of the caller's data.
    data = data.loc[data['deadline'] < scrapeDateUNIX].set_index('id')

    # Apply the exchange rate to the monetary columns.
    data['goal'] = data['goal'] * data['fx_rate']
    data['pledged'] = data['pledged'] * data['fx_rate']

    # Derived columns; flags are stored as 0/1 integers. astype is used
    # instead of replace({True: 1, False: 0}) so the dtype is explicit and
    # does not rely on replace() downcasting the result.
    data['result'] = (data['pledged'] >= data['goal']).astype('int64')
    data['total failure'] = (data['pledged'] == 0).astype('int64')
    data['duration'] = (data['deadline'] - data['launched_at']) / 3600 / 24
    data['U.S.'] = (data['country'] == "US").astype('int64')

    data = data.drop(columns=_UNUSED_COLUMNS)
    return data.rename(columns={"backers_count": "no_backers"})

def getDateFromFileName(fileName):
    """Return ``fileName`` with its third dash-separated segment removed.

    The name is split on ``'-'``, the segment at index 2 is discarded and the
    rest is joined back with ``'-'``. Callers compare the result between
    files to detect when a new scrape begins (presumably the remaining
    segments identify the scrape date - confirm against the real file names).

    Raises:
        IndexError: If ``fileName`` has fewer than three segments.
    """
    segments = fileName.split('-')
    segments.pop(2)  # drop the third segment; fails loudly on short names
    return '-'.join(segments)

# os.listdir returns entries in arbitrary order, but the loading code pairs
# files with scrapeDates by position. Sorting makes the order deterministic
# (assumes the file names sort chronologically - verify against the folder).
files = sorted(os.listdir("Kickstarter Data"))

# One [year, month, day] entry per scrape, in chronological order.
scrapeDates = [
    [2019, 6, 13],
    [2019, 7, 18],
    [2019, 8, 15],
    [2019, 9, 12],
    [2019, 10, 17],
    [2019, 11, 14],
    [2019, 12, 12],
    [2020, 1, 16],
    [2020, 2, 13],
    [2020, 3, 12],
    [2020, 4, 16],
    [2020, 5, 14],
    [2020, 6, 18],
    [2020, 7, 16],
    [2020, 8, 13],
    [2020, 9, 17],
    [2020, 10, 15],
    [2020, 11, 12],
    [2020, 12, 17],
    [2021, 1, 14],
    [2021, 2, 11],
    [2021, 3, 18],
    [2021, 4, 15],
    [2021, 5, 17],
    [2021, 6, 17],
    [2021, 7, 15],
    [2021, 8, 12],
    [2021, 9, 16],
    [2021, 10, 15],
    [2021, 11, 19],
    [2021, 12, 14],
    [2022, 1, 20],
    [2022, 2, 10],
    [2022, 3, 24],
    [2022, 4, 21],
    [2022, 5, 19],
    [2022, 6, 9],
    [2022, 7, 14],
    [2022, 8, 11],
]

# Folder holding the scraped CSV files. Paths are built with os.path.join so
# the cell is not tied to the Windows path separator.
dataDir = "Kickstarter Data"

# The first file is always paired with the first scrape date.
firstFile = files.pop(0)
previousDate = getDateFromFileName(firstFile)
scrapeDate = scrapeDates.pop(0)
frames = [cleanData(pd.read_csv(os.path.join(dataDir, firstFile)), scrapeDate)]

# NOTE: only the three files following the first one are processed here.
for file in files[:3]:
    date = getDateFromFileName(file)
    # A change in the file-name date marks the start of the next scrape,
    # so advance to the next scrape date.
    if date != previousDate:
        previousDate = date
        scrapeDate = scrapeDates.pop(0)

    frames.append(cleanData(pd.read_csv(os.path.join(dataDir, file)), scrapeDate))

# Concatenate once instead of growing the frame inside the loop, which
# would recopy all accumulated rows on every iteration.
allData = pd.concat(frames)

allData

Unnamed: 0_level_0,no_backers,creator,goal,pledged,profile,urls,result,total failure,duration,U.S.
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1043160117,740,"{""id"":518230978,""name"":""Andy Swartz"",""slug"":""s...",3400.000000,18559.000000,"{""id"":2246288,""project_id"":2246288,""state"":""ac...","{""web"":{""project"":""https://www.kickstarter.com...",1,0,31.003067,1
765668830,3,"{""id"":2031793373,""name"":""Julian Mason"",""is_reg...",4000.000000,80.000000,"{""id"":2944476,""project_id"":2944476,""state"":""in...","{""web"":{""project"":""https://www.kickstarter.com...",0,0,58.959757,1
1156174822,1752,"{""id"":1216340396,""name"":""Robert J Woodhead"",""s...",30000.000000,135589.940000,"{""id"":1985482,""project_id"":1985482,""state"":""ac...","{""web"":{""project"":""https://www.kickstarter.com...",1,0,29.958333,1
885026214,185,"{""id"":672576444,""name"":""Tristan J Tarwater"",""i...",4000.000000,4652.000000,"{""id"":2309240,""project_id"":2309240,""state"":""ac...","{""web"":{""project"":""https://www.kickstarter.com...",1,0,30.000000,1
1866008711,111,"{""id"":694896694,""name"":""Tabitha Jensen"",""slug""...",4900.000000,5630.000000,"{""id"":3398789,""project_id"":3398789,""state"":""ac...","{""web"":{""project"":""https://www.kickstarter.com...",1,0,30.003738,1
...,...,...,...,...,...,...,...,...,...,...
806791969,68,"{""id"":1759352588,""name"":""Cary Wang"",""is_regist...",10271.204826,15112.166108,"{""id"":3137607,""project_id"":3137607,""state"":""ac...","{""web"":{""project"":""https://www.kickstarter.com...",1,0,60.041667,0
1032753050,89,"{""id"":216073473,""name"":""Zane Fielding"",""is_reg...",2800.000000,2825.430000,"{""id"":68766,""project_id"":68766,""state"":""inacti...","{""web"":{""project"":""https://www.kickstarter.com...",1,0,35.075231,1
1629091264,0,"{""id"":1907943637,""name"":""Mike Holthaus"",""is_re...",21000.000000,0.000000,"{""id"":1111261,""project_id"":1111261,""state"":""in...","{""web"":{""project"":""https://www.kickstarter.com...",0,1,39.124745,1
1032803584,1,"{""id"":655015056,""name"":""Julie Rodgers"",""is_reg...",550.000000,20.000000,"{""id"":1039712,""project_id"":1039712,""state"":""in...","{""web"":{""project"":""https://www.kickstarter.com...",0,0,30.000000,1
