In [1]:
import pandas as pd
import numpy as np
import json

In [2]:
# Read the raw Kickstarter export; low_memory=False makes pandas parse the
# file in a single pass instead of inferring dtypes chunk by chunk.
data = pd.read_csv(
    'Kickstarter_dataset/dataset.csv',
    low_memory=False,
)

# Preview the first rows.
data.head()

Unnamed: 0,backers_count,blurb,category,converted_pledged_amount,country,country_displayable_name,created_at,creator,currency,currency_symbol,...,source_url,spotlight,staff_pick,state,state_changed_at,static_usd_rate,urls,usd_exchange_rate,usd_pledged,usd_type
0,0,Sharing the knowledge of the Transcendent Trut...,"{""id"":49,""name"":""Periodicals"",""analytics_name""...",0,US,the United States,1423788884,"{""id"":1203053572,""name"":""Arman Beisembayev (de...",USD,$,...,https://www.kickstarter.com/discover/categorie...,False,False,canceled,1427835176,1,"{""web"":{""project"":""https://www.kickstarter.com...",1,0.0,international
1,0,Hayati is a High Fashion magazine geared towar...,"{""id"":49,""name"":""Periodicals"",""analytics_name""...",0,US,the United States,1417529406,"{""id"":2111265406,""name"":""Hayati Magazine (dele...",USD,$,...,https://www.kickstarter.com/discover/categorie...,False,False,canceled,1417818077,1,"{""web"":{""project"":""https://www.kickstarter.com...",1,0.0,international
2,148,An optical illusion temporary tattoo. Point yo...,"{""id"":260,""name"":""Interactive Design"",""analyti...",1457,US,the United States,1457911892,"{""id"":2102688893,""name"":""Pablo Garcia"",""slug"":...",USD,$,...,https://www.kickstarter.com/discover/categorie...,True,True,successful,1459908000,1,"{""web"":{""project"":""https://www.kickstarter.com...",1,1457.01,international
3,57,A children's book in full color. It is writte...,"{""id"":22,""name"":""Illustration"",""analytics_name...",5627,US,the United States,1579779003,"{""id"":569631783,""name"":""Alan Hawley"",""is_regis...",USD,$,...,https://www.kickstarter.com/discover/categorie...,True,False,successful,1593605260,1,"{""web"":{""project"":""https://www.kickstarter.com...",1,5627.0,international
4,51,Help Lauren Elens bring a New Orleans-Style Sn...,"{""id"":311,""name"":""Food Trucks"",""analytics_name...",5050,US,the United States,1494356186,"{""id"":1343144681,""name"":""Lauren Elens"",""is_reg...",USD,$,...,https://www.kickstarter.com/discover/categorie...,True,False,successful,1496952855,1,"{""web"":{""project"":""https://www.kickstarter.com...",1,5050.0,international


In [3]:
# Summarize column names, non-null counts and dtypes of the raw frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230069 entries, 0 to 230068
Data columns (total 39 columns):
 #   Column                    Non-Null Count   Dtype 
---  ------                    --------------   ----- 
 0   backers_count             230069 non-null  object
 1   blurb                     230060 non-null  object
 2   category                  230069 non-null  object
 3   converted_pledged_amount  230069 non-null  object
 4   country                   230069 non-null  object
 5   country_displayable_name  230069 non-null  object
 6   created_at                230069 non-null  object
 7   creator                   230069 non-null  object
 8   currency                  230069 non-null  object
 9   currency_symbol           230069 non-null  object
 10  currency_trailing_code    230069 non-null  object
 11  current_currency          230069 non-null  object
 12  deadline                  230069 non-null  object
 13  disable_communication     230069 non-null  object
 14  frie

Description of each column:

1. backers_count - number of people who contributed funds to the project
2. blurb - short description of the project
3. category - json containing the parent category and sub-category of the project
4. converted_pledged_amount - amount of money pledged, converted to the currency in the 'current_currency' column
5. country - country the project creator is from
6. country_displayable_name - display name of the country where the project creator is from
7. created_at - date and time of when the project was initially created on Kickstarter
8. creator - json containing name of the project creator and other information about them, e.g. Kickstarter id number
9. currency - original currency the project goal was denominated in
10. currency_symbol - symbol of the original currency the project goal was denominated in
11. currency_trailing_code - code of the original currency the project goal was denominated in
12. current_currency - currency the project goal was converted to
13. deadline - date and time of when the project will close for donations
14. disable_communication - whether or not a project owner disabled communication with their backers
15. friends - unclear (null or empty)
16. fx_rate - foreign exchange rate between the original currency and the current_currency
17. goal - funding goal
18. id - id number of the project
19. is_backing - unclear (null or false)
20. is_starrable - whether or not a project can be starred (liked and saved) by users
21. is_starred - whether or not a project has been starred (liked and saved) by users
22. launched_at - date and time of when the project was launched for funding
23. location - contains the town or city of the project creator
24. name - name of the project
25. permissions - unclear (null or empty)
26. photo - contains a link and information to the project's photo/s
27. pledged - amount pledged in the project's original currency (the 'currency' column)
28. profile - details about the project's profile, including id number and various visual settings
29. slug - name of the project with hyphens instead of spaces
30. source_url - url for the project's category
31. spotlight - after a project has been successful, it is spotlighted on the Kickstarter website
32. staff_pick - whether a project was highlighted as a staff_pick when it was launched/live
33. state - whether a project was successful, failed, canceled, suspended or still live
34. state_changed_at - date and time of when a project's status was changed (same as the deadline for successful and failed projects)
35. static_usd_rate - conversion rate between the original currency and USD
36. urls - url to the project's page
37. usd_exchange_rate - conversion rate to USD
38. usd_pledged - amount pledged in USD
39. usd_type - domestic or international

In [4]:
# List the distinct values of the 'state' column to see which project outcomes are present.
# NOTE(review): a literal 'state' value in the result would suggest header rows
# repeated inside the CSV - confirm against the raw file.
print(f"Unique project states: {data['state'].unique()}")

Unique project states: ['canceled' 'successful' 'failed' 'live' 'state']


In [5]:
# The predictor is trained on completed campaigns only, so keep just the two
# finished outcomes. Selecting them explicitly (instead of excluding the states
# observed so far) also removes any other value, e.g. suspended projects or the
# stray 'state' entries.
data = data[data['state'].isin(['successful', 'failed'])]

In [6]:
# Confirm which project states remain after the filter.
print(f"Unique project states: {data['state'].unique()}")

Unique project states: ['successful' 'failed']


In [7]:
# Re-inspect row count, non-null counts and dtypes after filtering by state.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 215063 entries, 2 to 230068
Data columns (total 39 columns):
 #   Column                    Non-Null Count   Dtype 
---  ------                    --------------   ----- 
 0   backers_count             215063 non-null  object
 1   blurb                     215061 non-null  object
 2   category                  215063 non-null  object
 3   converted_pledged_amount  215063 non-null  object
 4   country                   215063 non-null  object
 5   country_displayable_name  215063 non-null  object
 6   created_at                215063 non-null  object
 7   creator                   215063 non-null  object
 8   currency                  215063 non-null  object
 9   currency_symbol           215063 non-null  object
 10  currency_trailing_code    215063 non-null  object
 11  current_currency          215063 non-null  object
 12  deadline                  215063 non-null  object
 13  disable_communication     215063 non-null  object
 14  frie

In [8]:
# Remove columns that carry no predictive signal (metadata, links, flags).
data = data.drop(
    columns=[
        'creator', 'disable_communication', 'friends', 'permissions',
        'photo', 'profile', 'source_url', 'urls',
        'currency_trailing_code', 'usd_type',
        'is_backing', 'is_starrable', 'is_starred',
    ]
)

In [9]:
# Show every row whose (id, state) pair occurs more than once; keep=False
# flags all members of each duplicate group, not only the repeats.
data.loc[data.duplicated(subset=['id', 'state'], keep=False)]

Unnamed: 0,backers_count,blurb,category,converted_pledged_amount,country,country_displayable_name,created_at,currency,currency_symbol,current_currency,...,name,pledged,slug,spotlight,staff_pick,state,state_changed_at,static_usd_rate,usd_exchange_rate,usd_pledged
9,1127,"Remembering Elephants sequel, featuring rhino ...","{""id"":275,""name"":""Animals"",""analytics_name"":""A...",125841,GB,the United Kingdom,1482335513,GBP,£,USD,...,Remembering Rhinos,103447,remembering-rhinos,true,true,successful,1489303284,1.25158325,1.21647842,129472.532462749994
16,49,I'm trying to establish my first exhibition in...,"{""id"":275,""name"":""Animals"",""analytics_name"":""A...",2015,NO,Norway,1444827277,NOK,kr,USD,...,Dogs Best Man,17350,dogs-best-man,true,false,successful,1447273386,0.12345828,0.11619267,2142.001158
43,14,Heart Whiskers is a one-pin project with two s...,"{""id"":262,""name"":""Accessories"",""analytics_name...",300,US,the United States,1650317861,USD,$,USD,...,Heart Whiskers: Hard Enamel Cat Pin(s),300,heart-whiskers-hard-enamel-cat-pins,true,false,successful,1652554958,1,1,300.0
52,9,"Express your dark side with original, vegan fr...","{""id"":263,""name"":""Apparel"",""analytics_name"":""A...",658,IT,Italy,1649427736,EUR,€,USD,...,SAINT MAYHEM: alternative ethical fashion from...,626,saint-mayhem-alternative-ethical-fashion-from-...,true,true,successful,1651240801,1.09072069,1.05132793,682.79115194
53,18,Cute BTS inspired minimalistic clothing for AR...,"{""id"":263,""name"":""Apparel"",""analytics_name"":""A...",2042,CA,Canada,1619735758,CAD,$,USD,...,For ARMY by ARMY ♡: A BTS Inspired Streetwear ...,2621.29,for-army-by-army-a-bts-inspired-streetwear-col...,true,false,successful,1652914173,0.79085769,0.77906436,2073.0673542201
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230049,119,An art-filled calendar to help you sync with t...,"{""id"":325,""name"":""Calendars"",""analytics_name"":...",7646,US,the United States,1637078783,USD,$,USD,...,2022 Lunar Calendar,7646,2022-lunar-calendar,true,true,successful,1639332520,1,1,7646.0
230050,150,Join the zookeepers in Australia as they help ...,"{""id"":46,""name"":""Children's Books"",""analytics_...",4026,AU,Australia,1630932943,AUD,$,USD,...,The Zookeepers' Quest - The Koala and the Bush...,5650.32,the-zookeepers-quest-the-koala-and-the-bushfire,true,true,successful,1639814444,0.71229992,0.71264967,4024.7224839744
230051,7425,Epithet Erased's first full-length novel (and ...,"{""id"":387,""name"":""Comedy"",""analytics_name"":""Co...",358559,US,the United States,1637856535,USD,$,USD,...,Epithet Erased - Prison of Plastic,358559.140000000014,epithet-erased-prison-of-plastic,true,true,successful,1640537750,1,1,358559.140000000014
230059,1500,2X Capacity | Charge Level Indicator ｜Quick Re...,"{""id"":333,""name"":""Camera Equipment"",""analytics...",221290,US,the United States,1604055233,USD,$,USD,...,The Camera Battery Re-invented,221290,the-camera-battery-re-invented,true,false,successful,1608390005,1,1,221290.0


In [10]:
# Keep the first occurrence of each (id, state) pair and renumber the index.
# Re-assigning the result (rather than inplace=True) keeps the cell's data
# lineage explicit and avoids mutating a frame an earlier cell displayed.
data = data.drop_duplicates(subset=['id', 'state'], keep='first', ignore_index=True)

In [11]:
# Number of rows and columns remaining after de-duplication.
data.shape

(189334, 26)

In [12]:
# Look at the raw category JSON string of the first project to learn which
# keys hold the sub-category and parent category names.
cat = data['category'].iloc[0]

cat

'{"id":260,"name":"Interactive Design","analytics_name":"Interactive Design","slug":"design/interactive design","position":4,"parent_id":7,"parent_name":"Design","color":2577151,"urls":{"web":{"discover":"http://www.kickstarter.com/discover/categories/design/interactive%20design"}}}'

In [13]:
#define a function to extract the parent category and sub-category name
def extract_category(df):
    '''Split the 'category' JSON column into two plain-text columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'category' column whose values are JSON strings
        containing a 'name' key and, optionally, a 'parent_name' key.

    Returns
    -------
    pandas.DataFrame
        A new frame without 'category' and with two columns appended:
        'sub_category' (the JSON 'name') and 'parent_category' (the JSON
        'parent_name', falling back to 'name' when the key is absent).
        The input frame is left unmodified.

    Raises
    ------
    KeyError
        If 'category' is missing from df or a JSON object has no 'name'.
    '''
    sub_category = []
    parent_category = []

    for raw_json in df['category']:
        # Decode each string exactly once and read both fields from the result.
        info = json.loads(raw_json)
        name = info['name']
        sub_category.append(name)
        # Categories without a parent entry are their own parent category.
        parent_category.append(info.get('parent_name', name))

    # drop() returns a new frame, so the added columns never touch the caller's df.
    result = df.drop('category', axis = 1)
    result['sub_category'] = sub_category
    result['parent_category'] = parent_category
    return result

In [14]:
# Replace the category JSON with sub_category / parent_category columns.
data = data.pipe(extract_category)

# Confirm the new columns are present and 'category' is gone.
data.columns

Index(['backers_count', 'blurb', 'converted_pledged_amount', 'country',
       'country_displayable_name', 'created_at', 'currency', 'currency_symbol',
       'current_currency', 'deadline', 'fx_rate', 'goal', 'id', 'launched_at',
       'location', 'name', 'pledged', 'slug', 'spotlight', 'staff_pick',
       'state', 'state_changed_at', 'static_usd_rate', 'usd_exchange_rate',
       'usd_pledged', 'sub_category', 'parent_category'],
      dtype='object')

In [15]:
# Count projects per sub-category, most frequent first.
data['sub_category'].value_counts()

Web                 3805
Comedy              2952
Webseries           2394
Graphic Design      2391
Electronic Music    2390
                    ... 
Quilts                90
Residencies           88
Chiptune              52
Games                 39
Taxidermy             11
Name: sub_category, Length: 161, dtype: int64

In [16]:
# Count projects per parent category, most frequent first.
data['parent_category'].value_counts()

Music           26275
Film & Video    26197
Art             20311
Technology      19711
Publishing      19380
Food            15336
Games           11496
Fashion         10381
Comics           7181
Design           6979
Crafts           6656
Photography      6654
Theater          5421
Journalism       4244
Dance            3112
Name: parent_category, dtype: int64

In [17]:
# The timestamp columns arrive as object dtype holding Unix epoch seconds:
# cast to integers first, then interpret them as seconds since the epoch.
# int64 is requested explicitly so the width does not depend on the platform's
# default integer type.
for date_col in ['created_at', 'deadline', 'launched_at', 'state_changed_at']:
    data[date_col] = pd.to_datetime(data[date_col].astype('int64'), unit='s')

In [18]:
# Verify the dtypes of the timestamp columns after the conversion.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 189334 entries, 0 to 189333
Data columns (total 27 columns):
 #   Column                    Non-Null Count   Dtype         
---  ------                    --------------   -----         
 0   backers_count             189334 non-null  object        
 1   blurb                     189332 non-null  object        
 2   converted_pledged_amount  189334 non-null  object        
 3   country                   189334 non-null  object        
 4   country_displayable_name  189334 non-null  object        
 5   created_at                189334 non-null  datetime64[ns]
 6   currency                  189334 non-null  object        
 7   currency_symbol           189334 non-null  object        
 8   current_currency          189334 non-null  object        
 9   deadline                  189334 non-null  datetime64[ns]
 10  fx_rate                   189334 non-null  object        
 11  goal                      189334 non-null  object        
 12  id

In [19]:
# Preview the frame after the timestamp conversion.
data.head()

Unnamed: 0,backers_count,blurb,converted_pledged_amount,country,country_displayable_name,created_at,currency,currency_symbol,current_currency,deadline,...,slug,spotlight,staff_pick,state,state_changed_at,static_usd_rate,usd_exchange_rate,usd_pledged,sub_category,parent_category
0,148,An optical illusion temporary tattoo. Point yo...,1457,US,the United States,2016-03-13 23:31:32,USD,$,USD,2016-04-06 02:00:00,...,memento-mori-tattoo,True,True,successful,2016-04-06 02:00:00,1,1,1457.01,Interactive Design,Design
1,57,A children's book in full color. It is writte...,5627,US,the United States,2020-01-23 11:30:03,USD,$,USD,2020-07-01 12:07:40,...,the-hero,True,False,successful,2020-07-01 12:07:40,1,1,5627.0,Illustration,Art
2,51,Help Lauren Elens bring a New Orleans-Style Sn...,5050,US,the United States,2017-05-09 18:56:26,USD,$,USD,2017-06-08 20:14:13,...,sara-lous-new-orleans-style-sno-balls,True,False,successful,2017-06-08 20:14:15,1,1,5050.0,Food Trucks,Food
3,44,I'm a Taco Cart brought out of Mexico. The hum...,5580,US,the United States,2016-08-22 16:30:07,USD,$,USD,2017-04-22 18:36:25,...,colorocko-taco,True,False,successful,2017-04-22 18:36:25,1,1,5580.0,Food Trucks,Food
4,19,A full length concert video of Team 4 of the 2...,610,US,the United States,2019-08-01 17:06:23,USD,$,USD,2019-08-08 20:15:23,...,school-of-rock-allstars-team-4-concert-video,True,False,successful,2019-08-08 20:15:23,1,1,610.0,Rock,Music


In [20]:
# Count projects per country code, most frequent first.
data['country'].value_counts()

US    128337
GB     21559
CA      8902
AU      4493
DE      3729
MX      3087
FR      3012
IT      2648
ES      2332
NL      1677
HK      1623
SE      1412
DK       874
SG       824
NZ       817
JP       796
CH       703
IE       607
BE       592
AT       510
NO       439
PL       175
GR        91
LU        62
SI        33
Name: country, dtype: int64

In [21]:
# Keep countries with at least 1000 projects and fold the rest into 'Other'.
# The rare codes are derived from the counts instead of being listed by hand,
# and the change is limited to the 'country' column: a frame-wide replace
# would also rewrite any other cell (e.g. a project name) that happens to
# equal one of the two-letter codes.
MIN_PROJECTS_PER_COUNTRY = 1000
country_counts = data['country'].value_counts()
rare_countries = country_counts[country_counts < MIN_PROJECTS_PER_COUNTRY].index
data.loc[data['country'].isin(rare_countries), 'country'] = 'Other'

# Check the resulting distribution.
data['country'].value_counts()

US       128337
GB        21559
CA         8902
Other      6523
AU         4493
DE         3729
MX         3087
FR         3012
IT         2648
ES         2332
NL         1677
HK         1623
SE         1412
Name: country, dtype: int64

In [22]:
# usd_pledged was derived with static_usd_rate, so the goal is converted with
# the same rate to keep both amounts comparable.
data = data.astype({'goal': float, 'static_usd_rate': float})

data['goal_usd'] = data['goal'].mul(data['static_usd_rate'])

In [23]:
# List the columns to confirm 'goal_usd' was added.
data.columns

Index(['backers_count', 'blurb', 'converted_pledged_amount', 'country',
       'country_displayable_name', 'created_at', 'currency', 'currency_symbol',
       'current_currency', 'deadline', 'fx_rate', 'goal', 'id', 'launched_at',
       'location', 'name', 'pledged', 'slug', 'spotlight', 'staff_pick',
       'state', 'state_changed_at', 'static_usd_rate', 'usd_exchange_rate',
       'usd_pledged', 'sub_category', 'parent_category', 'goal_usd'],
      dtype='object')

In [24]:
#clean project title in 'name' column by converting to lower case and remove punctuations
def clean_text(df, column_name):
    '''Lower-case a text column, strip punctuation and add a word-count column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the text column. It is modified in place.
    column_name : str
        Name of the text column to clean.

    Returns
    -------
    pandas.DataFrame
        The same frame, with `column_name` replaced by its cleaned text and a
        new column `<column_name>_count` holding the number of
        whitespace-separated tokens.

    Notes
    -----
    * Missing values (None / NaN) become an empty string with a count of 0
      rather than the literal text "nan".
    * The word count is taken from the lower-cased text before punctuation is
      removed, so a free-standing symbol such as "-" counts as a token.
    '''
    # Backslash is written as an explicit "\\" (it is one of the stripped characters).
    punctuation = "!@~`#$%^&*()_-+=]\"[{}:;'?/|\\><,."
    # One translation table removes every punctuation character in a single pass.
    strip_table = str.maketrans('', '', punctuation)

    new_texts = []
    word_count = []

    for value in df[column_name]:
        text = '' if pd.isna(value) else str(value).lower()
        word_count.append(len(text.split()))
        new_texts.append(text.translate(strip_table))

    df[column_name] = new_texts
    df[f"{column_name}_count"] = word_count
    return df

In [25]:
# Clean the project titles and add the 'name_count' word-count column.
data = data.pipe(clean_text, 'name')

# Preview the result.
data.head()

Unnamed: 0,backers_count,blurb,converted_pledged_amount,country,country_displayable_name,created_at,currency,currency_symbol,current_currency,deadline,...,staff_pick,state,state_changed_at,static_usd_rate,usd_exchange_rate,usd_pledged,sub_category,parent_category,goal_usd,name_count
0,148,An optical illusion temporary tattoo. Point yo...,1457,US,the United States,2016-03-13 23:31:32,USD,$,USD,2016-04-06 02:00:00,...,True,successful,2016-04-06 02:00:00,1.0,1,1457.01,Interactive Design,Design,100.0,3
1,57,A children's book in full color. It is writte...,5627,US,the United States,2020-01-23 11:30:03,USD,$,USD,2020-07-01 12:07:40,...,False,successful,2020-07-01 12:07:40,1.0,1,5627.0,Illustration,Art,5000.0,2
2,51,Help Lauren Elens bring a New Orleans-Style Sn...,5050,US,the United States,2017-05-09 18:56:26,USD,$,USD,2017-06-08 20:14:13,...,False,successful,2017-06-08 20:14:15,1.0,1,5050.0,Food Trucks,Food,5000.0,6
3,44,I'm a Taco Cart brought out of Mexico. The hum...,5580,US,the United States,2016-08-22 16:30:07,USD,$,USD,2017-04-22 18:36:25,...,False,successful,2017-04-22 18:36:25,1.0,1,5580.0,Food Trucks,Food,5000.0,2
4,19,A full length concert video of Team 4 of the 2...,610,US,the United States,2019-08-01 17:06:23,USD,$,USD,2019-08-08 20:15:23,...,False,successful,2019-08-08 20:15:23,1.0,1,610.0,Rock,Music,500.0,8


In [26]:
# Clean the project blurbs and add the 'blurb_count' word-count column.
data = data.pipe(clean_text, 'blurb')

# Preview the result.
data.head()

Unnamed: 0,backers_count,blurb,converted_pledged_amount,country,country_displayable_name,created_at,currency,currency_symbol,current_currency,deadline,...,state,state_changed_at,static_usd_rate,usd_exchange_rate,usd_pledged,sub_category,parent_category,goal_usd,name_count,blurb_count
0,148,an optical illusion temporary tattoo point you...,1457,US,the United States,2016-03-13 23:31:32,USD,$,USD,2016-04-06 02:00:00,...,successful,2016-04-06 02:00:00,1.0,1,1457.01,Interactive Design,Design,100.0,3,16
1,57,a childrens book in full color it is written ...,5627,US,the United States,2020-01-23 11:30:03,USD,$,USD,2020-07-01 12:07:40,...,successful,2020-07-01 12:07:40,1.0,1,5627.0,Illustration,Art,5000.0,2,13
2,51,help lauren elens bring a new orleansstyle sno...,5050,US,the United States,2017-05-09 18:56:26,USD,$,USD,2017-06-08 20:14:13,...,successful,2017-06-08 20:14:15,1.0,1,5050.0,Food Trucks,Food,5000.0,6,14
3,44,im a taco cart brought out of mexico the human...,5580,US,the United States,2016-08-22 16:30:07,USD,$,USD,2017-04-22 18:36:25,...,successful,2017-04-22 18:36:25,1.0,1,5580.0,Food Trucks,Food,5000.0,2,26
4,19,a full length concert video of team 4 of the 2...,610,US,the United States,2019-08-01 17:06:23,USD,$,USD,2019-08-08 20:15:23,...,successful,2019-08-08 20:15:23,1.0,1,610.0,Rock,Music,500.0,8,24


In [27]:
# Drop the remaining columns that are not kept for the cleaned data set:
# currency and exchange-rate fields, the unconverted goal and pledged amounts,
# and slug / location / display-name columns.
data = data.drop(
    columns=[
        'slug', 'converted_pledged_amount', 'country_displayable_name',
        'currency', 'currency_symbol', 'current_currency', 'fx_rate',
        'goal', 'location', 'pledged', 'static_usd_rate', 'usd_exchange_rate',
    ]
)

In [28]:
# Preview the frame after dropping the unused columns.
data.head()

Unnamed: 0,backers_count,blurb,country,created_at,deadline,id,launched_at,name,spotlight,staff_pick,state,state_changed_at,usd_pledged,sub_category,parent_category,goal_usd,name_count,blurb_count
0,148,an optical illusion temporary tattoo point you...,US,2016-03-13 23:31:32,2016-04-06 02:00:00,888748006,2016-03-17 22:24:43,memento mori tattoo,True,True,successful,2016-04-06 02:00:00,1457.01,Interactive Design,Design,100.0,3,16
1,57,a childrens book in full color it is written ...,US,2020-01-23 11:30:03,2020-07-01 12:07:40,1635691370,2020-06-01 12:07:40,the hero,True,False,successful,2020-07-01 12:07:40,5627.0,Illustration,Art,5000.0,2,13
2,51,help lauren elens bring a new orleansstyle sno...,US,2017-05-09 18:56:26,2017-06-08 20:14:13,887441191,2017-05-09 20:14:13,sara lous new orleans style snoballs,True,False,successful,2017-06-08 20:14:15,5050.0,Food Trucks,Food,5000.0,6,14
3,44,im a taco cart brought out of mexico the human...,US,2016-08-22 16:30:07,2017-04-22 18:36:25,1015963105,2017-03-23 18:36:25,colorocko taco,True,False,successful,2017-04-22 18:36:25,5580.0,Food Trucks,Food,5000.0,2,26
4,19,a full length concert video of team 4 of the 2...,US,2019-08-01 17:06:23,2019-08-08 20:15:23,1328231502,2019-08-01 20:15:23,school of rock allstars team 4 concert video,True,False,successful,2019-08-08 20:15:23,610.0,Rock,Music,500.0,8,24


In [29]:
# Final check of dtypes and non-null counts before export.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 189334 entries, 0 to 189333
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   backers_count     189334 non-null  object        
 1   blurb             189334 non-null  object        
 2   country           189334 non-null  object        
 3   created_at        189334 non-null  datetime64[ns]
 4   deadline          189334 non-null  datetime64[ns]
 5   id                189334 non-null  object        
 6   launched_at       189334 non-null  datetime64[ns]
 7   name              189334 non-null  object        
 8   spotlight         189334 non-null  object        
 9   staff_pick        189334 non-null  object        
 10  state             189334 non-null  object        
 11  state_changed_at  189334 non-null  datetime64[ns]
 12  usd_pledged       189334 non-null  object        
 13  sub_category      189334 non-null  object        
 14  pare

In [30]:
# Write the cleaned frame to data_cleaned.csv in the working directory, without the index column.
data.to_csv("data_cleaned.csv", index = False)