In [38]:
import pandas as pd
import json
import numpy as np
import pprint as pp

## Reading the dataframe

In [19]:
# Relative path of the Kickstarter CSV export that is loaded into `df` below.
file_path = "data/2021_3/Kickstarter.csv"

In [22]:
# Load the raw CSV into a DataFrame; the later cells read their columns from `df`.
df = pd.read_csv(file_path)

## Column categorization

In [184]:
# Every column of the raw frame is assigned to exactly one of four buckets.

# Columns copied into the tidy frame unchanged.
ready_cols = [
    "id", "name", "backers_count", "blurb", "converted_pledged_amount",
    "country", "created_at", "currency", "deadline", "disable_communication",
    "friends", "goal", "launched_at", "pledged", "state", "state_changed_at",
    "usd_type",
]
# Columns whose meaning has not been worked out yet (hence the list name).
not_understood_cols = [
    "currency_trailing_code", "fx_rate", "is_backing", "is_starrable",
    "is_starred", "permissions", "spotlight", "staff_pick", "static_usd_rate",
]
# Columns holding JSON-encoded objects; "category" and "urls" are parsed in
# this notebook. NOTE(review): the others are presumably JSON too - confirm.
dict_cols = ["category", "creator", "location", "photo", "profile", "urls"]
# Columns that are not carried over into the tidy frame.
useless_cols = [
    "country_displayable_name", "currency_symbol", "current_currency",
    "slug", "source_url", "usd_pledged",
]

# Sanity check: the buckets must not overlap and must cover exactly the
# columns of `df`. Comparing the sets themselves (not only their sizes)
# also catches a misspelled or wrong column name that a count would miss.
all_categorized_cols = ready_cols + not_understood_cols + dict_cols + useless_cols
(
    len(all_categorized_cols) == len(set(all_categorized_cols))
    and set(all_categorized_cols) == set(df.columns)
)


True

### Categories
The `category` column is a JSON object that contains both the category and the sub-category:

In [180]:
def get_categories(entry):
    """Extract the category and sub-category from a row's ``category`` JSON.

    Parameters
    ----------
    entry : mapping or pandas.Series
        A row whose ``"category"`` value is a JSON-encoded object string
        with a ``"name"`` key and, optionally, a ``"parent_name"`` key.

    Returns
    -------
    pandas.Series
        Always indexed by ``"category"`` and ``"sub_category"``. When the
        JSON has a ``"parent_name"``, that is the category and ``"name"`` is
        the sub-category. Otherwise ``"name"`` is the category and the
        sub-category is NaN.
    """
    category_dict = json.loads(entry["category"])
    if "parent_name" in category_dict:
        category = category_dict["parent_name"]
        sub_category = category_dict["name"]
    else:
        category = category_dict["name"]
        # Return the same keys on both branches so the frame built by
        # `DataFrame.apply` always has a `sub_category` column.
        sub_category = np.nan
    return pd.Series({"category": category, "sub_category": sub_category})

### Creator
It is better to scrape this information again.

### Location
We only need the country, which is already available in the `country` column.

### Photo
It is better to scrape this information again.

### Profile
Duplicated information.

### Urls
Here we have the project URL and the rewards page URL.

In [171]:
def get_urls(entry):
    """Extract the project and rewards URLs from a row's ``urls`` JSON.

    Parameters
    ----------
    entry : mapping or pandas.Series
        A row whose ``"urls"`` value is a JSON-encoded object string with a
        ``"web"`` object containing ``"project"`` and ``"rewards"`` keys.

    Returns
    -------
    pandas.Series
        Indexed by ``"project_url"`` and ``"rewards_url"``.

    Raises
    ------
    KeyError
        If ``"web"``, ``"project"`` or ``"rewards"`` is missing from the JSON.
    """
    # Parse the JSON string once and reuse it for both lookups.
    web_urls = json.loads(entry["urls"])["web"]
    return pd.Series({
        "project_url": web_urls["project"],
        "rewards_url": web_urls["rewards"],
    })

## Creating the dataframe to save


In [174]:
# Build the tidy frame column-wise: the ready-to-use columns first, then the
# category columns and the URL columns derived row by row from the JSON fields.
tidy_df = pd.concat(
    [
        df.loc[:, ready_cols],
        df.apply(get_categories, axis=1),
        df.apply(get_urls, axis=1),
    ],
    axis=1,
)

In [183]:
# Display the assembled frame; the recorded output shows only its first and last rows.
tidy_df


Unnamed: 0,id,name,backers_count,blurb,converted_pledged_amount,country,created_at,currency,deadline,disable_communication,...,goal,launched_at,pledged,state,state_changed_at,usd_type,category,sub_category,project_url,rewards_url
0,123246984,Soaps in Texas,4,"Soaps made with love, care, creativity and you...",41,US,1512430228,USD,1515102534,False,...,150.0,1512510534,41.00,failed,1515102534,international,Crafts,DIY,https://www.kickstarter.com/projects/texassoap...,https://www.kickstarter.com/projects/texassoap...
1,1207132794,Whiskey Pens,35,"Pens made from Whiskey barrels, Jack Daniel's ...",2205,US,1528377129,USD,1531325825,False,...,900.0,1528733825,2205.00,successful,1531325825,international,Crafts,Woodworking,https://www.kickstarter.com/projects/202206651...,https://www.kickstarter.com/projects/202206651...
2,1899686686,The Posh Factory,310,Finally we have a building but we need your he...,8861,US,1443014021,USD,1447620375,False,...,8500.0,1443728775,8861.00,successful,1447620376,international,Dance,Spaces,https://www.kickstarter.com/projects/988974145...,https://www.kickstarter.com/projects/988974145...
3,1081353908,Home Made,1,Everything home made in one store. From jewelr...,100,US,1412436646,USD,1415406440,False,...,50000.0,1412810840,100.00,failed,1415406440,international,Crafts,DIY,https://www.kickstarter.com/projects/179435353...,https://www.kickstarter.com/projects/179435353...
4,84306631,Goofy Goat Soaps LLC,36,"Growing My Natural, Eco-Friendly, and Fun Bath...",1026,US,1599615992,USD,1602385860,False,...,800.0,1600148240,1026.00,successful,1602385860,international,Crafts,Candles,https://www.kickstarter.com/projects/adella/go...,https://www.kickstarter.com/projects/adella/go...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3681,790842514,Carnies Run Red,1,A 1980 slasher styled film based in modern tim...,23,US,1462852441,USD,1466459893,False,...,5500.0,1463867893,23.00,failed,1466459893,international,Film & Video,Horror,https://www.kickstarter.com/projects/193861478...,https://www.kickstarter.com/projects/193861478...
3682,316016111,Bed-Stuy Film Festival,1,Bed-Stuy Film Festival (BSFF) is an on-locatio...,10,US,1423059118,USD,1428427870,False,...,10000.0,1423247470,10.00,failed,1428427872,international,Film & Video,Festivals,https://www.kickstarter.com/projects/949574320...,https://www.kickstarter.com/projects/949574320...
3683,2013899067,What Happened To Elisa Lam? A Documentary,377,A cinematic exploration of the most mysterious...,30490,US,1481502184,USD,1490740364,False,...,30000.0,1488151964,30490.81,successful,1490740364,international,Film & Video,Documentary,https://www.kickstarter.com/projects/428442824...,https://www.kickstarter.com/projects/428442824...
3684,1507647522,Yout - short film,16,‘Yout' is a teen comedy set in Essex. Its an o...,385,GB,1487692862,GBP,1490729221,False,...,300.0,1488140821,307.00,successful,1490729222,international,Film & Video,Shorts,https://www.kickstarter.com/projects/454319080...,https://www.kickstarter.com/projects/454319080...
