In [None]:
# Import the required libraries and dependencies
import pandas as pd
from pathlib import Path

## Import data 

In [None]:
# Load the small "most backed" Kickstarter dataset. The column that pandas
# reads as 'Unnamed: 0' is renamed to 'ID' and then promoted to the index.
kickstarter_small_most_backed_df = (
    pd.read_csv(Path('./Resources/kickstarter_data/most_backed.csv'))
    .rename(columns={'Unnamed: 0': 'ID'})
    .set_index('ID')
)

# Preview the first two rows
kickstarter_small_most_backed_df.head(2)

## Rename cols

In [None]:
# Align the small dataset's column names with those used by the large
# kickstarter df (old name -> new name).
kickstarter_small_most_backed_df = kickstarter_small_most_backed_df.rename(
    columns={
        'amt.pledged': 'usd_pledged_real',
        'title': 'name',
        'category': 'main_category',
        'goal': 'usd_goal_real',
        'num.backers': 'backers',
    }
)

## Clean df

In [None]:
# Only look at US
%run ./Resources/Utils/consts.ipynb
kickstarter_small_most_backed_df = kickstarter_small_most_backed_df[kickstarter_small_most_backed_df.loc[:,'location'].isin(city_lat_lon_dict)]

# Clean currency
kickstarter_small_most_backed_df.loc[:, 'currency'] = kickstarter_small_most_backed_df.loc[:, 'currency'].apply(lambda x : x.upper())

## Create dfs from selected columns

In [None]:
# Columns that are not carried forward into the selected-columns df
kickstarter_small_cols_drop = ['blurb', 'by', 'url']

# Selected-columns df: the cleaned frame without the columns listed above
kickstarter_selected_cols_small_most_backed_df = (
    kickstarter_small_most_backed_df.drop(columns=kickstarter_small_cols_drop)
)

## Add new cols

In [None]:
# Add 'lat' and 'long' columns looked up from each row's location.
# Requires `city_lat_lon_dict`, i.e. ./Resources/Utils/consts.ipynb must have
# been run first. Element 0 of each dict value is used as the latitude and
# element 1 as the longitude; a location missing from the dict raises KeyError.
kickstarter_selected_cols_small_most_backed_df = (
    kickstarter_selected_cols_small_most_backed_df.assign(
        lat=lambda df: df['location'].map(lambda city: city_lat_lon_dict[city][0]),
        long=lambda df: df['location'].map(lambda city: city_lat_lon_dict[city][1]),
    )
)

## Reorder cols

In [None]:
# Base column names for reordering
small_kickstarter_order = [
    'name',
    'main_category',
    'currency',
    'usd_goal_real',
    'usd_pledged_real',
    'location',
    'backers',
    'num.backers.tier',
    'pledge.tier',
]

# Reorder the df: base columns first, then the derived coordinate columns.
# Selecting only the base list would silently discard the 'lat' / 'long'
# columns added earlier in the notebook, so they are appended explicitly here.
kickstarter_selected_cols_small_most_backed_df = kickstarter_selected_cols_small_most_backed_df[
    small_kickstarter_order + ['lat', 'long']
]

## Write clean data to files

In [None]:
# Persist the cleaned, reordered df as CSV (the 'ID' index is written as the
# first column, since to_csv keeps the index by default).
kickstarter_selected_cols_small_most_backed_df.to_csv(
    Path('./Resources/kickstarter_data_clean/ks-projects-small.csv')
)