In [31]:
import pandas as pd
import json

## Deliverable 1: Extract
----
### Option 1: Use Python Dictionary Methods

1. Import the `backer_info.csv` file into a DataFrame.
2. Iterate through the DataFrame and convert each row to a dictionary. 
3. Iterate through each dictionary and do the following:
    * Extract the dictionary values from the keys using Python list comprehension.
    * Add the values for each row to a new list. 
4. Create a new DataFrame with the retrieved data. 
5. Export the DataFrame as `backers_data.csv`.

In [32]:
# Load the raw backer records from backer_info.csv into a DataFrame.
# Column display is widened to 400 characters so long cell values are not
# truncated when the frame is shown. The fully qualified option name is used
# because pandas warns that abbreviated option names can stop working when
# similarly named options are added in later versions.
pd.set_option('display.max_colwidth', 400)
df=pd.read_csv('backer_info.csv')


In [33]:
# Convert every DataFrame row to a dictionary, then decode the JSON text stored
# under its 'backer_info' key. The list comprehension collects one decoded
# record per row, in row order.
# NOTE(review): the name `list` shadows the Python builtin; it is kept here
# because the following cell builds the DataFrame from this variable.
list = [
    json.loads(record['backer_info'])
    for record in df.to_dict(orient='records')
]

In [34]:
# Create the backers DataFrame from the decoded records.
# The columns are named explicitly so the schema and the column order
# ('backer_id', 'cf_id', 'name', 'email') are fixed here rather than inferred
# from the key order of the decoded records.
dff = pd.DataFrame(list, columns=['backer_id', 'cf_id', 'name', 'email'])
dff

Unnamed: 0,backer_id,cf_id,name,email
0,av166,968,Angelo Vincent,avincent@live.com
1,ha127,563,Hubert Arnold,harnold@yandex.com
2,lg794,65,Loris Goulet,lgoulet@yandex.com
3,tb566,563,Teodora Brunelli,tbrunelli@outlook.com
4,lh506,563,Lexie Hunt,lhunt@live.com
...,...,...,...,...
8170,st581,65,Serita Thebault,sthebault@yandex.com
8171,gf637,563,Glenn Foerstner,gfoerstner@yahoo.com
8172,rc983,1114,Robt Collin,rcollin@outlook.com
8173,cz381,65,Corina Zappa,czappa@outlook.com


In [35]:
# Export the DataFrame as backers_data.csv using encoding='utf8'.
# index=False keeps the positional row index out of the file, so it does not
# come back as an extra unnamed column when the CSV is read again.
dff.to_csv('backers_data.csv', encoding='utf8', index=False)

## Deliverable 1: Extract
----
### Option 2: Use regex 

1. Import the `backer_info.csv` file into a DataFrame. 
2. Extract the "backer_id", "cf_id", "name", and "email" using regular expressions.
3. Create a new DataFrame with the retrieved data.
4. Export the DataFrame as `backers_data.csv`.

In [36]:
# Get the backers_info from the crowdfunding_info sheet.
# This cell currently only widens column display to 400 characters so long
# cell values are not truncated. The fully qualified option name is used
# because pandas warns that abbreviated option names can stop working when
# similarly named options are added in later versions.
pd.set_option('display.max_colwidth', 400)


In [37]:
# Extract the alphanumeric "backer_id" from the backer_info column using a regular expression
# and add it as a new column called "backer_id".



In [38]:
# Extract the two- to four-digit "cf_id" number from the backer_info column
# and add it as a new column called "cf_id".


In [39]:
# Extract the name from the backer_info column and add it as a new column called "name".


In [40]:
# Extract the email from the backer_info column and add it as a new column called "email".


In [41]:
# Create a new DataFrame with the appropriate columns.


In [42]:
# Export the DataFrame as a CSV file using encoding='utf8'.


## Deliverable 2: Transform and Clean Data
----
1. Check the data types of the columns and convert the "cf_id" column to an integer, if necessary.
2. Split the name in the "name" column into first and last names, and add them to "first_name" and "last_name" columns in the DataFrame. 
3. Drop the "name" column in the DataFrame.
4. Place the columns in the following order; "backer_id", "cf_id", "first_name", "last_name" and "email".

In [43]:
# Check data types.
# Displays the dtype of every column in dff; the dtype reported for cf_id shows
# whether that column is already stored as an integer.
dff.dtypes


backer_id    object
cf_id         int64
name         object
email        object
dtype: object

In [44]:
# Convert cf_id to an integer.
# The cast is applied unconditionally: it leaves an int64 column unchanged,
# and raises if the column holds missing or non-numeric values, so a bad
# input is caught here instead of further downstream.
dff = dff.astype({'cf_id': 'int64'})
dff.dtypes

backer_id    object
cf_id         int64
name         object
email        object
dtype: object

In [45]:
# Split the "name" column into "first_name" and "last_name" columns.
# Surrounding whitespace is stripped first, and the split is limited to the
# first run of whitespace (n=1). A name with more than two words therefore
# still yields exactly two pieces (the first word, and everything after it)
# instead of producing extra columns that break the two-column assignment.
dff[['first_name','last_name']] = dff['name'].str.strip().str.split(n=1, expand=True)
dff.head(10)


Unnamed: 0,backer_id,cf_id,name,email,first_name,last_name
0,av166,968,Angelo Vincent,avincent@live.com,Angelo,Vincent
1,ha127,563,Hubert Arnold,harnold@yandex.com,Hubert,Arnold
2,lg794,65,Loris Goulet,lgoulet@yandex.com,Loris,Goulet
3,tb566,563,Teodora Brunelli,tbrunelli@outlook.com,Teodora,Brunelli
4,lh506,563,Lexie Hunt,lhunt@live.com,Lexie,Hunt
5,lh382,563,Lambert Huber,lhuber@live.com,Lambert,Huber
6,em444,563,Emmy Morin,emorin@live.com,Emmy,Morin
7,at582,1572,Armonda Trani,atrani@live.com,Armonda,Trani
8,id772,1911,Ilana Duke,iduke@outlook.com,Ilana,Duke
9,lw275,65,Leigha Wright,lwright@live.com,Leigha,Wright


In [46]:
# Drop the name column and reorder the remaining columns.
# Columns are selected by label rather than by position, which states the
# final layout explicitly, does not depend on the incoming column order, and
# keeps the cell safe to re-run on the already-reordered frame.
dff = dff[['backer_id', 'cf_id', 'first_name', 'last_name', 'email']]
dff.dtypes


backer_id     object
cf_id          int64
first_name    object
last_name     object
email         object
dtype: object

In [47]:
# Write the cleaned backers table to backers.csv as UTF-8, leaving the row
# index out of the file.
dff.to_csv(
    'backers.csv',
    index=False,
    encoding='utf8',
)