In [103]:
import pandas as pd
import numpy as np

### Download the data and load it to Pandas. 

You can find them [here](https://drive.google.com/file/d/1NY6cmF9Shjw-dD7BD6bNmfcIVz-kQcFR/view?usp=sharing).

In [104]:
sales1 = pd.read_csv('data/sales1.csv')
sales2 = pd.read_csv('data/sales2.csv')

## Challenge: 

#### 1. Combine these two sales CSV together into a single dataframe.
#### 2. Then compute how much money consumers spent on each book in each currency.

**Notes:**
- First table is in USD
- Second table actually consists of 3 smaller tables. First 9 rows are for USD, next 8 rows are for GBP, and final 7 are for EUR.

In [105]:
sales1.head(3)

Unnamed: 0,Book title,Number sold,Sales price,Royalty paid
0,The Bricklayer’s Bible,8,2.99,0.55
1,Swimrand,2,1.99,0.35
2,Pining For The Fisheries of Yore,28,2.99,0.55


In [106]:
sales2.head(3)

Unnamed: 0,Title,Units sold,List price,Royalty
0,,,,
1,Sales report for Q4,,,
2,E-Book Reader US Store,,,


In [132]:
# create Revenue column with values equal to the product of 'Number sold' and 'Sales price'
sales1['Revenue'] = sales1.apply(lambda x: x['Number sold'] * x['Sales price'], axis=1)

# create 'Currency' column to display origin of sale
sales1['Currency'] = 'USD'

# drop all columns except 'Book title' and 'Revenue'
sales1_clean = sales1.drop(columns=['Number sold', 'Sales price', 'Royalty paid'])

# clean sales2 table
sales1_clean['Book title'] = sales1_clean['Book title'].str.strip()
sales1_clean['Book title'] = sales1_clean['Book title'].str.casefold()
sales1_clean['Book title'] = sales1_clean['Book title'].str.replace('’','')
sales1_clean

Unnamed: 0,Book title,Revenue,Currency
0,the bricklayers bible,23.92,USD
1,swimrand,3.98,USD
2,pining for the fisheries of yore,83.72,USD
3,the duck goes here,101.66,USD
4,the tower commission report,46.0,USD


In [133]:
# create Revunue column with values equal to the proudct of 'Units sold' and 'List price'
sales2['Revenue'] = sales2.apply(lambda x: x['Units sold'] * x['List price'], axis=1)
sales2['Currency'] = 'USD'
sales2['Currency'][9:] = 'GBP'
sales2['Currency'][17:] = 'EUR'

# drop all columns except 'Book title' and 'Revenue'
sales2_clean = sales2.drop(columns=['Units sold', 'List price', 'Royalty']).dropna().rename(columns={'Title':'Book title'})

# clean sales2 table
sales2_clean['Book title'] = sales2_clean['Book title'].str.strip()
sales2_clean['Book title'] = sales2_clean['Book title'].str.casefold()
sales2_clean['Book title'] = sales2_clean['Book title'].str.replace('\'','')
sales2_clean

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sales2['Currency'][9:] = 'GBP'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sales2['Currency'][17:] = 'EUR'


Unnamed: 0,Book title,Revenue,Currency
3,pining for the fisheries of yore,280.0,USD
4,swimrand,2.99,USD
5,the bricklayers bible,59.5,USD
6,the duck goes here,101.66,USD
7,the tower commission report,38.0,USD
13,pining for the fisheries of yore,140.53,GBP
14,the bricklayers bible,50.83,GBP
15,the tower commission report,26.0,GBP
21,swimrand,15.92,EUR
22,the duck goes here,23.88,EUR


In [140]:
# concatenate both tables and sum the revenues based on currency
total_sales = pd.concat([sales1_clean, sales2_clean]).groupby(['Currency', 'Book title']).sum()
total_sales

Unnamed: 0_level_0,Unnamed: 1_level_0,Revenue
Currency,Book title,Unnamed: 2_level_1
EUR,swimrand,15.92
EUR,the duck goes here,23.88
GBP,pining for the fisheries of yore,140.53
GBP,the bricklayers bible,50.83
GBP,the tower commission report,26.0
USD,pining for the fisheries of yore,363.72
USD,swimrand,6.97
USD,the bricklayers bible,83.42
USD,the duck goes here,203.32
USD,the tower commission report,84.0
