In [1]:
# Import Dependencies
import pandas as pd

In [2]:
# Load the book sales CSV into a DataFrame and preview up to the first 20 rows.
book_sales_df = pd.read_csv(filepath_or_buffer="Resources/book_sales.csv")
book_sales_df.head(n=20)

Unnamed: 0,book_name,date_ending,total_sales
0,The Hobbit,8/31/23,150
1,The Fellowship of the Ring (The Lord of the Ri...,8/31/23,80
2,"The Two Towers (The Lord of the Rings, Part 2)",8/31/23,100
3,"The Return of the King (The Lord of the Rings,...",8/31/23,100
4,Foundation,8/31/23,60
5,Foundation and Empire,8/31/23,40
6,Second Foundation,8/31/23,35
7,Foundation's Edge,8/31/23,20
8,Foundation and Earth,8/31/23,0
9,The Hobbit,9/30/23,100


In [3]:
# List each distinct title that appears in the book_name column.
book_sales_df.loc[:, "book_name"].unique()

array(['The Hobbit',
       'The Fellowship of the Ring (The Lord of the Rings, Part 1)',
       'The Two Towers (The Lord of the Rings, Part 2)',
       'The Return of the King (The Lord of the Rings, Part 3)',
       'Foundation', 'Foundation and Empire', 'Second Foundation',
       "Foundation's Edge", 'Foundation and Earth'], dtype=object)

In [4]:
# List each distinct period-ending date that appears in the date_ending column.
book_sales_df.loc[:, "date_ending"].unique()

array(['8/31/23', '9/30/23', '10/31/23', '11/30/23', '12/31/23'],
      dtype=object)

### Using the `pivot()` function.
---
`pd.pivot(data, columns=<a column>, index=<a column>, values=<a column>)` or `df.pivot(columns=<a column>, index=<a column>, values=<a column>)`


In [5]:
# Reshape to one row per book (index) and one column per date_ending value,
# with total_sales filling the cells.
pivot_date_short_form = book_sales_df.pivot(
    index="book_name",
    columns="date_ending",
    values="total_sales",
)
# Display the pivoted table.
pivot_date_short_form

date_ending,10/31/23,11/30/23,12/31/23,8/31/23,9/30/23
book_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Foundation,75,50,125,60,100
Foundation and Earth,25,30,20,0,20
Foundation and Empire,75,60,75,40,50
Foundation's Edge,25,30,30,20,30
Second Foundation,50,100,50,35,40
"The Fellowship of the Ring (The Lord of the Rings, Part 1)",125,100,175,80,150
The Hobbit,200,200,250,150,100
"The Return of the King (The Lord of the Rings, Part 3)",200,50,200,100,125
"The Two Towers (The Lord of the Rings, Part 2)",225,100,150,100,75


In [6]:
# Reorder the columns chronologically.
# The column labels are date strings in month/day/two-digit-year form, so the
# pivot lists them in text order ("10/31/23" before "8/31/23"). Sorting the
# labels by their parsed date avoids hard-coding column positions, which would
# silently pick the wrong columns if a period were added or removed.
date_columns_in_order = sorted(
    pivot_date_short_form.columns,
    key=lambda label: pd.to_datetime(label, format="%m/%d/%y"),
)
pivot_date_short_form.loc[:, date_columns_in_order]

date_ending,8/31/23,9/30/23,10/31/23,11/30/23,12/31/23
book_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Foundation,60,100,75,50,125
Foundation and Earth,0,20,25,30,20
Foundation and Empire,40,50,75,60,75
Foundation's Edge,20,30,25,30,30
Second Foundation,35,40,50,100,50
"The Fellowship of the Ring (The Lord of the Rings, Part 1)",80,150,125,100,175
The Hobbit,150,100,200,200,250
"The Return of the King (The Lord of the Rings, Part 3)",100,125,200,50,200
"The Two Towers (The Lord of the Rings, Part 2)",100,75,225,100,150


In [7]:
# Reshape to one row per date_ending value (index) and one column per book,
# with total_sales filling the cells.
pivot_books_long_form = book_sales_df.pivot(
    index="date_ending",
    columns="book_name",
    values="total_sales",
)
# Display the pivoted table.
pivot_books_long_form

book_name,Foundation,Foundation and Earth,Foundation and Empire,Foundation's Edge,Second Foundation,"The Fellowship of the Ring (The Lord of the Rings, Part 1)",The Hobbit,"The Return of the King (The Lord of the Rings, Part 3)","The Two Towers (The Lord of the Rings, Part 2)"
date_ending,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
10/31/23,75,25,75,25,50,125,200,200,225
11/30/23,50,30,60,30,100,100,200,50,100
12/31/23,125,20,75,30,50,175,250,200,150
8/31/23,60,0,40,20,35,80,150,100,100
9/30/23,100,20,50,30,40,150,100,125,75


In [8]:
# The index labels are date strings, so they are currently ordered as text
# ('10/31/23' comes before '8/31/23') rather than chronologically.
# `sort_values` sorts by column values, not by the index, so the rows are put
# into calendar order with `reindex` instead.
# Get the index values to copy.
pivot_books_long_form.index

Index(['10/31/23', '11/30/23', '12/31/23', '8/31/23', '9/30/23'], dtype='object', name='date_ending')

In [9]:
# Reindex in monthly ascending order.
# The index labels are date strings (month/day/two-digit-year), so the desired
# order is derived by sorting the labels on their parsed date instead of typing
# the labels out by hand; a mistyped or missing label would otherwise produce
# an all-NaN row or drop a period without warning.
dates_in_order = sorted(
    pivot_books_long_form.index,
    key=lambda label: pd.to_datetime(label, format="%m/%d/%y"),
)
pivot_books_long_form.reindex(dates_in_order)

book_name,Foundation,Foundation and Earth,Foundation and Empire,Foundation's Edge,Second Foundation,"The Fellowship of the Ring (The Lord of the Rings, Part 1)",The Hobbit,"The Return of the King (The Lord of the Rings, Part 3)","The Two Towers (The Lord of the Rings, Part 2)"
date_ending,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
8/31/23,60,0,40,20,35,80,150,100,100
9/30/23,100,20,50,30,40,150,100,125,75
10/31/23,75,25,75,25,50,125,200,200,225
11/30/23,50,30,60,30,100,100,200,50,100
12/31/23,125,20,75,30,50,175,250,200,150


### Using the `pivot_table()` function.
---

- `pd.pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=None, margins=False, dropna=True, margins_name='All', observed=False, sort=True)`
- `pivot_table()` performs the mean aggregation by default.

In [10]:
# Total sales per book with `pivot_table()`:
# sum total_sales, with one column per book title.
pivot_table_books_sum = book_sales_df.pivot_table(
    values="total_sales",
    columns="book_name",
    aggfunc="sum",
)

# Display the table.
pivot_table_books_sum

book_name,Foundation,Foundation and Earth,Foundation and Empire,Foundation's Edge,Second Foundation,"The Fellowship of the Ring (The Lord of the Rings, Part 1)",The Hobbit,"The Return of the King (The Lord of the Rings, Part 3)","The Two Towers (The Lord of the Rings, Part 2)"
total_sales,410,95,300,135,275,630,900,675,650


In [11]:
# Average sales per book with `pivot_table()`:
# take the mean of total_sales, with one column per book title.
pivot_table_books_avg = pd.pivot_table(
    book_sales_df,
    values="total_sales",
    columns="book_name",
    aggfunc="mean",
)
# Display the table.
pivot_table_books_avg

book_name,Foundation,Foundation and Earth,Foundation and Empire,Foundation's Edge,Second Foundation,"The Fellowship of the Ring (The Lord of the Rings, Part 1)",The Hobbit,"The Return of the King (The Lord of the Rings, Part 3)","The Two Towers (The Lord of the Rings, Part 2)"
total_sales,82,19,60,27,55,126,180,135,130


In [12]:
# Relabel the "total_sales" row as "Avg_Sales".
# The renamed frame is only displayed here; it is not assigned back.
pivot_table_books_avg.rename({"total_sales": "Avg_Sales"}, axis="index")

book_name,Foundation,Foundation and Earth,Foundation and Empire,Foundation's Edge,Second Foundation,"The Fellowship of the Ring (The Lord of the Rings, Part 1)",The Hobbit,"The Return of the King (The Lord of the Rings, Part 3)","The Two Towers (The Lord of the Rings, Part 2)"
Avg_Sales,82,19,60,27,55,126,180,135,130


### Multiple aggregations.

In [13]:
# Total and average sales per book in one table:
# one column per book title, with a row for each aggregation of total_sales.
# NOTE(review): aggfunc is given as a tuple here; pandas documents a function,
# a list of functions, or a dict. Swapping in a list may change the table
# layout, so confirm on the installed pandas version before changing it.
avg_sum_books = book_sales_df.pivot_table(
    values="total_sales",
    columns="book_name",
    aggfunc=("sum", "mean"),
)
# Display the table.
avg_sum_books

book_name,Foundation,Foundation and Earth,Foundation and Empire,Foundation's Edge,Second Foundation,"The Fellowship of the Ring (The Lord of the Rings, Part 1)",The Hobbit,"The Return of the King (The Lord of the Rings, Part 3)","The Two Towers (The Lord of the Rings, Part 2)"
mean,82.0,19.0,60.0,27.0,55.0,126.0,180.0,135.0,130.0
sum,410.0,95.0,300.0,135.0,275.0,630.0,900.0,675.0,650.0


In [14]:
# Average and total sales per period-ending date with pivot_table():
# date_ending becomes the index and the results are rounded to one decimal place.
# NOTE(review): aggfunc is given as a tuple here; pandas documents a function,
# a list of functions, or a dict. Swapping in a list may change the table
# layout, so confirm on the installed pandas version before changing it.
date_ending_pivot_table = pd.pivot_table(
    book_sales_df,
    index="date_ending",
    values="total_sales",
    aggfunc=("mean", "sum"),
).round(1)
# Display the table.
date_ending_pivot_table

Unnamed: 0_level_0,mean,sum
date_ending,Unnamed: 1_level_1,Unnamed: 2_level_1
10/31/23,111.1,1000
11/30/23,80.0,720
12/31/23,119.4,1075
8/31/23,65.0,585
9/30/23,76.7,690


In [16]:
# Reindex in monthly ascending order.
# The index labels are date strings (month/day/two-digit-year) and are listed
# in text order, so the chronological order is derived by sorting the labels
# on their parsed date rather than hard-coding each label.
dates_in_order = sorted(
    date_ending_pivot_table.index,
    key=lambda label: pd.to_datetime(label, format="%m/%d/%y"),
)
date_ending_pivot_table.reindex(dates_in_order)

Unnamed: 0_level_0,mean,sum
date_ending,Unnamed: 1_level_1,Unnamed: 2_level_1
8/31/23,65.0,585
9/30/23,76.7,690
10/31/23,111.1,1000
11/30/23,80.0,720
12/31/23,119.4,1075
