In [0]:
import pandas as pd
import numpy as np

# Assignment 1: Groupby

Can you return a table containing the top 10 stores by total transactions in the data?

Make sure they’re sorted from highest to lowest.

Thanks!

In [0]:
# Load the transactions data. The "date" column is parsed into datetimes at
# read time because a later cell extracts date parts from it.
TRANSACTIONS_CSV = "/Volumes/dbx_catalog/default/sample_files/transactions.csv"

transactions = pd.read_csv(TRANSACTIONS_CSV, parse_dates=["date"])

transactions.head()

In [0]:
# Total transactions per store, ranked from highest to lowest; keep the top 10.
(
    transactions
    .groupby(["store_nbr"])[["transactions"]]
    .sum()
    .sort_values(by="transactions", ascending=False)
    .head(10)
)

# Assignment 2: Groupby Multiple Columns

Can you get me the total transactions by store and month? 

Sort the table from first month to last, then by highest transactions to lowest within each month. 


In [0]:
# Helper: store the month number of each row's date in a new "month" column
# so later cells can group by it.
date_column = transactions["date"]
transactions["month"] = date_column.dt.month

transactions.head()

In [0]:
# Total transactions for each (store, month) pair, ordered by month ascending
# and, within each month, by total transactions descending.
(
    transactions
    .groupby(["store_nbr", "month"])[["transactions"]]
    .sum()
    .sort_values(by=["month", "transactions"], ascending=[True, False])
)

# Assignment 3: Multi-Index DataFrames


Can you help me access rows and columns with multiple indices? I’ve been struggling with multi-index DataFrames.

Access:
* Grab Store 3, Month 1
* Then, select the column storing the mean of transactions

Fix:
* Drop the outer layer of the column Index
* Reset the row index so it is the default integer index

In [0]:
# Ross' grouped DataFrame code, run this first
#
# Step 1: aggregate transactions per (store_nbr, month) into "sum" and "mean".
# The dict-of-lists agg produces two-level column labels:
# ("transactions", "sum") and ("transactions", "mean").
store_month_stats = transactions.groupby(["store_nbr", "month"]).agg(
    {"transactions": ["sum", "mean"]}
)

# Step 2: order by month ascending, then by summed transactions descending.
grouped = store_month_stats.sort_values(
    by=["month", ("transactions", "sum")],
    ascending=[True, False],
)


In [0]:
# Preview the grouped frame: rows are indexed by (store_nbr, month) and the
# columns are the two aggregations ("sum", "mean") under "transactions".
grouped.head(5)

In [0]:
# Label-based access: the row for store 3, month 1. The tuple addresses both
# row-index levels; the trailing ":" makes "all columns" explicit.
grouped.loc[(3, 1), :]

In [0]:
# Position-based access: the row at integer position 4 (the fifth row in the
# current sort order), with all columns.
grouped.iloc[4, :]



In [0]:
# Select the mean-of-transactions column by its full two-level label and
# preview the first rows.
grouped[("transactions", "mean")].head()

In [0]:
# Position-based equivalent: column at position 1 (the mean), first row only.
grouped.iloc[:1, 1]

In [0]:
# Fix-up: drop the outer column level ("transactions") so the columns are just
# "sum" and "mean", then move store_nbr/month out of the row index so the
# frame gets the default integer index.
(
    grouped
    .droplevel(level=0, axis="columns")
    .reset_index()
    .head()
)

In [0]:
# Show the last rows of the grouped frame; `grouped` itself still has its
# MultiIndex because the cell above did not assign its result.
grouped.tail(5)

# Assignment 4: The Agg Method

Calculate the mean of `met_target` by store, and the sum of `bonus_payable` owed to each store.

Sort them by highest to lowest bonus payout.

Then, do the same for day of week and month.

In [0]:
# Recreate table from section 3
#
# target_pct    - transactions as a fraction of the 2500 target
# met_target    - True where that fraction is at least 1
# bonus_payable - 100 where the target was met, otherwise 0
# month         - month number taken from the date
# day_of_week   - day-of-week number taken from the date (pandas: Monday=0)
target_ratio = transactions["transactions"] / 2500
hit_target = target_ratio >= 1

transactions = transactions.assign(
    target_pct=target_ratio,
    met_target=hit_target,
    bonus_payable=hit_target * 100,
    month=transactions["date"].dt.month,
    day_of_week=transactions["date"].dt.dayofweek,
)

transactions

In [0]:
# Per store: mean of met_target and total bonus payable, highest payout first.
(
    transactions
    .groupby(["store_nbr"])
    .agg({"met_target": "mean", "bonus_payable": "sum"})
    .sort_values(by="bonus_payable", ascending=False)
    .head()
)

In [0]:
# Per month: mean of met_target and total bonus payable, highest payout first.
(
    transactions
    .groupby(["month"])
    .agg({"met_target": "mean", "bonus_payable": "sum"})
    .sort_values(by="bonus_payable", ascending=False)
    .head()
)

In [0]:
# Per day of week: mean of met_target and total bonus payable, highest payout
# first.
(
    transactions
    .groupby(["day_of_week"])
    .agg({"met_target": "mean", "bonus_payable": "sum"})
    .sort_values(by="bonus_payable", ascending=False)
    .head()
)


# Assignment 4: Transform

Calculate the mean of transactions by store number and day of week while keeping the original number of rows (one value per row rather than one per group).

Then compare each row to the average for its store and day of week, i.e. the difference between the row's transactions and that average.

In [0]:
import pandas as pd

In [0]:
# Mean transactions for each (store, day of week) group, broadcast back onto
# every row by transform so the result has one value per original row.
store_dow_mean = (
    transactions
    .groupby(["store_nbr", "day_of_week"])["transactions"]
    .transform("mean")
)

# Display only: the result is not assigned, so `transactions` is unchanged.
transactions.assign(
    avg_store_transactions=store_dow_mean,
    difference=transactions["transactions"] - store_dow_mean,
)

# Assignment 5: Pivot

Pivot transactions with store number as the index, day of week as the columns, and the sum of bonus payable as the cell values.

Filter to stores that had a non-zero bonus payable and create a heatmap.

Then unpivot (melt) the table so we have one row for each store and day of the week with the corresponding total owed. 


In [0]:
# Persist the transform columns on `transactions`:
#   avg_store_transactions - mean transactions for the row's store and day of week
#   difference             - the row's transactions minus that mean
transactions = transactions.assign(
    avg_store_transactions=(
        transactions
        .groupby(["store_nbr", "day_of_week"])["transactions"]
        .transform("mean")
    ),
    difference=lambda df: df["transactions"] - df["avg_store_transactions"],
)

In [0]:
# Use transactions table (ok if includes columns from assignment 4 or not)
# Quick look at the first rows before pivoting.
transactions.head(5)

In [0]:
# Pivot to one row per store and one column per day of week, where each cell
# is the total bonus payable for that store on that weekday.
# `values` is set explicitly: without it, pivot_table aggregates every
# remaining column of `transactions` rather than only the bonus.
transactions.pivot_table(
    index="store_nbr",
    columns="day_of_week",
    values="bonus_payable",
    aggfunc="sum",
).head()