## Data to Aggregate

In [1]:
orders = [
 (9423517, '2021-08-04', 9001),
 (4626232, '2021-08-04', 9003),
 (9423534, '2021-08-04', 9001),
 (9423679, '2021-08-05', 9002),
 (4626377, '2021-08-05', 9003),
 (4626412, '2021-08-05', 9004),
 (9423783, '2021-08-06', 9002),
 (4626490, '2021-08-06', 9004)
]

In [2]:
import pandas as pd
df_orders = pd.DataFrame(orders, columns=['OrderNo','Date','Empno'])

In [3]:
details = [
 (9423517, 'Jeans', 'Rip Curl', 87.0, 1),
 (9423517, 'Jacket', 'The North Face', 112.0, 1),
 (4626232, 'Socks', 'Vans', 15.0, 1),
 (4626232, 'Jeans', 'Quiksilver', 82.0, 1),
 (9423534, 'Socks', 'DC', 10.0, 2),
 (9423534, 'Socks', 'Quiksilver', 12.0, 2),
 (9423679, 'T-shirt', 'Patagonia', 35.0, 1),
 (4626377, 'Hoody', 'Animal', 44.0, 1),
 (4626377, 'Cargo Shorts', 'Animal', 38.0, 1),
 (4626412, 'Shirt', 'Volcom', 78.0, 1),
 (9423783, 'Boxer Shorts', 'Superdry', 30.0, 2),
 (9423783, 'Shorts', 'Globe', 26.0, 1),
 (4626490, 'Cargo Shorts', 'Billabong', 54.0, 1),
 (4626490, 'Sweater', 'Dickies', 56.0, 1)
]

In [4]:
df_details = pd.DataFrame(details, columns=['OrderNo','Item','Brand','Price','Quantity'])

In [5]:
emps = [
 (9001, 'Jeff Russell', 'LA'),
 (9002, 'Nick Boorman', 'San Francisco'),
 (9003, 'Tom Heints', 'NYC'),
 (9004, 'Maya Silver', 'Philadelphia')
]

In [6]:
df_emps = pd.DataFrame(emps, columns=['Empno','Empname','Location'])

In [7]:
locations = [
 ('LA', 'West'),
 ('San Francisco', 'West'),
 ('NYC', 'East'),
 ('Philadelphia', 'East')
]

In [8]:
df_locations = pd.DataFrame(locations, columns=['Location','Region'])

## Combining Dataframes

In [9]:
df_sales = df_orders.merge(df_details)

In [10]:
df_sales.head(3)

Unnamed: 0,OrderNo,Date,Empno,Item,Brand,Price,Quantity
0,9423517,2021-08-04,9001,Jeans,Rip Curl,87.0,1
1,9423517,2021-08-04,9001,Jacket,The North Face,112.0,1
2,4626232,2021-08-04,9003,Socks,Vans,15.0,1


In [11]:
df_sales['Total'] = df_sales['Price']*df_sales['Quantity']

In [12]:
df_sales.head(3)

Unnamed: 0,OrderNo,Date,Empno,Item,Brand,Price,Quantity,Total
0,9423517,2021-08-04,9001,Jeans,Rip Curl,87.0,1,87.0
1,9423517,2021-08-04,9001,Jacket,The North Face,112.0,1,112.0
2,4626232,2021-08-04,9003,Socks,Vans,15.0,1,15.0


In [13]:
df_sales = df_sales[['Date','Empno','Total']]

In [14]:
df_sales.head(3)

Unnamed: 0,Date,Empno,Total
0,2021-08-04,9001,87.0
1,2021-08-04,9001,112.0
2,2021-08-04,9003,15.0


In [15]:
df_sales_emps = df_sales.merge(df_emps)

In [16]:
df_sales_emps.head(3)

Unnamed: 0,Date,Empno,Total,Empname,Location
0,2021-08-04,9001,87.0,Jeff Russell,LA
1,2021-08-04,9001,112.0,Jeff Russell,LA
2,2021-08-04,9003,15.0,Tom Heints,NYC


In [17]:
df_result = df_sales_emps.merge(df_locations)

In [18]:
df_result.head(3)

Unnamed: 0,Date,Empno,Total,Empname,Location,Region
0,2021-08-04,9001,87.0,Jeff Russell,LA,West
1,2021-08-04,9001,112.0,Jeff Russell,LA,West
2,2021-08-04,9003,15.0,Tom Heints,NYC,East


In [19]:
df_result = df_result[['Date','Region','Total']]

In [20]:
df_result.head(3)

Unnamed: 0,Date,Region,Total
0,2021-08-04,West,87.0
1,2021-08-04,West,112.0
2,2021-08-04,East,15.0


## Grouping and aggregating the data

In [21]:
df_date_region = df_result.groupby(['Date','Region']).sum()

In [22]:
df_date_region

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-04,East,97.0
2021-08-04,West,243.0
2021-08-05,East,160.0
2021-08-05,West,35.0
2021-08-06,East,110.0
2021-08-06,West,86.0


## Viewing Specific Aggregations by Multiindex

In [23]:
print(df_date_region.index)

MultiIndex([('2021-08-04', 'East'),
            ('2021-08-04', 'West'),
            ('2021-08-05', 'East'),
            ('2021-08-05', 'West'),
            ('2021-08-06', 'East'),
            ('2021-08-06', 'West')],
           names=['Date', 'Region'])


In [24]:
type(df_date_region)

pandas.core.frame.DataFrame

In [25]:
df_date_region[df_date_region.index.isin([('2021-08-05','West')])]

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-05,West,35.0


In [26]:
df_date_region[df_date_region.index.isin([('2021-08-05','West'),('2021-08-05','East')])]

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-05,East,160.0
2021-08-05,West,35.0


## Slicing a range of aggregated values

In [27]:
df_date_region[('2021-08-04','East'):('2021-08-05','West')]

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-04,East,97.0
2021-08-04,West,243.0
2021-08-05,East,160.0
2021-08-05,West,35.0


In [28]:
df_date_region[('2021-08-04'):('2021-08-05')]

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-04,East,97.0
2021-08-04,West,243.0
2021-08-05,East,160.0
2021-08-05,West,35.0


## Slicing within aggregation levels

In [29]:
df_date_region.loc[(slice('2021-08-04','2021-08-05'),slice(None))]

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-04,East,97.0
2021-08-04,West,243.0
2021-08-05,East,160.0
2021-08-05,West,35.0


In [31]:
df_date_region.loc[(slice('2021-08-04','2021-08-05'),slice('East')),:]

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-04,East,97.0
2021-08-05,East,160.0


## Adding a grand total

In [31]:
#ps = pd.Series(df_date_region.sum(axis = 0))
#print(ps)

In [32]:
ps = df_date_region.sum(axis = 0)
print(ps)

Total    731.0
dtype: float64


In [33]:
ps.name=('All','All')

In [34]:
ps

Total    731.0
Name: (All, All), dtype: float64

In [35]:
df_date_region

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Date,Region,Unnamed: 2_level_1
2021-08-04,East,97.0
2021-08-04,West,243.0
2021-08-05,East,160.0
2021-08-05,West,35.0
2021-08-06,East,110.0
2021-08-06,West,86.0


In [36]:
df_date_region_total = df_date_region

In [37]:
df_date_region_total.loc['total'] = df_date_region_total.iloc[:, :-1] = ps

In [38]:
df_date_region_total

Unnamed: 0,Total
"(2021-08-04, East)",97.0
"(2021-08-04, West)",243.0
"(2021-08-05, East)",160.0
"(2021-08-05, West)",35.0
"(2021-08-06, East)",110.0
"(2021-08-06, West)",86.0
total,731.0


## adding subtotals
# THIS DOES NOT WORK. NO APPEND FUNCTION FOR DATAFRAME

In [39]:
df_totals = pd.DataFrame()

In [43]:
df_totals = pd.DataFrame()
for date, date_df in df_date_region.groupby(level=0):
  df_totals = df_totals.append(date_df)
  ps = date_df.sum(axis = 0)
  ps.name=(date,'All')
  df_totals = df_totals.append(ps)


AttributeError: 'DataFrame' object has no attribute 'append'

## Selecting all rows in a group

In [48]:
# This provies all detail rows associated with a grouping row
group = df_result.groupby(['Date','Region'])
group.get_group(('2021-08-05','East'))

Unnamed: 0,Date,Region,Total
7,2021-08-05,East,44.0
8,2021-08-05,East,38.0
9,2021-08-05,East,78.0
