In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.read_csv('mpg.csv')

In [3]:
df

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino
...,...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86,2790,15.6,82,1,ford mustang gl
394,44.0,4,97.0,52,2130,24.6,82,2,vw pickup
395,32.0,4,135.0,84,2295,11.6,82,1,dodge rampage
396,28.0,4,120.0,79,2625,18.6,82,1,ford ranger


# **`.groupby()`**

The `.groupby()` method is used to group data in a DataFrame based on one or more columns. It allows for aggregation, transformation, and analysis of grouped data, enabling operations like summing, counting, or averaging within groups.

---

### **Key Features**
- Groups data by one or more keys (columns).
- Enables aggregation functions like `sum()`, `mean()`, `count()`, etc., to operate on grouped data.
- Supports transformation and filtering of grouped data.

---

### **Usage**
- **Aggregation**: Summarize data for each group using functions like `sum`, `mean`, or `count`.
- **Iteration**: Iterate over groups for customized operations.
- **Transformation**: Apply transformations to each group independently.
    
---

In [4]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [5]:
df['model_year'].unique()

array([70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82], dtype=int64)

In [6]:
df['model_year'].value_counts()

73    40
78    36
76    34
82    31
75    30
70    29
79    29
80    29
81    29
71    28
72    28
77    28
74    27
Name: model_year, dtype: int64

## groupby() method

In [7]:
# Creates a groupby object waiting for an aggregate method
df.groupby('model_year')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001CD33CD0A00>

In [8]:
# model_year becomes the index! It is NOT a column name,it is now the name of the index
df.groupby('model_year').mean() 


Unnamed: 0_level_0,mpg,cylinders,displacement,weight,acceleration,origin
model_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70,17.689655,6.758621,281.413793,3372.793103,12.948276,1.310345
71,21.25,5.571429,209.75,2995.428571,15.142857,1.428571
72,18.714286,5.821429,218.375,3237.714286,15.125,1.535714
73,17.1,6.375,256.875,3419.025,14.3125,1.375
74,22.703704,5.259259,171.740741,2877.925926,16.203704,1.666667
75,20.266667,5.6,205.533333,3176.8,16.05,1.466667
76,21.573529,5.647059,197.794118,3078.735294,15.941176,1.470588
77,23.375,5.464286,191.392857,2997.357143,15.435714,1.571429
78,24.061111,5.361111,177.805556,2861.805556,15.805556,1.611111
79,25.093103,5.827586,206.689655,3055.344828,15.813793,1.275862


In [9]:
df.groupby('model_year').mean().columns

Index(['mpg', 'cylinders', 'displacement', 'weight', 'acceleration', 'origin'], dtype='object')

In [10]:
df.groupby('model_year').mean()['mpg'] # Selecting the particluar column value return by grp by and mean 

model_year
70    17.689655
71    21.250000
72    18.714286
73    17.100000
74    22.703704
75    20.266667
76    21.573529
77    23.375000
78    24.061111
79    25.093103
80    33.696552
81    30.334483
82    31.709677
Name: mpg, dtype: float64

## Groupby Multiple Columns
Let's explore average mpg per year per cylinder count

In [11]:
df.groupby(['model_year', 'cylinders']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,displacement,weight,acceleration,origin
model_year,cylinders,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70,4,25.285714,107.0,2292.571429,16.0,2.285714
70,6,20.5,199.0,2710.5,15.5,1.0
70,8,14.111111,367.555556,3940.055556,11.194444,1.0
71,4,27.461538,101.846154,2056.384615,16.961538,1.923077
71,6,18.0,243.375,3171.875,14.75,1.0
71,8,13.428571,371.714286,4537.714286,12.214286,1.0
72,3,19.0,70.0,2330.0,13.5,3.0
72,4,23.428571,111.535714,2382.642857,17.214286,1.928571
72,8,13.615385,344.846154,4228.384615,13.0,1.0
73,3,18.0,70.0,2124.0,13.5,3.0


In [12]:
df.groupby(['model_year', 'cylinders']).mean()['mpg'] # Here the model_year and the cylinders columns now act as a MultiIndex

model_year  cylinders
70          4            25.285714
            6            20.500000
            8            14.111111
71          4            27.461538
            6            18.000000
            8            13.428571
72          3            19.000000
            4            23.428571
            8            13.615385
73          3            18.000000
            4            22.727273
            6            19.000000
            8            13.200000
74          4            27.800000
            6            17.857143
            8            14.200000
75          4            25.250000
            6            17.583333
            8            15.666667
76          4            26.766667
            6            20.000000
            8            14.666667
77          3            21.500000
            4            29.107143
            6            19.500000
            8            16.000000
78          4            29.576471
            5            20.30000

In [13]:
df.groupby(['model_year', 'cylinders']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,mpg,mpg,mpg,mpg,mpg,mpg,mpg,displacement,displacement,...,acceleration,acceleration,origin,origin,origin,origin,origin,origin,origin,origin
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
model_year,cylinders,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
70,4,7.0,25.285714,1.112697,24.0,24.5,25.0,26.0,27.0,7.0,107.0,...,17.5,20.5,7.0,2.285714,0.48795,2.0,2.0,2.0,2.5,3.0
70,6,4.0,20.5,1.732051,18.0,20.25,21.0,21.25,22.0,4.0,199.0,...,15.625,16.0,4.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
70,8,18.0,14.111111,2.609685,9.0,14.0,14.5,15.0,18.0,18.0,367.555556,...,12.0,18.5,18.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
71,4,13.0,27.461538,3.502746,22.0,25.0,27.0,30.0,35.0,13.0,101.846154,...,19.0,20.5,13.0,1.923077,0.862316,1.0,1.0,2.0,3.0,3.0
71,6,8.0,18.0,1.069045,16.0,17.75,18.0,19.0,19.0,8.0,243.375,...,15.5,15.5,8.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
71,8,7.0,13.428571,0.786796,12.0,13.0,14.0,14.0,14.0,7.0,371.714286,...,12.5,13.5,7.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
72,3,1.0,19.0,,19.0,19.0,19.0,19.0,19.0,1.0,70.0,...,13.5,13.5,1.0,3.0,,3.0,3.0,3.0,3.0,3.0
72,4,14.0,23.428571,3.056249,18.0,21.25,23.0,25.75,28.0,14.0,111.535714,...,18.0,23.5,14.0,1.928571,0.828742,1.0,1.0,2.0,2.75,3.0
72,8,13.0,13.615385,1.502135,11.0,13.0,13.0,14.0,17.0,13.0,344.846154,...,13.5,16.0,13.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
73,3,1.0,18.0,,18.0,18.0,18.0,18.0,18.0,1.0,70.0,...,13.5,13.5,1.0,3.0,,3.0,3.0,3.0,3.0,3.0


In [14]:
df.groupby(['model_year', 'cylinders']).describe().transpose

<bound method DataFrame.transpose of                        mpg                                                    \
                     count       mean       std   min     25%    50%     75%   
model_year cylinders                                                           
70         4           7.0  25.285714  1.112697  24.0  24.500  25.00  26.000   
           6           4.0  20.500000  1.732051  18.0  20.250  21.00  21.250   
           8          18.0  14.111111  2.609685   9.0  14.000  14.50  15.000   
71         4          13.0  27.461538  3.502746  22.0  25.000  27.00  30.000   
           6           8.0  18.000000  1.069045  16.0  17.750  18.00  19.000   
           8           7.0  13.428571  0.786796  12.0  13.000  14.00  14.000   
72         3           1.0  19.000000       NaN  19.0  19.000  19.00  19.000   
           4          14.0  23.428571  3.056249  18.0  21.250  23.00  25.750   
           8          13.0  13.615385  1.502135  11.0  13.000  13.00  14.000   
73 

# MultiIndex

## The MultiIndex Object

In [15]:
year_cyl = df.groupby(['model_year', 'cylinders']).mean()

# Indexing with the Hierarchical Index


In [16]:
year_cyl.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,displacement,weight,acceleration,origin
model_year,cylinders,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70,4,25.285714,107.0,2292.571429,16.0,2.285714
70,6,20.5,199.0,2710.5,15.5,1.0
70,8,14.111111,367.555556,3940.055556,11.194444,1.0
71,4,27.461538,101.846154,2056.384615,16.961538,1.923077
71,6,18.0,243.375,3171.875,14.75,1.0


In [17]:
year_cyl.index.names

FrozenList(['model_year', 'cylinders'])

In [18]:
year_cyl.index.levels

FrozenList([[70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82], [3, 4, 5, 6, 8]])

## Grab Based on Outside Index 

In [19]:
year_cyl.loc[70]


Unnamed: 0_level_0,mpg,displacement,weight,acceleration,origin
cylinders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4,25.285714,107.0,2292.571429,16.0,2.285714
6,20.5,199.0,2710.5,15.5,1.0
8,14.111111,367.555556,3940.055556,11.194444,1.0


In [20]:
year_cyl.loc[[70, 71, 72]]

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,displacement,weight,acceleration,origin
model_year,cylinders,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70,4,25.285714,107.0,2292.571429,16.0,2.285714
70,6,20.5,199.0,2710.5,15.5,1.0
70,8,14.111111,367.555556,3940.055556,11.194444,1.0
71,4,27.461538,101.846154,2056.384615,16.961538,1.923077
71,6,18.0,243.375,3171.875,14.75,1.0
71,8,13.428571,371.714286,4537.714286,12.214286,1.0
72,3,19.0,70.0,2330.0,13.5,3.0
72,4,23.428571,111.535714,2382.642857,17.214286,1.928571
72,8,13.615385,344.846154,4228.384615,13.0,1.0


## Grab a Single Row

In [21]:
year_cyl.loc[(70,4)]

mpg               25.285714
displacement     107.000000
weight          2292.571429
acceleration      16.000000
origin             2.285714
Name: (70, 4), dtype: float64

In [22]:
year_cyl.loc[71]

Unnamed: 0_level_0,mpg,displacement,weight,acceleration,origin
cylinders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4,27.461538,101.846154,2056.384615,16.961538,1.923077
6,18.0,243.375,3171.875,14.75,1.0
8,13.428571,371.714286,4537.714286,12.214286,1.0


In [23]:
year_cyl.loc[(71,6)]

mpg               18.000
displacement     243.375
weight          3171.875
acceleration      14.750
origin             1.000
Name: (71, 6), dtype: float64

---
# Grab Based on Cross-section with .xs()

This method takes a `key` argument to select data at a particular
level of a MultiIndex.

Parameters
----------
    key : label or tuple of label
        Label contained in the index, or partially in a MultiIndex.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Axis to retrieve cross-section on.
    level : object, defaults to first n levels (n=1 or len(key))
        In case of a key partially contained in a MultiIndex, indicate
        which levels are used. Levels can be referred by label or position.

In [24]:
year_cyl.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,displacement,weight,acceleration,origin
model_year,cylinders,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70,4,25.285714,107.0,2292.571429,16.0,2.285714
70,6,20.5,199.0,2710.5,15.5,1.0
70,8,14.111111,367.555556,3940.055556,11.194444,1.0
71,4,27.461538,101.846154,2056.384615,16.961538,1.923077
71,6,18.0,243.375,3171.875,14.75,1.0


In [26]:
year_cyl.index

MultiIndex([(70, 4),
            (70, 6),
            (70, 8),
            (71, 4),
            (71, 6),
            (71, 8),
            (72, 3),
            (72, 4),
            (72, 8),
            (73, 3),
            (73, 4),
            (73, 6),
            (73, 8),
            (74, 4),
            (74, 6),
            (74, 8),
            (75, 4),
            (75, 6),
            (75, 8),
            (76, 4),
            (76, 6),
            (76, 8),
            (77, 3),
            (77, 4),
            (77, 6),
            (77, 8),
            (78, 4),
            (78, 5),
            (78, 6),
            (78, 8),
            (79, 4),
            (79, 5),
            (79, 6),
            (79, 8),
            (80, 3),
            (80, 4),
            (80, 5),
            (80, 6),
            (81, 4),
            (81, 6),
            (81, 8),
            (82, 4),
            (82, 6)],
           names=['model_year', 'cylinders'])

In [27]:
year_cyl.xs(key=70, axis=0, level='model_year')

Unnamed: 0_level_0,mpg,displacement,weight,acceleration,origin
cylinders,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
4,25.285714,107.0,2292.571429,16.0,2.285714
6,20.5,199.0,2710.5,15.5,1.0
8,14.111111,367.555556,3940.055556,11.194444,1.0


In [30]:
# Mean column values for 4 cylinders per year

four_cyl = year_cyl.xs(key=4, level='cylinders')
four_cyl


Unnamed: 0_level_0,mpg,displacement,weight,acceleration,origin
model_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
70,25.285714,107.0,2292.571429,16.0,2.285714
71,27.461538,101.846154,2056.384615,16.961538,1.923077
72,23.428571,111.535714,2382.642857,17.214286,1.928571
73,22.727273,109.272727,2338.090909,17.136364,2.0
74,27.8,96.533333,2151.466667,16.4,2.2
75,25.25,114.833333,2489.25,15.833333,2.166667
76,26.766667,106.333333,2306.6,16.866667,1.866667
77,29.107143,106.5,2205.071429,16.064286,1.857143
78,29.576471,112.117647,2296.764706,16.282353,2.117647
79,31.525,113.583333,2357.583333,15.991667,1.583333


### Careful note!

Keep in mind, its usually much easier to filter out values **before** running a groupby() call, so you should attempt to filter out any values/categories you don't want to use. For example, its much easier to remove **4** cylinder cars before the groupby() call, very difficult to this sort of thing after a group by.

In [35]:
six_eight_cyl = df[df['cylinders'].isin([6,8])].groupby(['model_year', 'cylinders']).mean()

In [36]:
six_eight_cyl.xs(key=6, level='cylinders')

Unnamed: 0_level_0,mpg,displacement,weight,acceleration,origin
model_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
70,20.5,199.0,2710.5,15.5,1.0
71,18.0,243.375,3171.875,14.75,1.0
73,19.0,212.25,2917.125,15.6875,1.25
74,17.857143,230.428571,3320.0,16.857143,1.0
75,17.583333,233.75,3398.333333,17.708333,1.0
76,20.0,221.4,3349.6,17.0,1.3
77,19.5,220.4,3383.0,16.9,1.4
78,19.066667,213.25,3314.166667,16.391667,1.166667
79,22.95,205.666667,3025.833333,15.433333,1.0
80,25.9,196.5,3145.5,15.05,2.0


In [37]:
year_cyl.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,displacement,weight,acceleration,origin
model_year,cylinders,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
70,4,25.285714,107.0,2292.571429,16.0,2.285714
70,6,20.5,199.0,2710.5,15.5,1.0
70,8,14.111111,367.555556,3940.055556,11.194444,1.0
71,4,27.461538,101.846154,2056.384615,16.961538,1.923077
71,6,18.0,243.375,3171.875,14.75,1.0


---

# **`.swaplevel()`**

The `.swaplevel()` method is used to interchange two levels of a MultiIndex in a Pandas DataFrame or Series. It is particularly useful for reorganizing or reordering hierarchical indexing.

---

### **Key Parameters**
- **`i`**:  
  Specifies the first level to swap (default: 0).

- **`j`**:  
  Specifies the second level to swap (default: 1).

- **`axis`**:  
  Determines whether to apply the swap on rows (`axis=0`, default) or columns (`axis=1`).

---

### **Usage**
- Reorder levels in a MultiIndex to facilitate analysis or aggregation.
- Combine with other methods like `.sort_index()` for further operations.

In [39]:
year_cyl.swaplevel().head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,displacement,weight,acceleration,origin
cylinders,model_year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
4,70,25.285714,107.0,2292.571429,16.0,2.285714
6,70,20.5,199.0,2710.5,15.5,1.0
8,70,14.111111,367.555556,3940.055556,11.194444,1.0
4,71,27.461538,101.846154,2056.384615,16.961538,1.923077
6,71,18.0,243.375,3171.875,14.75,1.0


## Sorting MultiIndex

---

# **`.sort_index()`**

The `.sort_index()` method sorts a DataFrame or Series by its index (row or column labels). It is useful for organizing data and ensuring a predictable order of indices.

---

### **Key Parameters**
- **`axis`**:  
  Determines whether to sort by rows (`axis=0`, default) or columns (`axis=1`).

- **`level`**:  
  Specifies the level(s) to sort when working with a MultiIndex.

- **`ascending`**:  
  Sorts in ascending order if `True` (default) or descending order if `False`.

- **`inplace`**:  
  If `True`, modifies the original DataFrame/Series.

- **`na_position`**:  
  Places `NaN` values at the beginning (`'first'`) or end (`'last'`, default).

---

### **Usage**
- Organize data by row or column indices.
- Sort specific levels in MultiIndex structures.
- Handle missing values with custom placement.

In [40]:
year_cyl.sort_index(level='model_year', ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,mpg,displacement,weight,acceleration,origin
model_year,cylinders,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
82,6,28.333333,225.0,2931.666667,16.033333,1.0
82,4,32.071429,118.571429,2402.321429,16.703571,1.714286
81,8,26.6,350.0,3725.0,19.0,1.0
81,6,23.428571,184.0,3093.571429,15.442857,1.714286
81,4,32.814286,108.857143,2275.47619,16.466667,2.095238
80,6,25.9,196.5,3145.5,15.05,2.0
80,5,36.4,121.0,2950.0,19.9,2.0
80,4,34.612,111.0,2360.08,17.144,2.2
80,3,23.7,70.0,2420.0,12.5,3.0
79,8,18.63,321.4,3862.9,15.4,1.0


---

# **`.agg()`**

The `.agg()` method is used to apply one or more aggregation functions to a DataFrame or Series. It provides flexibility to compute multiple statistics for groups of data or entire datasets in a single operation.

---

### **Key Features**
- Supports built-in aggregation functions (e.g., `sum`, `mean`, `min`, `max`).
- Allows applying custom functions for aggregation.
- Handles column-wise or row-wise aggregations in DataFrames.
- Compatible with `.groupby()` for grouped aggregations.

---

### **Usage**
- **Single Aggregation**: Apply a single function to all columns or a specific column.
- **Multiple Aggregations**: Apply different functions to different columns or a combination of functions to the same column.

---

```python
# Apply a single aggregation function to all columns
df.agg('sum')

# Apply multiple aggregation functions to a column
df['column_name'].agg(['mean', 'std'])

# Apply different functions to different columns
df.agg({'column1': 'sum', 'column2': ['min', 'max']})


In [41]:
df.head()

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130,3504,12.0,70,1,chevrolet chevelle malibu
1,15.0,8,350.0,165,3693,11.5,70,1,buick skylark 320
2,18.0,8,318.0,150,3436,11.0,70,1,plymouth satellite
3,16.0,8,304.0,150,3433,12.0,70,1,amc rebel sst
4,17.0,8,302.0,140,3449,10.5,70,1,ford torino


In [42]:
# These strings need to match up with built-in method names
df.agg(['mean', 'median'])

  df.agg(['mean', 'median'])


Unnamed: 0,mpg,cylinders,displacement,weight,acceleration,model_year,origin
mean,23.514573,5.454774,193.425879,2970.424623,15.56809,76.01005,1.572864
median,23.0,4.0,148.5,2803.5,15.5,76.0,1.0


In [44]:
df.agg(['mean', 'median'])['mpg']

  df.agg(['mean', 'median'])['mpg']


mean      23.514573
median    23.000000
Name: mpg, dtype: float64

In [45]:
df.agg(['sum','mean'])[['mpg','weight']]

  df.agg(['sum','mean'])[['mpg','weight']]


Unnamed: 0,mpg,weight
sum,9358.8,1182229.0
mean,23.514573,2970.425


### Specify aggregate methods per column

**agg()** is very powerful,allowing you to pass in a dictionary where the keys are the columns and the values are a list of aggregate methods.

In [46]:
df.agg({
    'mpg': ['mean', 'max'],
    'weight': ['mean','std']
})

Unnamed: 0,mpg,weight
mean,23.514573,2970.424623
max,46.6,
std,,846.841774


## agg() with groupby()

In [50]:
df.groupby('model_year').agg({'mpg': ['mean', 'max'],'weight': ['mean','std']})

Unnamed: 0_level_0,mpg,mpg,weight,weight
Unnamed: 0_level_1,mean,max,mean,std
model_year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
70,17.689655,27.0,3372.793103,852.868663
71,21.25,35.0,2995.428571,1061.830859
72,18.714286,28.0,3237.714286,974.52096
73,17.1,29.0,3419.025,974.809133
74,22.703704,32.0,2877.925926,949.308571
75,20.266667,33.0,3176.8,765.179781
76,21.573529,33.0,3078.735294,821.371481
77,23.375,36.0,2997.357143,912.825902
78,24.061111,43.1,2861.805556,626.023907
79,25.093103,37.3,3055.344828,747.881497
