# EXPLICIT INDEXES

In [47]:
import pandas as pd
dogs_df = pd.read_csv("dogs.csv")
print(dogs_df)

      name        breed  color  height_cm  weight_kg date_of_birth
0    Bella     Labrador  Brown         56         24    2013-07-01
1  Charlie       Poodle  Black         43         24    2016-09-16
2     Lucy    Chow Chow  Brown         46         24    2014-08-25
3   Cooper    Schnauzer   Gray         49         17    2011-12-11
4      Max     Labrador  Black         59         29    2017-01-20
5   Stella    Chihuahua    Tan         18          2    2015-04-20
6   Bernie  St. Bernard  White         77         74    2018-02-27


In [48]:
dogs_df.columns

Index(['name', 'breed', 'color', 'height_cm', 'weight_kg', 'date_of_birth'], dtype='object')

In [49]:
dogs_df.index

RangeIndex(start=0, stop=7, step=1)

## Setting a column as the index

In [50]:
dogs_ind = dogs_df.set_index("name")
print(dogs_ind)

               breed  color  height_cm  weight_kg date_of_birth
name                                                           
Bella       Labrador  Brown         56         24    2013-07-01
Charlie       Poodle  Black         43         24    2016-09-16
Lucy       Chow Chow  Brown         46         24    2014-08-25
Cooper     Schnauzer   Gray         49         17    2011-12-11
Max         Labrador  Black         59         29    2017-01-20
Stella     Chihuahua    Tan         18          2    2015-04-20
Bernie   St. Bernard  White         77         74    2018-02-27


## Removing an index

In [51]:
dogs_ind.reset_index()

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,24,2013-07-01
1,Charlie,Poodle,Black,43,24,2016-09-16
2,Lucy,Chow Chow,Brown,46,24,2014-08-25
3,Cooper,Schnauzer,Gray,49,17,2011-12-11
4,Max,Labrador,Black,59,29,2017-01-20
5,Stella,Chihuahua,Tan,18,2,2015-04-20
6,Bernie,St. Bernard,White,77,74,2018-02-27


## Dropping an index

In [52]:
dogs_ind.reset_index(drop=True)

Unnamed: 0,breed,color,height_cm,weight_kg,date_of_birth
0,Labrador,Brown,56,24,2013-07-01
1,Poodle,Black,43,24,2016-09-16
2,Chow Chow,Brown,46,24,2014-08-25
3,Schnauzer,Gray,49,17,2011-12-11
4,Labrador,Black,59,29,2017-01-20
5,Chihuahua,Tan,18,2,2015-04-20
6,St. Bernard,White,77,74,2018-02-27


## Indexes make subsetting simpler

In [53]:
print(dogs_ind)

               breed  color  height_cm  weight_kg date_of_birth
name                                                           
Bella       Labrador  Brown         56         24    2013-07-01
Charlie       Poodle  Black         43         24    2016-09-16
Lucy       Chow Chow  Brown         46         24    2014-08-25
Cooper     Schnauzer   Gray         49         17    2011-12-11
Max         Labrador  Black         59         29    2017-01-20
Stella     Chihuahua    Tan         18          2    2015-04-20
Bernie   St. Bernard  White         77         74    2018-02-27


In [54]:
dogs_df[dogs_df["name"].isin(["Bella", "Cooper"])]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,24,2013-07-01
3,Cooper,Schnauzer,Gray,49,17,2011-12-11


In [55]:
dogs_ind.loc[["Bella", "Stella"]]

Unnamed: 0_level_0,breed,color,height_cm,weight_kg,date_of_birth
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bella,Labrador,Brown,56,24,2013-07-01
Stella,Chihuahua,Tan,18,2,2015-04-20


In [56]:
dogs_ind2 = dogs_df.set_index("breed")
dogs_ind2

Unnamed: 0_level_0,name,color,height_cm,weight_kg,date_of_birth
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Bella,Brown,56,24,2013-07-01
Poodle,Charlie,Black,43,24,2016-09-16
Chow Chow,Lucy,Brown,46,24,2014-08-25
Schnauzer,Cooper,Gray,49,17,2011-12-11
Labrador,Max,Black,59,29,2017-01-20
Chihuahua,Stella,Tan,18,2,2015-04-20
St. Bernard,Bernie,White,77,74,2018-02-27


In [57]:
dogs_ind2.loc[["Labrador", "Chihuahua"]]

Unnamed: 0_level_0,name,color,height_cm,weight_kg,date_of_birth
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Bella,Brown,56,24,2013-07-01
Labrador,Max,Black,59,29,2017-01-20
Chihuahua,Stella,Tan,18,2,2015-04-20


## Subsetting on duplicated index values

In [58]:
dogs_ind2.loc[["Labrador"]]

Unnamed: 0_level_0,name,color,height_cm,weight_kg,date_of_birth
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Bella,Brown,56,24,2013-07-01
Labrador,Max,Black,59,29,2017-01-20


## Multi-level indexes a.k.a hierarchical index

In [59]:
dogs_ind3 = dogs_df.set_index(["breed", "color"])
dogs_ind3

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16
Chow Chow,Brown,Lucy,46,24,2014-08-25
Schnauzer,Gray,Cooper,49,17,2011-12-11
Labrador,Black,Max,59,29,2017-01-20
Chihuahua,Tan,Stella,18,2,2015-04-20
St. Bernard,White,Bernie,77,74,2018-02-27


## Subset the outer level with a list

In [60]:
dogs_ind3.loc[["Labrador", "Chihuahua"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Bella,56,24,2013-07-01
Labrador,Black,Max,59,29,2017-01-20
Chihuahua,Tan,Stella,18,2,2015-04-20


## subset inner levels with  a list of tuples

In [61]:
dogs_ind3.loc[[("Labrador", "Brown"), ("Chihuahua", "Tan")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Bella,56,24,2013-07-01
Chihuahua,Tan,Stella,18,2,2015-04-20


## Sorting by index values

In [62]:
dogs_ind3.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Tan,Stella,18,2,2015-04-20
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16
Schnauzer,Gray,Cooper,49,17,2011-12-11
St. Bernard,White,Bernie,77,74,2018-02-27


## Controlling sort_index

In [63]:
dogs_ind3.sort_index(level=["color", "breed"], ascending=[True, False])

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Poodle,Black,Charlie,43,24,2016-09-16
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Chow Chow,Brown,Lucy,46,24,2014-08-25
Schnauzer,Gray,Cooper,49,17,2011-12-11
Chihuahua,Tan,Stella,18,2,2015-04-20
St. Bernard,White,Bernie,77,74,2018-02-27


## Slicing and subsetting with .loc and .iloc

## Sort the index before you slice

In [64]:
dogs_srt = dogs_df.set_index(["breed", "color"]).sort_index()

In [65]:
dogs_srt

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Tan,Stella,18,2,2015-04-20
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16
Schnauzer,Gray,Cooper,49,17,2011-12-11
St. Bernard,White,Bernie,77,74,2018-02-27


## Slicing the outer index level

In [66]:
dogs_srt.loc["Chow Chow" : "Poodle"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16


In [67]:
dogs_srt.loc[["Chow Chow", "Poodle"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chow Chow,Brown,Lucy,46,24,2014-08-25
Poodle,Black,Charlie,43,24,2016-09-16


In [68]:
dogs_srt.loc["Chihuahua": "Poodle"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Tan,Stella,18,2,2015-04-20
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16


In [69]:
dogs_srt

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Tan,Stella,18,2,2015-04-20
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16
Schnauzer,Gray,Cooper,49,17,2011-12-11
St. Bernard,White,Bernie,77,74,2018-02-27


In [70]:
dogs_srt.loc[("Chihuahua", "Tan") : ("Poodle", "Black")]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Tan,Stella,18,2,2015-04-20
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16


## Slicing columns

In [71]:
dogs_srt.loc[:, "name": "weight_kg"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chihuahua,Tan,Stella,18,2
Chow Chow,Brown,Lucy,46,24
Labrador,Black,Max,59,29
Labrador,Brown,Bella,56,24
Poodle,Black,Charlie,43,24
Schnauzer,Gray,Cooper,49,17
St. Bernard,White,Bernie,77,74


## Slice twice

In [72]:
dogs_srt.loc[("Chihuahua", "Tan") : ("Schnauzer", "Gray"), "name": "height_cm"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1
Chihuahua,Tan,Stella,18
Chow Chow,Brown,Lucy,46
Labrador,Black,Max,59
Labrador,Brown,Bella,56
Poodle,Black,Charlie,43
Schnauzer,Gray,Cooper,49


In [73]:
dogs_srt.loc[("Chihuahua", "Tan") : ("Schnauzer", "Gray"), "name": "weight_kg"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Chihuahua,Tan,Stella,18,2
Chow Chow,Brown,Lucy,46,24
Labrador,Black,Max,59,29
Labrador,Brown,Bella,56,24
Poodle,Black,Charlie,43,24
Schnauzer,Gray,Cooper,49,17


In [74]:
dogs_srt.loc[("Chihuahua", "Tan") : ("Schnauzer", "Gray"), "name": "date_of_birth"]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Tan,Stella,18,2,2015-04-20
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16
Schnauzer,Gray,Cooper,49,17,2011-12-11


In [75]:
dogs = dogs_srt.set_index("date_of_birth").sort_index()

In [76]:
dogs

Unnamed: 0_level_0,name,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2011-12-11,Cooper,49,17
2013-07-01,Bella,56,24
2014-08-25,Lucy,46,24
2015-04-20,Stella,18,2
2016-09-16,Charlie,43,24
2017-01-20,Max,59,29
2018-02-27,Bernie,77,74


## Slicing by Date

get dogs with date of birth between 2014-08-25 and 2016-09-16

In [77]:
dogs.loc["2014-08-25": "2016-09-16"]

Unnamed: 0_level_0,name,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-08-25,Lucy,46,24
2015-04-20,Stella,18,2
2016-09-16,Charlie,43,24


In [78]:
dogs.loc["2014" : "2016"]

Unnamed: 0_level_0,name,height_cm,weight_kg
date_of_birth,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-08-25,Lucy,46,24
2015-04-20,Stella,18,2


## Working with pivot tables

In [79]:
dogs_height_by_breed_vs_color = dogs_df.pivot_table("height_cm", index= "breed", columns="color",fill_value=0)
print(dogs_height_by_breed_vs_color)

color        Black  Brown  Gray   Tan  White
breed                                       
Chihuahua      0.0    0.0   0.0  18.0    0.0
Chow Chow      0.0   46.0   0.0   0.0    0.0
Labrador      59.0   56.0   0.0   0.0    0.0
Poodle        43.0    0.0   0.0   0.0    0.0
Schnauzer      0.0    0.0  49.0   0.0    0.0
St. Bernard    0.0    0.0   0.0   0.0   77.0


In [80]:
dogs_height_by_breed_vs_color.loc["Chihuahua" : "St. Bernard"]

color,Black,Brown,Gray,Tan,White
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,0.0,0.0,0.0,18.0,0.0
Chow Chow,0.0,46.0,0.0,0.0,0.0
Labrador,59.0,56.0,0.0,0.0,0.0
Poodle,43.0,0.0,0.0,0.0,0.0
Schnauzer,0.0,0.0,49.0,0.0,0.0
St. Bernard,0.0,0.0,0.0,0.0,77.0


## The axis argument

In [81]:
dogs_height_by_breed_vs_color.mean(axis="index")

color
Black    17.000000
Brown    17.000000
Gray      8.166667
Tan       3.000000
White    12.833333
dtype: float64

## Calculating summary stats across columns

In [82]:
dogs_height_by_breed_vs_color.mean(axis="columns")

breed
Chihuahua       3.6
Chow Chow       9.2
Labrador       23.0
Poodle          8.6
Schnauzer       9.8
St. Bernard    15.4
dtype: float64

# THE END 