In [1]:
import pandas as pd
# Load the dogs table from a CSV given by relative path. No index column is
# requested, so the frame gets the default integer row index.
dogs = pd.read_csv("dogs_data.csv")

In [2]:
# Column labels of the frame; pandas stores them in an Index object.
dogs.columns

Index(['name', 'breed', 'color', 'height_cm', 'weight_kg', 'date_of_birth'], dtype='object')

In [3]:
# Row labels of the frame; here the default RangeIndex (0 up to, not including, 7).
dogs.index

RangeIndex(start=0, stop=7, step=1)

# Setting a column as the index

In [5]:
# Promote the `name` column to the row index. The result is a new frame;
# `dogs` itself keeps its integer index and its `name` column.
dogs_ind = dogs.set_index(keys="name")

In [6]:
# Plain-text view: `name` is now the index (its label sits on its own header
# row) rather than one of the regular columns.
print(dogs_ind)

               breed  color  height_cm  weight_kg date_of_birth
name                                                           
Bella       Labrador  Brown         56         24    2013-07-01
Charlie       Poodle  Black         43         24    2016-09-16
Lucy       Chow Chow  Brown         46         24    2014-08-25
Cooper     Schnauzer   Gray         49         17    2011-12-11
Max         Labrador  Black         59         29    2017-01-20
Stella     Chihuahua    Tan         18          2    2015-04-20
Bernie   St. Bernard  White         77         74    2018-02-27


# Removing an index

In [10]:
# Move `name` back into a regular column and restore the default integer index.
# Nothing is assigned, so `dogs_ind` itself is left unchanged.
dogs_ind.reset_index()

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,24,2013-07-01
1,Charlie,Poodle,Black,43,24,2016-09-16
2,Lucy,Chow Chow,Brown,46,24,2014-08-25
3,Cooper,Schnauzer,Gray,49,17,2011-12-11
4,Max,Labrador,Black,59,29,2017-01-20
5,Stella,Chihuahua,Tan,18,2,2015-04-20
6,Bernie,St. Bernard,White,77,74,2018-02-27


# Dropping an index

In [11]:
# drop=True discards the old index values instead of re-inserting them as a
# column, so the dog names are absent from the result.
dogs_ind.reset_index(drop=True)

Unnamed: 0,breed,color,height_cm,weight_kg,date_of_birth
0,Labrador,Brown,56,24,2013-07-01
1,Poodle,Black,43,24,2016-09-16
2,Chow Chow,Brown,46,24,2014-08-25
3,Schnauzer,Gray,49,17,2011-12-11
4,Labrador,Black,59,29,2017-01-20
5,Chihuahua,Tan,18,2,2015-04-20
6,St. Bernard,White,77,74,2018-02-27


# Indexes make subsetting simpler

In [12]:
# Without a meaningful index: build a boolean mask on the `name` column and
# use it to keep only the matching rows.
dogs.loc[dogs["name"].isin(["Bella", "Stella"])]

Unnamed: 0,name,breed,color,height_cm,weight_kg,date_of_birth
0,Bella,Labrador,Brown,56,24,2013-07-01
5,Stella,Chihuahua,Tan,18,2,2015-04-20


In [13]:
# With `name` as the index, the same two rows are selected directly by label.
dogs_ind.loc[["Bella","Stella"]]

Unnamed: 0_level_0,breed,color,height_cm,weight_kg,date_of_birth
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Bella,Labrador,Brown,56,24,2013-07-01
Stella,Chihuahua,Tan,18,2,2015-04-20


# Index values don't need to be unique

In [16]:
# Index by `breed`, whose values repeat ("Labrador" occurs twice); set_index
# accepts the duplicates and keeps both rows.
dogs_ind2 = dogs.set_index("breed")
print(dogs_ind2)

                name  color  height_cm  weight_kg date_of_birth
breed                                                          
Labrador       Bella  Brown         56         24    2013-07-01
Poodle       Charlie  Black         43         24    2016-09-16
Chow Chow       Lucy  Brown         46         24    2014-08-25
Schnauzer     Cooper   Gray         49         17    2011-12-11
Labrador         Max  Black         59         29    2017-01-20
Chihuahua     Stella    Tan         18          2    2015-04-20
St. Bernard   Bernie  White         77         74    2018-02-27


In [17]:
# Selecting a label that occurs more than once returns every matching row
# (both Labradors here).
dogs_ind2.loc["Labrador"]

Unnamed: 0_level_0,name,color,height_cm,weight_kg,date_of_birth
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Bella,Brown,56,24,2013-07-01
Labrador,Max,Black,59,29,2017-01-20


# Multi-level indexes a.k.a. hierarchical indexes

In [18]:
# Build a two-level (hierarchical) index: `breed` is the outer level and
# `color` the inner level, in the order the column names are listed.
dogs_ind3 = dogs.set_index(keys=["breed", "color"])

In [19]:
# Plain-text view: both index levels (breed, then color) appear to the left of
# the remaining data columns.
print(dogs_ind3)

                      name  height_cm  weight_kg date_of_birth
breed       color                                             
Labrador    Brown    Bella         56         24    2013-07-01
Poodle      Black  Charlie         43         24    2016-09-16
Chow Chow   Brown     Lucy         46         24    2014-08-25
Schnauzer   Gray    Cooper         49         17    2011-12-11
Labrador    Black      Max         59         29    2017-01-20
Chihuahua   Tan     Stella         18          2    2015-04-20
St. Bernard White   Bernie         77         74    2018-02-27


In [20]:
# A list of plain labels is matched against the outer level (breed), so every
# color of each listed breed is returned.
dogs_ind3.loc[["Labrador","Chihuahua"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Bella,56,24,2013-07-01
Labrador,Black,Max,59,29,2017-01-20
Chihuahua,Tan,Stella,18,2,2015-04-20


# Subset inner levels with a list of tuples

In [21]:
# Each tuple is a (breed, color) pair, so only exact combinations are
# returned; the black Labrador is excluded.
dogs_ind3.loc[[("Labrador","Brown"),("Chihuahua","Tan")]]

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Labrador,Brown,Bella,56,24,2013-07-01
Chihuahua,Tan,Stella,18,2,2015-04-20


# Sorting by index values

In [22]:
# With no arguments, rows are ordered by the index levels from outer to inner,
# ascending: breed first, then color within each breed.
dogs_ind3.sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Chihuahua,Tan,Stella,18,2,2015-04-20
Chow Chow,Brown,Lucy,46,24,2014-08-25
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Poodle,Black,Charlie,43,24,2016-09-16
Schnauzer,Gray,Cooper,49,17,2011-12-11
St. Bernard,White,Bernie,77,74,2018-02-27


In [24]:
# Control the sort explicitly: order by color A-to-Z first, then by breed
# Z-to-A within each color. The two lists pair up position by position.
dogs_ind3.sort_index(
    level=["color", "breed"],
    ascending=[True, False],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,name,height_cm,weight_kg,date_of_birth
breed,color,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Poodle,Black,Charlie,43,24,2016-09-16
Labrador,Black,Max,59,29,2017-01-20
Labrador,Brown,Bella,56,24,2013-07-01
Chow Chow,Brown,Lucy,46,24,2014-08-25
Schnauzer,Gray,Cooper,49,17,2011-12-11
Chihuahua,Tan,Stella,18,2,2015-04-20
St. Bernard,White,Bernie,77,74,2018-02-27
