Use the set_index() function to take a list of columns and promote those columns to an index.

In [1]:
# Load the BTC/USD data, using the first CSV column as the row index.
# Keep in mind for the cells below: set_index() is destructive -- it replaces the
# current index instead of keeping it.

import pandas as pd

btc_csv_path = 'datasets/btc_usd_data.csv'
df = pd.read_csv(btc_csv_path, index_col=0)
df.head()

Unnamed: 0_level_0,Close,Sentiment,Purchase,Interest
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
07-15-2010,0.0,0,True,4
07-16-2010,0.04951,1,False,7
07-17-2010,0.08585,1,True,3
07-18-2010,0.09307,2,True,6
07-19-2010,0.08181,1,True,2


In [6]:
# We don't want to index the DataFrame by Date, but by Interest instead. set_index() discards
# the current index, so to keep the dates for later we first copy the index into a new
# 'Date' column and only then promote 'Interest' to be the index.
#
# assign() returns a new DataFrame instead of mutating df in place. If this cell is re-run
# once df is already indexed by Interest, set_index() raises a KeyError and df is left
# untouched, rather than 'Date' being silently overwritten with the current index values.
df = df.assign(Date=df.index).set_index('Interest')
df.head()

Unnamed: 0_level_0,Close,Sentiment,Purchase,Date
Interest,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4,0.0,0,True,07-15-2010
7,0.04951,1,False,07-16-2010
3,0.08585,1,True,07-17-2010
6,0.09307,2,True,07-18-2010
2,0.08181,1,True,07-19-2010


In [5]:
# reset_index() removes the current index: it is moved back into the DataFrame as an
# ordinary column (drop=False, the default) and replaced by a default numbered index.
df = df.reset_index(drop=False)
df.head()

Unnamed: 0,Interest,Close,Sentiment,Purchase,Date
0,4,0.0,0,True,07-15-2010
1,7,0.04951,1,False,07-16-2010
2,3,0.08585,1,True,07-17-2010
3,6,0.09307,2,True,07-18-2010
4,2,0.08181,1,True,07-19-2010


In [26]:
# Switch datasets: from here on df holds the census data, read with the default integer index.
census_csv_path = 'datasets/census.csv'
df = pd.read_csv(census_csv_path)
df.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,NAME,SEX,ORIGIN,RACE,AGE,ESTIMATESBASE2020,POPESTIMATE2020,POPESTIMATE2021
0,40,3,6,1,Alabama,0,0,1,0,34988,34770,34057
1,40,3,6,1,Alabama,0,0,1,1,36181,35829,35219
2,40,3,6,1,Alabama,0,0,1,2,37465,37229,36143
3,40,3,6,1,Alabama,0,0,1,3,38422,38100,37671
4,40,3,6,1,Alabama,0,0,1,4,39384,39339,38508


In [10]:
# List the distinct values that occur in a single column (here: SUMLEV).
sumlev = df['SUMLEV']
sumlev.unique()

array([40])

In [11]:
# If we wanted to exclude rows with a SUMLEV of 50, for instance, we could use
# df = df[df['SUMLEV'] != 50]

In [27]:
# Narrow the DataFrame down to the columns used in the rest of the notebook.
columns_to_keep = [
    'REGION',
    'DIVISION',
    'NAME',
    'POPESTIMATE2020',
    'POPESTIMATE2021',
]
df = df[columns_to_keep]
df.head()

Unnamed: 0,REGION,DIVISION,NAME,POPESTIMATE2020,POPESTIMATE2021
0,3,6,Alabama,34770,34057
1,3,6,Alabama,35829,35219
2,3,6,Alabama,37229,36143
3,3,6,Alabama,38100,37671
4,3,6,Alabama,39339,38508


In [28]:
# An index can have more than one level: passing a list of columns to set_index()
# promotes all of them, in the given order (NAME is the outer level, REGION the inner one).
index_levels = ['NAME', 'REGION']
df = df.set_index(keys=index_levels)
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,DIVISION,POPESTIMATE2020,POPESTIMATE2021
NAME,REGION,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,3,6,34770,34057
Alabama,3,6,35829,35219
Alabama,3,6,37229,36143
Alabama,3,6,38100,37671
Alabama,3,6,39339,38508


In [29]:
# To query a DataFrame with a dual index, give loc one label per index level, in the
# order of the levels: NAME first, then REGION.
# NOTE(review): the REGION label is given as the string '4' -- assumes REGION labels
# are strings in this dataset; confirm.
hawaii_key = ('Hawaii', '4')
df.loc[hawaii_key]

Unnamed: 0_level_0,Unnamed: 1_level_0,DIVISION,POPESTIMATE2020,POPESTIMATE2021
NAME,REGION,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Hawaii,4,9,3107,2934
Hawaii,4,9,3175,2986
Hawaii,4,9,3168,3106
Hawaii,4,9,3226,3117
Hawaii,4,9,3416,3164
Hawaii,...,...,...,...
Hawaii,4,9,24,34
Hawaii,4,9,20,21
Hawaii,4,9,12,17
Hawaii,4,9,3,13


In [30]:
# To compare several entries of a multi-index df, pass a list to loc. A list of plain
# labels matches on the first index level (NAME), so this returns all rows for both states.
# Note that ('Alabama') is just the string 'Alabama' -- parentheses alone do not make a
# tuple. To pin down both index levels, pass real tuples instead, e.g.
# df.loc[[('Alabama', '3'), ('Hawaii', '4')]]
# (assumes REGION labels are strings -- TODO confirm).
df.loc[['Alabama', 'Hawaii']]

Unnamed: 0_level_0,Unnamed: 1_level_0,DIVISION,POPESTIMATE2020,POPESTIMATE2021
NAME,REGION,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alabama,3,6,34770,34057
Alabama,3,6,35829,35219
Alabama,3,6,37229,36143
Alabama,3,6,38100,37671
Alabama,3,6,39339,38508
...,...,...,...,...
Hawaii,4,9,24,34
Hawaii,4,9,20,21
Hawaii,4,9,12,17
Hawaii,4,9,3,13
