# DataFrame: change index, multi-index, index-hierarchy

In [2]:
import numpy as np
import pandas as pd

# randn returns random numbers drawn from the "standard NORMAL distribution" (centered around 0)
from numpy.random import randn

# Seed NumPy's legacy global RNG so the sample below is reproducible.
np.random.seed(101)

# 5 x 4 array (20 values) drawn from the standard normal distribution.
rnd_20 = randn(5, 4)

# Wrap the sample in a labelled frame: rows r1..r5, columns c1..c4.
fd = pd.DataFrame(
    rnd_20,
    index=[f"r{row}" for row in range(1, 6)],
    columns=[f"c{col}" for col in range(1, 5)],
)

## ------------    changing index    ------------
### reset_index

In [2]:
# Display the frame as built: the custom row labels r1..r5 form its index.
fd

Unnamed: 0,c1,c2,c3,c4
r1,2.70685,0.628133,0.907969,0.503826
r2,0.651118,-0.319318,-0.848077,0.605965
r3,-2.018168,0.740122,0.528813,-0.589001
r4,0.188695,-0.758872,-0.933237,0.955057
r5,0.190794,1.978757,2.605967,0.683509


In [3]:
# Reset the index back to the default integer index.
fd.reset_index()
# Notice that the old index labels "r1", "r2", "r3", "r4", "r5" moved into a
# regular column named "index", and the actual index is now 0, 1, 2, 3, 4.

Unnamed: 0,index,c1,c2,c3,c4
0,r1,2.70685,0.628133,0.907969,0.503826
1,r2,0.651118,-0.319318,-0.848077,0.605965
2,r3,-2.018168,0.740122,0.528813,-0.589001
3,r4,0.188695,-0.758872,-0.933237,0.955057
4,r5,0.190794,1.978757,2.605967,0.683509


In [4]:
# Note: reset_index() does not operate in place; it returns a new DataFrame,
# so fd still shows its original r1..r5 index here.
fd
# To keep the change, either reassign the result or use the "inplace" flag:
# fd = fd.reset_index()
# fd.reset_index(inplace=True)

Unnamed: 0,c1,c2,c3,c4
r1,2.70685,0.628133,0.907969,0.503826
r2,0.651118,-0.319318,-0.848077,0.605965
r3,-2.018168,0.740122,0.528813,-0.589001
r4,0.188695,-0.758872,-0.933237,0.955057
r5,0.190794,1.978757,2.605967,0.683509


### set_index

In [1]:
# Handy trick for building a list of short strings:
# call split() on one string instead of typing every ',' and "" by hand.
newind = "CA NY WY OR CO".split()   # no argument: splits on runs of whitespace
newind

['CA', 'NY', 'WY', 'OR', 'CO']

In [3]:
# Add "newind" to the DataFrame as a new column named "States".
# The list length must match the number of rows in fd (5 here).
fd['States'] = newind
fd

Unnamed: 0,c1,c2,c3,c4,States
r1,2.70685,0.628133,0.907969,0.503826,CA
r2,0.651118,-0.319318,-0.848077,0.605965,NY
r3,-2.018168,0.740122,0.528813,-0.589001,WY
r4,0.188695,-0.758872,-0.933237,0.955057,OR
r5,0.190794,1.978757,2.605967,0.683509,CO


#### column as index: setting a column as index
    # instead of resetting we want the column "States" to be the index of our DataFrame
    # use set_index() instead of reset_index()


In [4]:
fd.set_index("States")  # replaces the old index with the "States" column
# Note: like reset_index(), this returns a new DataFrame; to keep the change,
# reassign the result or pass inplace=True.
# Unlike reset_index(), the old index labels (r1..r5) are not kept as a column; they are discarded.

Unnamed: 0_level_0,c1,c2,c3,c4
States,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2.70685,0.628133,0.907969,0.503826
NY,0.651118,-0.319318,-0.848077,0.605965
WY,-2.018168,0.740122,0.528813,-0.589001
OR,0.188695,-0.758872,-0.933237,0.955057
CO,0.190794,1.978757,2.605967,0.683509
