In [1]:
import numpy as np
import pandas as pd

In [2]:
# Can a Series carry more than one index level? First attempt: use plain
# (branch, year) tuples as the labels and see how pandas treats them.
index_val = [(branch, year) for branch in ('cse', 'ece') for year in range(2019, 2023)]
a = pd.Series(data=list(range(1, 9)), index=index_val)
a

(cse, 2019)    1
(cse, 2020)    2
(cse, 2021)    3
(cse, 2022)    4
(ece, 2019)    5
(ece, 2020)    6
(ece, 2021)    7
(ece, 2022)    8
dtype: int64

In [3]:
# A Series labelled with plain tuples has no notion of levels, so looking up
# only the first element of a tuple fails. The error is the point of this
# cell, so catch and print it instead of letting it abort Restart & Run All.
try:
    a['cse']
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'cse'

In [6]:
# Fix: use a MultiIndex (hierarchical indexing), i.e. several index levels
# stored inside a single index object.
# Ways to build a MultiIndex object:
# 1. pd.MultiIndex.from_tuples() -- from an explicit list of label tuples
mlidx = pd.MultiIndex.from_tuples(tuples=index_val)

In [10]:
# .levels holds the unique labels of each level; shown here as (years, branches)
mlidx.levels[1],mlidx.levels[0]

(Index([2019, 2020, 2021, 2022], dtype='int64'),
 Index(['cse', 'ece'], dtype='object'))

In [13]:
# 2. pd.MultiIndex.from_product() -- Cartesian product of the per-level label lists
# NOTE(review): 2034 and 1032 look like typos for other years; left unchanged
# because the displayed output uses them -- confirm.
mlidx2=pd.MultiIndex.from_product([['cse','ece'],[2019,2034,1032,2020]])
mlidx2  # end on the bare name so the cell actually displays the index

MultiIndex([('cse', 2019),
            ('cse', 2034),
            ('cse', 1032),
            ('cse', 2020),
            ('ece', 2019),
            ('ece', 2034),
            ('ece', 1032),
            ('ece', 2020)],
           )

# Multi Indexing in Series

In [16]:
# Attach the hierarchical index to a Series holding eight values
s = pd.Series(data=list(range(1, 9)), index=mlidx)
s

cse  2019    1
     2020    2
     2021    3
     2022    4
ece  2019    5
     2020    6
     2021    7
     2022    8
dtype: int64

In [17]:
# Fetching items: an outer-level label returns every item under it,
# keyed by the remaining inner level
s.loc['cse']

2019    1
2020    2
2021    3
2022    4
dtype: int64

In [78]:
# unstack() pivots the innermost index level (level=-1, the default) into columns
t = s.unstack(level=-1)
t

Unnamed: 0,2019,2020,2021,2022
cse,1,2,3,4
ece,5,6,7,8


In [39]:
t.stack() # the reverse of unstack: moves the column labels back into the index

cse  2019    1
     2020    2
     2021    3
     2022    4
ece  2019    5
     2020    6
     2021    7
     2022    8
dtype: int64

# MultiIndex DataFrame

In [40]:
# Eight rows, one per (branch, year) pair in mlidx, with two value columns.
# NOTE(review): 111 breaks the otherwise sequential 1..16 values -- possibly
# a typo for 11; kept as-is because the displayed outputs depend on it.
df = pd.DataFrame(
    data=[
        [1, 2],
        [3, 4],
        [5, 6],
        [7, 8],
        [9, 10],
        [111, 12],
        [13, 14],
        [15, 16],
    ],
    index=mlidx,
    columns=['age', 'percentage'],
)

In [41]:
# Both index levels (branch, year) are displayed as row labels
df

Unnamed: 0,Unnamed: 1,age,percentage
cse,2019,1,2
cse,2020,3,4
cse,2021,5,6
cse,2022,7,8
ece,2019,9,10
ece,2020,111,12
ece,2021,13,14
ece,2022,15,16


In [42]:
# An outer-level label selects all rows of that branch, indexed by year
df.loc['cse']

Unnamed: 0,age,percentage
2019,1,2
2020,3,4
2021,5,6
2022,7,8


In [43]:
# A complete (branch, year) key selects exactly one row, returned as a Series
df.loc[('cse', 2019), :]

age           1
percentage    2
Name: (cse, 2019), dtype: int64

In [44]:
# Column selection is unchanged; the result keeps the MultiIndex on its rows
df['age']

cse  2019      1
     2020      3
     2021      5
     2022      7
ece  2019      9
     2020    111
     2021     13
     2022     15
Name: age, dtype: int64

# MultiIndexing on Columns

In [96]:
# Same hierarchy, but on the column axis: two rows, eight (branch, year) columns
ct = pd.DataFrame(
    data=[
        [1, 2, 3, 4, 5, 6, 7, 8],
        [10, 20, 30, 40, 50, 60, 70, 80],
    ],
    columns=mlidx,
    index=['a', 'b'],
)

In [67]:
# Pick the 'cse' branch first, then two of its year columns
ct['cse'].loc[:, [2020, 2022]]

Unnamed: 0,2020,2022
a,2,4
b,20,40


In [69]:
# A row of ct comes back as a Series indexed by the (branch, year) column MultiIndex
ct.loc['a']

cse  2019    1
     2020    2
     2021    3
     2022    4
ece  2019    5
     2020    6
     2021    7
     2022    8
Name: a, dtype: int64

In [70]:
# Full table: the two column levels are rendered as stacked header rows
ct

Unnamed: 0_level_0,cse,cse,cse,cse,ece,ece,ece,ece
Unnamed: 0_level_1,2019,2020,2021,2022,2019,2020,2021,2022
a,1,2,3,4,5,6,7,8
b,10,20,30,40,50,60,70,80


# MultiIndexing on both rows and columns

In [79]:
# Hierarchical labels on both axes: (city, row) on the index and
# (branch, year) on the columns
pd.DataFrame(
    data=[
        [1, 2, 3, 4, 5, 6, 7, 8],
        [10, 20, 30, 40, 50, 60, 70, 80],
        [100, 200, 300, 400, 500, 600, 700, 800],
        [12, 13, 14, 15, 16, 17, 18, 19],
    ],
    columns=mlidx,
    index=pd.MultiIndex.from_product([['Solapur', 'Pune'], ['a', 'b']]),
)

Unnamed: 0_level_0,Unnamed: 1_level_0,cse,cse,cse,cse,ece,ece,ece,ece
Unnamed: 0_level_1,Unnamed: 1_level_1,2019,2020,2021,2022,2019,2020,2021,2022
Solapur,a,1,2,3,4,5,6,7,8
Solapur,b,10,20,30,40,50,60,70,80
Pune,a,100,200,300,400,500,600,700,800
Pune,b,12,13,14,15,16,17,18,19


# Working with MultiIndexing Table

In [81]:
# head() -- first rows (ct has only two rows, so this shows the whole frame)
ct.head()

Unnamed: 0_level_0,cse,cse,cse,cse,ece,ece,ece,ece
Unnamed: 0_level_1,2019,2020,2021,2022,2019,2020,2021,2022
a,1,2,3,4,5,6,7,8
b,10,20,30,40,50,60,70,80


In [83]:
# shape -- (rows, columns); each (branch, year) pair counts as one column
ct.shape

(2, 8)

In [84]:
# info() -- per-column dtype and non-null counts; MultiIndex columns are listed as tuples
ct.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2 entries, a to b
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype
---  ------       --------------  -----
 0   (cse, 2019)  2 non-null      int64
 1   (cse, 2020)  2 non-null      int64
 2   (cse, 2021)  2 non-null      int64
 3   (cse, 2022)  2 non-null      int64
 4   (ece, 2019)  2 non-null      int64
 5   (ece, 2020)  2 non-null      int64
 6   (ece, 2021)  2 non-null      int64
 7   (ece, 2022)  2 non-null      int64
dtypes: int64(8)
memory usage: 252.0+ bytes


In [85]:
# duplicated() -- one boolean per row, True where the row repeats an earlier one
ct.duplicated()

a    False
b    False
dtype: bool

In [86]:
# Missing-value count per (branch, year) column; isna() is the method isnull() aliases
ct.isna().sum()

cse  2019    0
     2020    0
     2021    0
     2022    0
ece  2019    0
     2020    0
     2021    0
     2022    0
dtype: int64

# extracting single rows

In [107]:

# Move the innermost column level (year) into the row index, so rows are
# keyed by (row, year) and the columns are the branches.
# future_stack=True opts into the new stack implementation and silences the
# FutureWarning that pandas >= 2.1 emits for a bare stack() call.
tp = ct.stack(future_stack=True)
tp
# A single row is then addressed with a complete key, e.g. tp.loc[('a', 2022)]

  tp=ct.stack()


Unnamed: 0,Unnamed: 1,cse,ece
a,2019,1,5
a,2020,2,6
a,2021,3,7
a,2022,4,8
b,2019,10,50
b,2020,20,60
b,2021,30,70
b,2022,40,80


In [109]:
# multiple rows: a label slice includes both endpoints; the step of 2 keeps every other row
tp.loc[('a',2019):('b',2020):2]

Unnamed: 0,Unnamed: 1,cse,ece
a,2019,1,5
a,2021,3,7
b,2019,10,50


In [110]:
# iloc is purely positional: the first row, whatever its labels are
tp.iloc[0]

cse    1
ece    5
Name: (a, 2019), dtype: int64

In [111]:
# Positional slice with a step: rows at positions 0, 2 and 4
tp.iloc[:5:2]

Unnamed: 0,Unnamed: 1,cse,ece
a,2019,1,5
a,2021,3,7
b,2019,10,50


# extracting single cols

In [120]:
# Rebuild tp directly from ct so this cell is idempotent: branches on the
# rows, (row, year) pairs on the columns.
# The previous `tp = tp.unstack().unstack()` depended on how many times the
# cell had been run: on a fresh kernel it collapsed tp into a Series, and
# tp['a'] then raised KeyError.
tp = ct.T.unstack()
# Selecting the outer column label 'a' yields that row's values by branch and year
tp['a']

Unnamed: 0,2019,2020,2021,2022
cse,1,2,3,4
ece,5,6,7,8
