In [2]:
import numpy as np
import pandas as pd

### Series is 1D and DataFrames are 2D objects

- But why?
- And what exactly is an index?

In [3]:
# Can a Series have more than one index? Try using (branch, year) tuples as labels.
index_val = [
    (branch, year)
    for branch in ('cse', 'ece')
    for year in (2019, 2020, 2021, 2022)
]
a = pd.Series(data=[1, 2, 3, 4, 5, 6, 7, 8], index=index_val)
a

Unnamed: 0,0
"(cse, 2019)",1
"(cse, 2020)",2
"(cse, 2021)",3
"(cse, 2022)",4
"(ece, 2019)",5
"(ece, 2020)",6
"(ece, 2021)",7
"(ece, 2022)",8


In [4]:
# The problem: on a flat index of tuples, selecting by the first element alone fails.
# a['cse']  # raises KeyError: 'cse'

KeyError: 'cse'

In [5]:
# The solution -> a MultiIndex Series (also known as hierarchical indexing):
# multiple index levels within a single index

In [9]:
# Two ways to create a MultiIndex object
# 1. pd.MultiIndex.from_tuples() -- one (branch, year) tuple per entry
index_val = [
    (branch, year)
    for branch in ('cse', 'ece')
    for year in (2019, 2020, 2021, 2022)
]
multiindex = pd.MultiIndex.from_tuples(index_val)
# Show all levels together, then each level on its own line
print(multiindex.levels)
print(*multiindex.levels, sep='\n')
# 2. pd.MultiIndex.from_product() -- every combination of the given iterables
pd.MultiIndex.from_product(iterables=[['cse', 'ece'], [2019, 2020, 2021, 2022]])

[['cse', 'ece'], [2019, 2020, 2021, 2022]]
Index(['cse', 'ece'], dtype='object')
Index([2019, 2020, 2021, 2022], dtype='int64')


MultiIndex([('cse', 2019),
            ('cse', 2020),
            ('cse', 2021),
            ('cse', 2022),
            ('ece', 2019),
            ('ece', 2020),
            ('ece', 2021),
            ('ece', 2022)],
           )

In [10]:
# levels inside multiindex object


In [13]:
# Build a Series on top of the MultiIndex object:
# a hierarchical index (tree structure)
w = pd.Series(
    data=[1, 2, 3, 4, 5, 6, 7, 8],
    index=multiindex,
)

In [19]:
# How to fetch items from a MultiIndex Series.
# .loc is label-based and .iloc is position-based; spelling them out avoids
# relying on how plain [] decides between labels and positions.
print(w.loc[('cse', 2022)])  # full (branch, year) key -> a single value
print(w.iloc[0:3])           # first three rows by position
print(w.loc['cse'])          # partial key -> all years under 'cse', indexed by year

4
cse  2019    1
     2020    2
     2021    3
dtype: int64
2019    1
2020    2
2021    3
2022    4
dtype: int64


In [21]:
# unstack: move the last index level into columns
# (MultiIndex Series -> DataFrame)
temp = w.unstack(level=-1)

In [23]:
# stack: fold the columns back into an index level
# (DataFrame -> MultiIndex Series)
w = temp.stack(level=-1)
w

Unnamed: 0,Unnamed: 1,0
cse,2019,1
cse,2020,2
cse,2021,3
cse,2022,4
ece,2019,5
ece,2020,6
ece,2021,7
ece,2022,8


In [None]:
# Then why not just use a DataFrame instead of a MultiIndex Series?
# Because a DataFrame is 2D only, while a MultiIndex can represent any number of dimensions.
# For example, 4D data fits in a single Series (or DataFrame) by using a 4-level MultiIndex.
# That's why MultiIndex is handy.

In [24]:
# MultiIndex DataFrame: rows are labelled by the (branch, year) MultiIndex,
# and each column is given as its own list of values.
branch_df1 = pd.DataFrame(
    {
        'avg_package': [1, 3, 5, 7, 9, 11, 13, 15],
        'students': [2, 4, 6, 8, 10, 12, 14, 16],
    },
    index=multiindex,
)

branch_df1

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2019,1,2
cse,2020,3,4
cse,2021,5,6
cse,2022,7,8
ece,2019,9,10
ece,2020,11,12
ece,2021,13,14
ece,2022,15,16


In [27]:
# Row selection by outer index label, then column selection by name,
# printed one after the other.
print(
    branch_df1.loc['cse'],
    branch_df1['avg_package'],
    sep='\n',
)

      avg_package  students
2019            1         2
2020            3         4
2021            5         6
2022            7         8
cse  2019     1
     2020     3
     2021     5
     2022     7
ece  2019     9
     2020    11
     2021    13
     2022    15
Name: avg_package, dtype: int64


In [28]:
# Are columns really different from index?

In [30]:
# MultiIndex DataFrame from the columns' perspective:
# each column is keyed by a (city, metric) tuple, which pandas turns into
# a two-level column MultiIndex; rows are labelled by year.
branch_df2 = pd.DataFrame(
    {
        ('delhi', 'avg_package'): [1, 3, 5, 7],
        ('delhi', 'students'): [2, 4, 6, 8],
        ('mumbai', 'avg_package'): [0, 0, 0, 0],
        ('mumbai', 'students'): [0, 0, 0, 0],
    },
    index=[2019, 2020, 2021, 2022],
)

branch_df2

Unnamed: 0_level_0,delhi,delhi,mumbai,mumbai
Unnamed: 0_level_1,avg_package,students,avg_package,students
2019,1,2,0,0
2020,3,4,0,0
2021,5,6,0,0
2022,7,8,0,0


In [31]:
# Select a single column from the column MultiIndex with one (city, metric)
# tuple key instead of chaining two [] lookups. The data and row index are
# the same as the chained form; the resulting Series is named by the tuple.
branch_df2[('mumbai', 'avg_package')]

Unnamed: 0,avg_package
2019,0
2020,0
2021,0
2022,0


In [32]:
# Row for 2019: returned as a Series indexed by the (city, metric) column MultiIndex
branch_df2.loc[2019, :]

Unnamed: 0,Unnamed: 1,2019
delhi,avg_package,1
delhi,students,2
mumbai,avg_package,0
mumbai,students,0
