In [1]:
import pandas as pd
import numpy as np

## Series in 1D and DataFrames are 2D Objects

-  But Why?
- and what exactly is index?

In [6]:
# can we have multiple index?
index_val = [('cse', 2019),
            ('cse', 2020),
            ('cse', 2021),
            ('cse', 2022),
            ('ece', 2019),
            ('ece', 2020),
            ('ece', 2021),
            ('ece', 2020)]
a = pd.Series(range(1, 9), index = index_val)
a

(cse, 2019)    1
(cse, 2020)    2
(cse, 2021)    3
(cse, 2022)    4
(ece, 2019)    5
(ece, 2020)    6
(ece, 2021)    7
(ece, 2020)    8
dtype: int64

In [7]:
# The problem
a['cse']

KeyError: 'cse'

In [9]:
# The solution -> multi index series(also known as hierarchical indexing)
# multiple index levels within a single index

In [14]:
# 1. pd.MultiIndex.from_tuple()
index_val = [('cse', 2019),
            ('cse', 2020),
            ('cse', 2021),
            ('cse', 2022),
            ('ece', 2019),
            ('ece', 2020),
            ('ece', 2021),
            ('ece', 2020)]

multiindex = pd.MultiIndex.from_tuples(index_val)
# multiindex is a multi index object which as the capability of decouple
multiindex.levels

FrozenList([['cse', 'ece'], [2019, 2020, 2021, 2022]])

In [17]:
# 2. pd.MultiIndex.from_product()
pd.MultiIndex.from_product([['cse', 'ece'], [2019, 2020, 2021, 2022]])
s=pd.Series(range(1, 9), index = multiindex)

In [19]:
# how to fetch items from such a series
s[('ece', 2019)]

  s[('ece', 2019)]


ece  2019    5
dtype: int64

In [20]:
s['cse']

2019    1
2020    2
2021    3
2022    4
dtype: int64

In [30]:
# How to convert multistack series to dataframe? using unstack
# converts innermost index to columns
# s.unstack()


In [29]:
# stack
# temp.stack()
s

cse  2019    1
     2020    2
     2021    3
     2022    4
ece  2019    5
     2020    6
     2021    7
     2020    8
dtype: int64

Multi indexing helps to represent higher dimesnion object to lower dimension object.

In [31]:
branch_df1 = pd.DataFrame([[9, 10],
                          [10, 11],
                          [9, 8],
                          [7, 10],
                          [3, 6],
                          [5, 10],
                          [7, 19],
                          [7, 11]],
                         index = multiindex,
                         columns=['avg_package', 'students'])

In [32]:
branch_df1

Unnamed: 0,Unnamed: 1,avg_package,students
cse,2019,9,10
cse,2020,10,11
cse,2021,9,8
cse,2022,7,10
ece,2019,3,6
ece,2020,5,10
ece,2021,7,19
ece,2020,7,11


In [34]:
branch_df1.loc['cse']

Unnamed: 0,avg_package,students
2019,9,10
2020,10,11
2021,9,8
2022,7,10


In [35]:
branch_df1.loc[:, 'avg_package']

cse  2019     9
     2020    10
     2021     9
     2022     7
ece  2019     3
     2020     5
     2021     7
     2020     7
Name: avg_package, dtype: int64