In [2]:
# Build a two-level MultiIndex (levels named 'Level 1' / 'Level 2') from explicit label pairs
import pandas as pd

index = pd.MultiIndex.from_tuples(
    [
        ('A', 'a'),
        ('A', 'b'),
        ('B', 'c'),
        ('B', 'd'),
    ],
    names=['Level 1', 'Level 2'],
)
index

MultiIndex([('A', 'a'),
            ('A', 'b'),
            ('B', 'c'),
            ('B', 'd')],
           names=['Level 1', 'Level 2'])

In [7]:
import numpy as np

# Seeded local generator: the sample values are the same on every
# Restart & Run All, and the global NumPy random state is left untouched.
data = np.random.default_rng(0).standard_normal((4, 2))
df = pd.DataFrame(data, index=index, columns=['Col 1', 'Col 2'])

# Index the DataFrame using the MultiIndex.
# A cell only echoes its final expression, so each intermediate lookup is
# printed explicitly; as bare expressions their results were discarded.
print(df.loc['A'])                  # every row whose first-level label is 'A'
print(df.loc[('A', 'a')])           # the single row addressed by the full key ('A', 'a')
print(df.loc[('A', 'b'), 'Col 2'])  # one cell: row ('A', 'b'), column 'Col 2'

df

Unnamed: 0_level_0,Unnamed: 1_level_0,Col 1,Col 2
Level 1,Level 2,Unnamed: 2_level_1,Unnamed: 3_level_1
A,a,1.592079,-2.697517
A,b,2.570915,0.784224
B,c,-0.091895,0.862597
B,d,1.241737,-0.293372


In [9]:
# Row index is the cartesian product of the two label lists (4 x 2 = 8 rows)
idx = pd.MultiIndex.from_product([['bar', 'baz', 'foo', 'qux'], ['one', 'two']])
# Seeded local generator so the printed values are reproducible across kernel restarts
df = pd.DataFrame(np.random.default_rng(1).standard_normal((8, 2)), index=idx, columns=['A', 'B'])
print(df)


                A         B
bar one -0.597478 -0.295039
    two -0.270259  0.964436
baz one -0.057240 -0.814922
    two -1.417292 -0.374904
foo one -2.609070 -0.321747
    two -0.327385  1.327334
qux one -0.728194 -1.659986
    two  0.372543 -1.186670


In [25]:
# Save dataframe to pickled pandas object.
# NOTE(review): this pickles whichever `df` is bound when the cell runs. The
# recorded output shows the Row_First/Col_First frame, i.e. the cell was last
# executed after the cross-section cells further down; on a top-to-bottom run
# it pickles the bar/baz/foo/qux frame with columns 'A' and 'B' instead.
PICKLE_PATH = 'data.pkl'  # relative to the working directory; '.pkl' is the usual pickle extension
df.to_pickle(PICKLE_PATH)
# Load dataframe from pickled pandas object.
# Only unpickle files you trust: loading a pickle can execute arbitrary code.
df1 = pd.read_pickle(PICKLE_PATH)
print(df1)
# The .pkl format allows you to save a Python object as a binary file that can
# be loaded back into memory later, preserving the state of the object.

Col_First                    A                   B          
Col_Second                   i        ii         i        ii
Row_First Row_Second                                        
bar       one        -1.172825  0.031973 -0.279515  0.269246
          two        -1.585104  0.000000  0.599287  0.000000
baz       one         1.022545 -0.938410 -0.196348 -0.971413
          two        -2.366085  0.000000 -0.794023  0.000000
foo       one         0.048697 -1.484124  0.367449  2.407361
          two         1.140819  0.000000  1.322084  0.000000
qux       one        -2.561750  0.851424  1.556351  0.960853
          two        -2.200716  0.000000 -1.360663  0.000000


'The .pkl format allows you to save a Python object as a binary file that can be loaded back into memory later, preserving the state of the object.'

In [13]:
# Each dictionary is one row; the dictionary keys become the column labels
L = [
    {'Name': first, 'Last Name': last}
    for first, last in (('John', 'Smith'), ('Mary', 'Wood'))
]
pd.DataFrame(L)

Unnamed: 0,Name,Last Name
0,John,Smith
1,Mary,Wood


In [14]:
# A key that is absent from a record ('Age' for Mary) shows up as NaN in that row
L = [
    dict([('Name', 'John'), ('Last Name', 'Smith'), ('Age', 37)]),
    dict([('Name', 'Mary'), ('Last Name', 'Wood')]),
]
pd.DataFrame(L)


Unnamed: 0,Name,Last Name,Age
0,John,Smith,37.0
1,Mary,Wood,


In [16]:
"""Cross sections of different axes
with MultiIndex"""
# Row labels given as two parallel arrays: position i of each array forms one (first, second) key
arrays = [
    ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
    ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
]
idx_row = pd.MultiIndex.from_arrays(arrays, names=['Row_First', 'Row_Second'])
# Column labels are the cartesian product ('A'|'B') x ('i'|'ii')
idx_col = pd.MultiIndex.from_product([['A', 'B'], ['i', 'ii']], names=['Col_First', 'Col_Second'])
# Seeded local generator so the frame shown by the cells that use `df` is
# reproducible across kernel restarts.
df = pd.DataFrame(np.random.default_rng(2).standard_normal((8, 4)), index=idx_row, columns=idx_col)
df

Unnamed: 0_level_0,Col_First,A,A,B,B
Unnamed: 0_level_1,Col_Second,i,ii,i,ii
Row_First,Row_Second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-1.172825,0.031973,-0.279515,0.269246
bar,two,-1.585104,-2.381177,0.599287,-0.62825
baz,one,1.022545,-0.93841,-0.196348,-0.971413
baz,two,-2.366085,-1.253234,-0.794023,-1.220343
foo,one,0.048697,-1.484124,0.367449,2.407361
foo,two,1.140819,-1.902464,1.322084,2.39682
qux,one,-2.56175,0.851424,1.556351,0.960853
qux,two,-2.200716,-0.913521,-1.360663,0.250415


In [18]:
# Row cross-section: keep the rows whose 'Row_Second' label is 'two'
# (the matched level is dropped from the result, leaving only 'Row_First')
df.xs(key='two', axis='index', level='Row_Second')

Col_First,A,A,B,B
Col_Second,i,ii,i,ii
Row_First,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
bar,-1.585104,-2.381177,0.599287,-0.62825
baz,-2.366085,-1.253234,-0.794023,-1.220343
foo,1.140819,-1.902464,1.322084,2.39682
qux,-2.200716,-0.913521,-1.360663,0.250415


In [19]:
# Column cross-section: take the 'ii' entry of column level 1 under every top-level column
df.xs(key='ii', axis='columns', level=1)

Unnamed: 0_level_0,Col_First,A,B
Row_First,Row_Second,Unnamed: 2_level_1,Unnamed: 3_level_1
bar,one,0.031973,0.269246
bar,two,-2.381177,-0.62825
baz,one,-0.93841,-0.971413
baz,two,-1.253234,-1.220343
foo,one,-1.484124,2.407361
foo,two,-1.902464,2.39682
qux,one,0.851424,0.960853
qux,two,-0.913521,0.250415


In [20]:
# Row slice via .loc: any first-level label, second-level label 'two'; all columns kept
df.loc[pd.IndexSlice[:, 'two'], :]

Unnamed: 0_level_0,Col_First,A,A,B,B
Unnamed: 0_level_1,Col_Second,i,ii,i,ii
Row_First,Row_Second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,two,-1.585104,-2.381177,0.599287,-0.62825
baz,two,-2.366085,-1.253234,-0.794023,-1.220343
foo,two,1.140819,-1.902464,1.322084,2.39682
qux,two,-2.200716,-0.913521,-1.360663,0.250415


In [21]:
# Column slice via .loc: all rows; any top-level column, second-level column 'ii'
df.loc[:, pd.IndexSlice[:, 'ii']]

Unnamed: 0_level_0,Col_First,A,B
Unnamed: 0_level_1,Col_Second,ii,ii
Row_First,Row_Second,Unnamed: 2_level_2,Unnamed: 3_level_2
bar,one,0.031973,0.269246
bar,two,-2.381177,-0.62825
baz,one,-0.93841,-0.971413
baz,two,-1.253234,-1.220343
foo,one,-1.484124,2.407361
foo,two,-1.902464,2.39682
qux,one,0.851424,0.960853
qux,two,-0.913521,0.250415


In [22]:
# Slice both axes at once: rows with second-level 'two', columns with second-level 'ii'
df.loc[pd.IndexSlice[:, 'two'], pd.IndexSlice[:, 'ii']]

Unnamed: 0_level_0,Col_First,A,B
Unnamed: 0_level_1,Col_Second,ii,ii
Row_First,Row_Second,Unnamed: 2_level_2,Unnamed: 3_level_2
bar,two,-2.381177,-0.62825
baz,two,-1.253234,-1.220343
foo,two,-1.902464,2.39682
qux,two,-0.913521,0.250415


In [23]:
# Assigning values through a .loc slice; this does not work with xs.
# Note that `df` itself is modified in place here.
df.loc[pd.IndexSlice[:, 'two'], pd.IndexSlice[:, 'ii']] = 0
df

Unnamed: 0_level_0,Col_First,A,A,B,B
Unnamed: 0_level_1,Col_Second,i,ii,i,ii
Row_First,Row_Second,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
bar,one,-1.172825,0.031973,-0.279515,0.269246
bar,two,-1.585104,0.0,0.599287,0.0
baz,one,1.022545,-0.93841,-0.196348,-0.971413
baz,two,-2.366085,0.0,-0.794023,0.0
foo,one,0.048697,-1.484124,0.367449,2.407361
foo,two,1.140819,0.0,1.322084,0.0
qux,one,-2.56175,0.851424,1.556351,0.960853
qux,two,-2.200716,0.0,-1.360663,0.0
