In [1]:
#below imports are done to facilitate better outputs
from IPython.display import display, HTML ,display_html

CSS = """
.output {
    flex-direction: row;
}
"""

HTML('<style>{}</style>'.format(CSS))

def display_side_by_side(*args):
    html_str=''
    for df in args:
        html_str+=df.to_html()
    display_html(html_str.replace('table','table style="display:inline"'),raw=True)

In [2]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np
# A Series is a one-dimensional array-like object containing an array of data (of any
# NumPy data type) and an associated array of data labels, called its index. The simplest
# Series is formed from only an array of data:

series = Series([1,2,3,4,5])
print(series)

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [3]:
# Since we did not specify an index for the data, a default
# one consisting of the integers 0 through N - 1 (where N is the length of the data) is
# created.

print('values:',series.values)
print('index :',series.index)

values: [1 2 3 4 5]
index : RangeIndex(start=0, stop=5, step=1)


In [4]:
#Let us now give a desired index values
series = Series([1,2,3,4,5],index = ['ONE','TWO','THREE','FOUR','FIVE'])
#Length of passed values should be equal to length of index array else otherwise it will throw a ValueError
display(series)

ONE      1
TWO      2
THREE    3
FOUR     4
FIVE     5
dtype: int64

In [5]:
#Following is how we get singular values from array
series['TWO']
#Following is how we get multiple values from array 
#(This output is not a copy but a view of the array; any changes to the same will amount to changes to original array)
series[['ONE','FOUR','FIVE']]

ONE     1
FOUR    4
FIVE    5
dtype: int64

In [6]:
# Another way to think about a Series is as a fixed-length, ordered dict, as it is a mapping
# of index values to data values.

#We can perform scalar funcitons such as multiplication, division, addition etc on these series
print(series*5,'\n')
print(series[series > 3]) #getting values based on a condition: In here it is values grater than 3

#like in a dictionary in series also it is possible to test the presence of a element
print('TWO' in series,'\n')
print('EIGHT' in series,'\n')

#'Should you have data contained in a Python dict, you can create a Series from it by passing the dict'

dictionary = {'row_label1':'column_value1','row_label2':'column_value2','row_label3':'column_value3','row_label4':'column_value4'}
#'Keys go into indexes and values in to column'
#printing with actual indexes
print(Series(dictionary),'\n')
new_index_names = ['ONE','TWO','THREE','FOUR']
#upon applying indexes that did not exists previously the output will be NaN
print(Series(dictionary,index = new_index_names),'\n')

ONE       5
TWO      10
THREE    15
FOUR     20
FIVE     25
dtype: int64 

FOUR    4
FIVE    5
dtype: int64
True 

False 

row_label1    column_value1
row_label2    column_value2
row_label3    column_value3
row_label4    column_value4
dtype: object 

ONE      NaN
TWO      NaN
THREE    NaN
FOUR     NaN
dtype: object 



In [7]:
# A critical Series feature for many applications is that it automatically aligns differently indexed
# data in arithmetic operations:

series1 = Series({'ohio':1,'newyork':2,'seattle':3,'texas':4})
series2 = Series({'newyork':2,'seattle':3,'row_label4':4,'ohio':1,})
print(series1,'\n')
print(series2,'\n')
print(series1+series2,'\n')
#Not only does automatic alignment of indexes occur but the resulting output is a unioning of indexes with 
#NaN in results that were unique to both

#A Series’s index can be altered in place by assignment:
series1.index = ['gujarat','surat','baroda','varanasi']
print(series1,'\n')

# Both the Series object itself and its index have a attribute, which integrates with name other key areas of pandas functionality:
series1.name = 'numbers'
series1.index.name = 'state'
print(series1.name,series1.index.name)

ohio       1
newyork    2
seattle    3
texas      4
dtype: int64 

newyork       2
seattle       3
row_label4    4
ohio          1
dtype: int64 

newyork       4.0
ohio          2.0
row_label4    NaN
seattle       6.0
texas         NaN
dtype: float64 

gujarat     1
surat       2
baroda      3
varanasi    4
dtype: int64 

numbers state


In [8]:
#DATAFRAME

# The DataFrame has both a row and column index; it can be
# thought of as a dict of Series (one for all sharing the same index).

dictionary = {'column1':[1,2,3,4,5,6],'column2':[6,5,4,3,2,1],'column3':[1,2,3,'a','b','c']} #arrays must all be of same length
dataframe = DataFrame(dictionary)
display(dataframe)

#If you specify a sequence of columns, the DataFrame’s columns will be exactly what you pass:
display(DataFrame(dictionary,columns = ['column1','column3']))

Unnamed: 0,column1,column2,column3
0,1,6,1
1,2,5,2
2,3,4,3
3,4,3,a
4,5,2,b
5,6,1,c


Unnamed: 0,column1,column3
0,1,1
1,2,2
2,3,3
3,4,a
4,5,b
5,6,c


In [9]:
# a data frame column can be retrieved back as a series or even as a list 

print(dataframe['column1'],type(dataframe['column1']),'\n',list(dataframe['column1']),type(list(dataframe['column1'])))

0    1
1    2
2    3
3    4
4    5
5    6
Name: column1, dtype: int64 <class 'pandas.core.series.Series'> 
 [1, 2, 3, 4, 5, 6] <class 'list'>


In [10]:
#the new series / data frame column can be applied a scalar as well as other values
display(dataframe)
dataframe['column1'] = 6.5
display(dataframe)
dataframe['column2'] = np.arange(6)
display(dataframe)

Unnamed: 0,column1,column2,column3
0,1,6,1
1,2,5,2
2,3,4,3
3,4,3,a
4,5,2,b
5,6,1,c


Unnamed: 0,column1,column2,column3
0,6.5,6,1
1,6.5,5,2
2,6.5,4,3
3,6.5,3,a
4,6.5,2,b
5,6.5,1,c


Unnamed: 0,column1,column2,column3
0,6.5,0,1
1,6.5,1,2
2,6.5,2,3
3,6.5,3,a
4,6.5,4,b
5,6.5,5,c


In [11]:
# Assigning a column that doesn’t exist will create a new column. The del keyword will
# delete columns as with a dict:

dataframe['mehul'] = 'mehul'
display(dataframe)
del dataframe['mehul']
display(dataframe)

Unnamed: 0,column1,column2,column3,mehul
0,6.5,0,1,mehul
1,6.5,1,2,mehul
2,6.5,2,3,mehul
3,6.5,3,a,mehul
4,6.5,4,b,mehul
5,6.5,5,c,mehul


Unnamed: 0,column1,column2,column3
0,6.5,0,1
1,6.5,1,2
2,6.5,2,3
3,6.5,3,a
4,6.5,4,b
5,6.5,5,c


In [12]:
#nested dictionaries to dataframe

example_dict = {'dict1':{'k1':'v1','k2':'v2','k3':'v3','k4':'v4','k6':'v6'},'dict2':{'k1':'v1','k2':'v2','k3':'v3','k4':'v4','k5':'v5'}}
example_dataframe = DataFrame(example_dict)
display(example_dataframe)
display(example_dataframe.T)  #transposing the dataframe

#What happened above? All the K values in nested dicts were indexes corresponding to which the values in 
# both nested dicts got automatically arranged to matching indexes in their respective rows.
# The ones who did not find a match in other got Nan as a result

# The keys in the inner dicts are unioned and sorted to form the index in the result

Unnamed: 0,dict1,dict2
k1,v1,v1
k2,v2,v2
k3,v3,v3
k4,v4,v4
k6,v6,
k5,,v5


Unnamed: 0,k1,k2,k3,k4,k6,k5
dict1,v1,v2,v3,v4,v6,
dict2,v1,v2,v3,v4,,v5


In [13]:
#INDEX OBJECTS

# pandas’s Index objects are responsible for holding the axis labels and other metadata
# (like the axis name or names). Any array or other sequence of labels used when constructing
# a Series or DataFrame is internally converted to an Index:

# Index objects are immutable and thus can’t be modified by the user:

print(example_dataframe.index)
try:
    example_dataframe.index[1] = 'something'   #changing index value
except Exception as e:
    print(e)
# Immutability is important so that Index objects can be safely shared among data
# structures:    

Index(['k1', 'k2', 'k3', 'k4', 'k6', 'k5'], dtype='object')
Index does not support mutable operations


In [14]:
#MORE INDEXING

#REINDEXING  SERIES

new_series = example_dataframe['dict1']
print(new_series,'\n')
print(new_series.reindex(['k1','k2','2','3','k4','k5','k6','7','8','9'],fill_value = 'LLUN'),'\n')

#fill the missing values using fill_value

new_series1 = Series(['a','s','d','f','g'],index = [1,2,3,4,5])
print(new_series1.reindex([1,2,3,4,5,6,7,8,9],method = 'ffill')) 

#forward fill, will fill the last values encounterd for an 
#index in to the new indexes encoountered during the way

# columns of a dataframe cal be reindexed too using reindex
display(example_dataframe)
display(example_dataframe.reindex(columns = ['dictionary1','dict1','disctionary2','dict2']))

k1     v1
k2     v2
k3     v3
k4     v4
k6     v6
k5    NaN
Name: dict1, dtype: object 

k1      v1
k2      v2
2     LLUN
3     LLUN
k4      v4
k5     NaN
k6      v6
7     LLUN
8     LLUN
9     LLUN
Name: dict1, dtype: object 

1    a
2    s
3    d
4    f
5    g
6    g
7    g
8    g
9    g
dtype: object


Unnamed: 0,dict1,dict2
k1,v1,v1
k2,v2,v2
k3,v3,v3
k4,v4,v4
k6,v6,
k5,,v5


Unnamed: 0,dictionary1,dict1,disctionary2,dict2
k1,,v1,,v1
k2,,v2,,v2
k3,,v3,,v3
k4,,v4,,v4
k6,,v6,,
k5,,,,v5


In [15]:
#DROPPING ROWS(AXIS = 0) AND COLUMNS (AXIS = 1)

data = DataFrame(np.arange(16).reshape((4, 4)),index=['Ohio', 'Colorado', 'Utah', 'New York'],columns=['one', 'two', 'three', 'four'])
display_side_by_side(data,data.drop(['Ohio','Utah']),data.drop('two',axis = 1))
#All the methods return new objects without afecting original frames that is why we put them in display to show effects
#dropping columns

#SELECTING DATA SETS
#loc helps with row level referencing   (ix has been deprecated and loc is used instead)
print(data.loc['Colorado', ['two', 'three']])
print(data.loc[['Colorado', 'Utah'],['three','two','one']])

#General notation is data.loc[row,column]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
New York,12,13,14,15

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


two      5
three    6
Name: Colorado, dtype: int32
          three  two  one
Colorado      6    5    4
Utah         10    9    8


In [16]:
#ARITHMATIC ALIGNMENT

data_US = DataFrame(np.arange(16).reshape((4, 4)),index=['Ohio', 'Colorado', 'Utah', 'New York'],columns=['one', 'two', 'three', 'four'])
data_IND = DataFrame(np.arange(25).reshape((5, 5)),index=['Ohio', 'Colorado', 'Utah', 'New York','Gujarat'],columns=['one', 'two', 'three', 'four','five'])
display_side_by_side(data_US,data_IND,data_US+data_IND)  #It automatically adds NaN to not applicable non-common columns/rows between the 2

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15

Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,4
Colorado,5,6,7,8,9
Utah,10,11,12,13,14
New York,15,16,17,18,19
Gujarat,20,21,22,23,24

Unnamed: 0,five,four,one,three,two
Colorado,,15.0,9.0,13.0,11.0
Gujarat,,,,,
New York,,33.0,27.0,31.0,29.0
Ohio,,6.0,0.0,4.0,2.0
Utah,,24.0,18.0,22.0,20.0


In [17]:
#FILLING NULL VALUES METHODS
display(data_US.add(data_IND,fill_value = 0)) #replaces NaN by 0 to give Non Null results

Unnamed: 0,five,four,one,three,two
Colorado,9.0,15.0,9.0,13.0,11.0
Gujarat,24.0,23.0,20.0,22.0,21.0
New York,19.0,33.0,27.0,31.0,29.0
Ohio,4.0,6.0,0.0,4.0,2.0
Utah,14.0,24.0,18.0,22.0,20.0


In [18]:
#FILLING NULL VALUES WHILE COLUMN REINDEXATION
display(data_US.reindex(columns = data_IND.columns,fill_value = 0)) #replaces NaN of column 5 with 0 as same was absent originally

Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,0
Colorado,4,5,6,7,0
Utah,8,9,10,11,0
New York,12,13,14,15,0


In [19]:
#ARITHMATIC OPERATION BETWEEN DATAFRAME AND SERIES
frame = data_US
series = data_US.loc['Utah'] #only the visual representation of series is confusing. It is nothing but UTah Row with those 4 columns
result = frame - series
result1 = frame.sub(series['three'],axis = 1) #a column operation requires use of methods Simply using - sign is an operator where as using sub is called as method
display_side_by_side(frame,DataFrame(series),result,result1)

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15

Unnamed: 0,Utah
one,8
two,9
three,10
four,11

Unnamed: 0,one,two,three,four
Ohio,-8,-8,-8,-8
Colorado,-4,-4,-4,-4
Utah,0,0,0,0
New York,4,4,4,4

Unnamed: 0,one,two,three,four
Ohio,-10,-9,-8,-7
Colorado,-6,-5,-4,-3
Utah,-2,-1,0,1
New York,2,3,4,5


In [20]:
#APPLYING FUNCTION
#ANONYMOUS FUNCTIONS LAMBDA
# LAMBDA VARIABLE : OPERATION

function = lambda x: x.max()+x.min()  #for a particular column we are adding its min and max values
display_side_by_side(data_IND,DataFrame(data_IND.apply(function)))

Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,4
Colorado,5,6,7,8,9
Utah,10,11,12,13,14
New York,15,16,17,18,19
Gujarat,20,21,22,23,24

Unnamed: 0,0
one,20
two,22
three,24
four,26
five,28


In [21]:
#APPLYING FUNCITON TO EVERY ELEMENT
function_map = lambda x : x**2    #we are squaring each elemtns in their own space
display_side_by_side(data_IND,DataFrame(data_IND.applymap(function_map)))

Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,4
Colorado,5,6,7,8,9
Utah,10,11,12,13,14
New York,15,16,17,18,19
Gujarat,20,21,22,23,24

Unnamed: 0,one,two,three,four,five
Ohio,0,1,4,9,16
Colorado,25,36,49,64,81
Utah,100,121,144,169,196
New York,225,256,289,324,361
Gujarat,400,441,484,529,576


In [22]:
#SORTING AND RANKING  A SERIES
display_side_by_side(data_IND.sort_index(),data_IND.sort_index(axis = 1)) #sorting by rows, sorting by columns
display_side_by_side(data_IND.sort_values(by = ['three','four']))  #similar to group by over more than 1 columns

Unnamed: 0,one,two,three,four,five
Colorado,5,6,7,8,9
Gujarat,20,21,22,23,24
New York,15,16,17,18,19
Ohio,0,1,2,3,4
Utah,10,11,12,13,14

Unnamed: 0,five,four,one,three,two
Ohio,4,3,0,2,1
Colorado,9,8,5,7,6
Utah,14,13,10,12,11
New York,19,18,15,17,16
Gujarat,24,23,20,22,21


Unnamed: 0,one,two,three,four,five
Ohio,0,1,2,3,4
Colorado,5,6,7,8,9
Utah,10,11,12,13,14
New York,15,16,17,18,19
Gujarat,20,21,22,23,24


In [23]:
#RANKING
seriess = Series([-7,-5,-3,1,2,4,6,8,9,9,6,0])
print(seriess.rank) #similar to kinda dense rank
print(seriess.rank(method = 'first'))   #applies rank yo every element
print(seriess.rank(ascending = False))  #descending ranking

<bound method NDFrame.rank of 0    -7
1    -5
2    -3
3     1
4     2
5     4
6     6
7     8
8     9
9     9
10    6
11    0
dtype: int64>
0      1.0
1      2.0
2      3.0
3      5.0
4      6.0
5      7.0
6      8.0
7     10.0
8     11.0
9     12.0
10     9.0
11     4.0
dtype: float64
0     12.0
1     11.0
2     10.0
3      8.0
4      7.0
5      6.0
6      4.5
7      3.0
8      1.5
9      1.5
10     4.5
11     9.0
dtype: float64


In [24]:
#DUPLICATE INDEXES
data_US = DataFrame(np.arange(16).reshape((4, 4)),index=['Ohio', 'Colorado', 'Ohio', 'New York'],columns=['one', 'two', 'three', 'four'])
print(data_US.index.is_unique)
display(data_US)
display(data_US.loc['Ohio'])   #loc is necessary for row extraction by row index

False


Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Ohio,8,9,10,11
New York,12,13,14,15


Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Ohio,8,9,10,11


In [25]:
#DESCRIPTIVE STATISTICS

frame = DataFrame([[1.4, np.nan], [7.1, -4.5],[np.nan, np.nan], [0.75, -1.3]],index=['a', 'b', 'c', 'd'],columns=['one', 'two'])

# ROW SUM 
print(frame.sum(axis = 1),type(frame.sum(axis = 1)),'\n')

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64 <class 'pandas.core.series.Series'> 



In [26]:
#COLUMN SUM
print(frame.sum(),'\n')

one    9.25
two   -5.80
dtype: float64 



In [27]:
#SUM SKIPPING NULLS
print(frame.sum(skipna = False, axis = 1),'\n')     #aggregation outputs will be a series

a     NaN
b    2.60
c     NaN
d   -0.55
dtype: float64 



In [28]:
#MAXIMUM VALUES
print(frame.idxmax(),frame.idxmax(axis = 1),'\n')   #getting column as well as row max values

one    b
two    d
dtype: object a    one
b    one
c    NaN
d    one
dtype: object 



In [29]:
#CUMULATIVE SUM
print(frame.cumsum(),'\n')

    one  two
a  1.40  NaN
b  8.50 -4.5
c   NaN  NaN
d  9.25 -5.8 



In [30]:
#OVERALL SUMMARY STATISTIC
print(frame.describe(),'\n')                        #getting basic stats such as mean, max,min,std,quantiles etc

            one       two
count  3.000000  2.000000
mean   3.083333 -2.900000
std    3.493685  2.262742
min    0.750000 -4.500000
25%    1.075000 -3.700000
50%    1.400000 -2.900000
75%    4.250000 -2.100000
max    7.100000 -1.300000 



In [31]:
#FINDING CORRELATION BETWEEN 2 COLUMNS              #getting correlation between values of 2 columns of same data frame
print(frame.one.corr(frame.two),'\n')
print(frame.one.corr(frame.one),'\n')               #frame here represents a dataframe and frame.column is a series,
#hence it is possible to find corr between columns to 2 different data sets as it is nothing but a corr between 2 columns(series)
# arranged at same index levels.

-1.0 

1.0 



In [32]:
#GETTING UNIQUE VALUES, UNIQUE COUNTS OF EACH VALUE AND THEIR MEMBERSHIP
series = Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
print(series.unique(),'\n')
print(series.value_counts(),'\n')
print(series.isin(['b','c']),'\n')                   #this will return True in places where b and c are present and false elsewhere in teh column/series

['c' 'a' 'd' 'b'] 

c    3
a    3
b    2
d    1
dtype: int64 

0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool 



In [34]:
#DROPPING NULL VALUES
frame = DataFrame([[1.4, np.nan], [7.1, -4.5],[np.nan, np.nan], [0.75, -1.3]],index=['a', 'b', 'c', 'd'],columns=['one', 'two'])
display_side_by_side(frame,frame.dropna(),frame.isnull(),frame.notnull())
#isnull and notnull return a dataframe only but with values masked by True or False depending upon their NaN status

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3

Unnamed: 0,one,two
b,7.1,-4.5
d,0.75,-1.3

Unnamed: 0,one,two
a,False,True
b,False,False
c,True,True
d,False,False

Unnamed: 0,one,two
a,True,False
b,True,True
c,False,False
d,True,True


In [37]:
#FILLING IN NULL VALUES
display_side_by_side(frame,frame.fillna('Mehul'),frame.fillna({'one':'mehul','two':'chhaya'}))

#to fill all null values by a single value simply use fillna
#to fill null values depending upon columns pass a dictionary to fillna method with keys as column names and values to be replaced as key's values

# Remember one thing: these methods return a copy of original object but do not make changes to original frames
# to make changes to orignial frame use fillna = True as an additional condition in fillna method

#to fill values with above values use method = ffill (forward fill) as another method
#to fill values with above values but only till next few places use limit = #few as additional argument

#Null values can also be replaced by stats such as mean, median mode etc by paasing them as values: fillna(frame['column'].mean())

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3

Unnamed: 0,one,two
a,1.4,Mehul
b,7.1,-4.5
c,Mehul,Mehul
d,0.75,-1.3

Unnamed: 0,one,two
a,1.4,chhaya
b,7.1,-4.5
c,mehul,chhaya
d,0.75,-1.3


In [62]:
#HIERARCHICAL INDEX
#A TYPICAK USE CASE WOULD BE IN HIERARCHICAL CATEGORIES IN WHICH DATA MIGHT BE DIVIDED
frame = DataFrame(np.arange(16).reshape((4, 4)), index = [['a','a','d','d'],[1,2,3,4]],columns = [['a','a','d','d'],[1,2,3,4]])
print(frame.index,'\n')
display(frame)
display(frame['d'])
display(frame.unstack())                        #This type of an output is called PANEL output
display(frame.unstack().stack())

MultiIndex([('a', 1),
            ('a', 2),
            ('d', 3),
            ('d', 4)],
           ) 



Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,d,d
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,3,4
a,1,0,1,2,3
a,2,4,5,6,7
d,3,8,9,10,11
d,4,12,13,14,15


Unnamed: 0,Unnamed: 1,3,4
a,1,2,3
a,2,6,7
d,3,10,11
d,4,14,15


Unnamed: 0_level_0,a,a,a,a,a,a,a,a,d,d,d,d,d,d,d,d
Unnamed: 0_level_1,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4
Unnamed: 0_level_2,1,2,3,4,1,2,3,4,1,2,3,4,1,2,3,4
a,0.0,4.0,,,1.0,5.0,,,2.0,6.0,,,3.0,7.0,,
d,,,8.0,12.0,,,9.0,13.0,,,10.0,14.0,,,11.0,15.0


Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,d,d
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,3,4
a,1,0.0,1.0,2.0,3.0
a,2,4.0,5.0,6.0,7.0
d,3,8.0,9.0,10.0,11.0
d,4,12.0,13.0,14.0,15.0


In [63]:
#CHANGIN THE LEVEL OF INDEXING OR SAY SWAPPING THE INDEXING
display(frame.swaplevel())

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,d,d
Unnamed: 0_level_1,Unnamed: 1_level_1,1,2,3,4
1,a,0,1,2,3
2,a,4,5,6,7
3,d,8,9,10,11
4,d,12,13,14,15


In [93]:
#SUMMARY STATISTICS BY LEVEL
frame.index.set_names(['ROW 1','ROW 2'])    #setting row level labels
print(frame,'\n')
print(frame.sum(),'\n')                     #on a column level
print(frame.sum(level = 0),'\n')            #on a row level

      a       d    
      1   2   3   4
a 1   0   1   2   3
  2   4   5   6   7
d 3   8   9  10  11
  4  12  13  14  15 

a  1    24
   2    28
d  3    32
   4    36
dtype: int64 

    a       d    
    1   2   3   4
a   4   6   8  10
d  20  22  24  26 



In [99]:
#RESETTING INDEX WITH SOME COLUMN
print(frame.reset_index())
print(frame.reset_index(0))
print(frame.reset_index(1))

  level_0 level_1   a       d    
                    1   2   3   4
0       a       1   0   1   2   3
1       a       2   4   5   6   7
2       d       3   8   9  10  11
3       d       4  12  13  14  15
  level_0   a       d    
            1   2   3   4
1       a   0   1   2   3
2       a   4   5   6   7
3       d   8   9  10  11
4       d  12  13  14  15
  level_1   a       d    
            1   2   3   4
a       1   0   1   2   3
a       2   4   5   6   7
d       3   8   9  10  11
d       4  12  13  14  15
