In [2]:
#As with most data structures, sorting is an important operation on Series and DataFrames

import pandas as pd
import numpy as np
from pandas import Series, DataFrame

#Four-element Series holding 0-3 whose labels are deliberately not in alphabetical order
series_obj = Series(data=np.arange(4), index=['d', 'b', 'c', 'a'])
series_obj

d    0
b    1
c    2
a    3
dtype: int32

In [3]:
#4x3 DataFrame of standard-normal draws; no seed is set, so the values differ on every fresh run
frame_obj = DataFrame(
    np.random.randn(4, 3),
    index=['First', 'Second', 'Third', 'Fourth'],
    columns=['b', 'd', 'e'],
)
frame_obj

Unnamed: 0,b,d,e
First,1.226883,0.381171,1.086009
Second,-0.314796,-1.600729,-0.378165
Third,-0.251698,1.965119,0.348131
Fourth,0.097762,-0.202875,-0.24831


In [5]:
#Sorting by index labels (lexicographically): sort_index
#For Series
series_obj.sort_index() #returns a new Series ordered by its index labels (ascending); each value stays attached to its label

a    3
b    1
c    2
d    0
dtype: int32

In [6]:
#For DataFrame
frame_obj.sort_index() #reorders the rows by their row labels (ascending, lexicographic); the values themselves are not sorted

Unnamed: 0,b,d,e
First,1.226883,0.381171,1.086009
Fourth,0.097762,-0.202875,-0.24831
Second,-0.314796,-1.600729,-0.378165
Third,-0.251698,1.965119,0.348131


In [7]:
#The default axis=0 sorts by the row labels; axis=1 sorts by the column labels instead
#(the columns b, d, e are already in order here, so the result looks unchanged)
frame_obj.sort_index(axis=1)

Unnamed: 0,b,d,e
First,1.226883,0.381171,1.086009
Second,-0.314796,-1.600729,-0.378165
Third,-0.251698,1.965119,0.348131
Fourth,0.097762,-0.202875,-0.24831


In [8]:
#Labels are sorted in ascending order by default; pass ascending=False for descending order
frame_obj.sort_index(axis = 1, ascending=False)

Unnamed: 0,e,d,b
First,1.086009,0.381171,1.226883
Second,-0.378165,-1.600729,-0.314796
Third,0.348131,1.965119,-0.251698
Fourth,-0.24831,-0.202875,0.097762


In [16]:
#Sorting by values: sort_values
#For Series:
#A Series is ordered by its values with the sort_values method
series_obj2 = Series(data=[5, 23, 5.3, -3.4, np.nan, 0])
series_obj2.sort_values() #NaN entries are placed at the end of the sorted result

3    -3.4
5     0.0
0     5.0
2     5.3
1    23.0
4     NaN
dtype: float64

In [17]:
#For DataFrame: the by argument
#For a DataFrame, sort_values takes a by argument naming the column(s) whose values determine the row order
#Display the unsorted frame first for comparison
frame_obj

Unnamed: 0,b,d,e
First,1.226883,0.381171,1.086009
Second,-0.314796,-1.600729,-0.378165
Third,-0.251698,1.965119,0.348131
Fourth,0.097762,-0.202875,-0.24831


In [18]:
frame_obj.sort_values(by= 'b') #Reorders the rows by the values in column 'b' (ascending)

Unnamed: 0,b,d,e
Second,-0.314796,-1.600729,-0.378165
Third,-0.251698,1.965119,0.348131
Fourth,0.097762,-0.202875,-0.24831
First,1.226883,0.381171,1.086009


In [21]:
#To sort by several columns, pass a list of column names:
#rows are ordered by 'd' first, and 'e' is used only to break ties in 'd'
frame_obj.sort_values(by = ['d','e'])

Unnamed: 0,b,d,e
Second,-0.314796,-1.600729,-0.378165
Fourth,0.097762,-0.202875,-0.24831
First,1.226883,0.381171,1.086009
Third,-0.251698,1.965119,0.348131


In [33]:
#Ranking in pandas structures

#For Series
#The value 7.2 appears twice so that the tie-handling methods below have something to act on
series_obj3 = Series(data=[2, 7.2, 1.3, 5, -2.5, 7.2, 11])
series_obj3

0     2.0
1     7.2
2     1.3
3     5.0
4    -2.5
5     7.2
6    11.0
dtype: float64

In [34]:
#By default (method='average'), tied values each receive the average of the ranks they span
#e.g. the two 7.2 entries occupy ranks 5 and 6, so both get 5.5
series_obj3.rank() 

0    3.0
1    5.5
2    2.0
3    4.0
4    1.0
5    5.5
6    7.0
dtype: float64

In [36]:
#method='first': ties are broken by the order in which the values appear in the data
series_obj3.rank(method='first')

0    3.0
1    5.0
2    2.0
3    4.0
4    1.0
5    6.0
6    7.0
dtype: float64

In [37]:
#method='max': every member of a tied group receives the highest rank in that group
series_obj3.rank(method='max')

0    3.0
1    6.0
2    2.0
3    4.0
4    1.0
5    6.0
6    7.0
dtype: float64

In [38]:
#method='min': every member of a tied group receives the lowest rank in that group
series_obj3.rank(method='min')

0    3.0
1    5.0
2    2.0
3    4.0
4    1.0
5    5.0
6    7.0
dtype: float64

In [39]:
#By default, ranks are assigned in ascending order (smallest value gets rank 1).
#Pass ascending=False so that the largest value gets rank 1:
series_obj3.rank(method='first',ascending=False)

0    5.0
1    2.0
2    6.0
3    4.0
4    7.0
5    3.0
6    1.0
dtype: float64

In [42]:
#For a DataFrame, the axis argument chooses the direction of ranking.
#With the default axis=0 (used here), values are ranked within each column
frame_obj.rank(method='first')

Unnamed: 0,b,d,e
First,4.0,3.0,4.0
Second,1.0,1.0,1.0
Third,2.0,4.0,3.0
Fourth,3.0,2.0,2.0


In [43]:
#axis=1 ranks the values within each row; with ascending=False the largest value in a row gets rank 1
frame_obj.rank(method='min',ascending=False,axis=1)

Unnamed: 0,b,d,e
First,1.0,3.0,2.0
Second,1.0,3.0,2.0
Third,3.0,1.0,2.0
Fourth,1.0,2.0,3.0
