In [1]:
### Sorting a data set by criterion is a very important and built-in functions are available for it.
### To sort lexicographically by row or column index, we could use the sort_index method.

In [2]:
import pandas as pd
import numpy as np

In [4]:
a = np.matrix('2 24 3 4; 51 6 17 18; 69 10 61 121; 3 43 51 16')
df2 = pd.DataFrame(a, index = ['Chennai', 'Mumbai', 'Bangalore', 'Pune'], columns = ['2001','2002','2004','2005'])

In [5]:
df2

Unnamed: 0,2001,2002,2004,2005
Chennai,2,24,3,4
Mumbai,51,6,17,18
Bangalore,69,10,61,121
Pune,3,43,51,16


In [7]:
df2.sort_index()

Unnamed: 0,2001,2002,2004,2005
Bangalore,69,10,61,121
Chennai,2,24,3,4
Mumbai,51,6,17,18
Pune,3,43,51,16


In [9]:
df2.sort_index(axis=1)

Unnamed: 0,2001,2002,2004,2005
Chennai,2,24,3,4
Mumbai,51,6,17,18
Bangalore,69,10,61,121
Pune,3,43,51,16


In [10]:
df2['1995'] = [3,67,42,15]

In [11]:
df2

Unnamed: 0,2001,2002,2004,2005,1995
Chennai,2,24,3,4,3
Mumbai,51,6,17,18,67
Bangalore,69,10,61,121,42
Pune,3,43,51,16,15


In [12]:
df2.sort_index(axis=1)

Unnamed: 0,1995,2001,2002,2004,2005
Chennai,3,2,24,3,4
Mumbai,67,51,6,17,18
Bangalore,42,69,10,61,121
Pune,15,3,43,51,16


In [13]:
### Sort in descending

df2.sort_index(axis=1, ascending=False)

Unnamed: 0,2005,2004,2002,2001,1995
Chennai,4,3,24,2,3
Mumbai,18,17,6,51,67
Bangalore,121,61,10,69,42
Pune,16,51,43,3,15


In [14]:
s = pd.Series([4,-7,3,5])

In [15]:
s

0    4
1   -7
2    3
3    5
dtype: int64

In [16]:
s.order()

  if __name__ == '__main__':


1   -7
2    3
0    4
3    5
dtype: int64

In [17]:
s.sort_values()

1   -7
2    3
0    4
3    5
dtype: int64

In [18]:
### As you can see, the order() method is deprecated and hence using sort_values is recommended

In [19]:
s1 = pd.Series([5, np.nan, 7, np.nan, np.nan])

In [20]:
s1 

0    5.0
1    NaN
2    7.0
3    NaN
4    NaN
dtype: float64

In [21]:
### Any missing values are sorted to the end of the Series by default

In [22]:
s1.sort_values()

0    5.0
2    7.0
1    NaN
3    NaN
4    NaN
dtype: float64

In [23]:
df2

Unnamed: 0,2001,2002,2004,2005,1995
Chennai,2,24,3,4,3
Mumbai,51,6,17,18,67
Bangalore,69,10,61,121,42
Pune,3,43,51,16,15


In [24]:
### You may want to sort by values in one or more columns

df2.sort_index(by='2002')

  if __name__ == '__main__':


Unnamed: 0,2001,2002,2004,2005,1995
Mumbai,51,6,17,18,67
Bangalore,69,10,61,121,42
Chennai,2,24,3,4,3
Pune,3,43,51,16,15


In [25]:
### Since its deprecated, lets use sort_values() method

In [29]:
df2.sort_values(by=['2002'], axis = 0)

Unnamed: 0,2001,2002,2004,2005,1995
Mumbai,51,6,17,18,67
Bangalore,69,10,61,121,42
Chennai,2,24,3,4,3
Pune,3,43,51,16,15


In [28]:
df2

Unnamed: 0,2001,2002,2004,2005,1995
Chennai,2,24,3,4,3
Mumbai,51,6,17,18,67
Bangalore,69,10,61,121,42
Pune,3,43,51,16,15


In [30]:
### Ranking is closely related to sorting, assigning ranks from one through the number of valid data points in an array.
### It is similar to the indirect sort indices produced by numpy.arsort, except that ties are broken according to a rule.
### The rank methods for Series and DF are the place to look.
### By default, rank breaks ties by assigning each group the mean rank

In [32]:
s

0    4
1   -7
2    3
3    5
dtype: int64

In [33]:
s.rank()

0    3.0
1    1.0
2    2.0
3    4.0
dtype: float64

In [34]:
s.add([45,6,-67,12])

0    49
1    -1
2   -64
3    17
dtype: int64

In [36]:
s.rank()

0    3.0
1    1.0
2    2.0
3    4.0
dtype: float64

In [37]:
s.rank(method='first')

0    3.0
1    1.0
2    2.0
3    4.0
dtype: float64

In [40]:
s.rank(ascending=False, method='max')

0    2.0
1    4.0
2    3.0
3    1.0
dtype: float64

In [None]:
### I have no clue what ranking is for