In [1]:
import pandas as pd
import numpy as np

# Sorting a dataset by some criterion is another important built-in operation.
# To sort lexicographically by row or column index, use the sort_index method,
# which returns a new, sorted object.
obj = pd.Series(range(4), index=['d', 'a', 'b', 'c'])

# The labels above are deliberately out of order: sort_index() reorders the
# entries by label (a, b, c, d) and each value travels with its label.
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [6]:
# With a DataFrame, you can sort by index on either axis.
df = pd.DataFrame(
    np.arange(8).reshape((2, 4)),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)

# axis=0 (the row labels) is the default, so df.sort_index(axis=0) is
# equivalent: row 'one' is placed before row 'three'.
df.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [5]:
# Sort the column labels instead of the row labels; axis='columns' is the
# named spelling of axis=1.
df.sort_index(axis='columns')

Unnamed: 0,a,b,c,d
three,1,2,3,0
one,5,6,7,4


In [7]:
# Column labels in descending (reverse lexicographic) order: d, c, b, a.
df.sort_index(axis='columns', ascending=False)

Unnamed: 0,d,c,b,a
three,0,3,2,1
one,4,7,6,5


In [8]:
# Sort a Series by its values rather than by its index labels.
# Ascending order is the default; it is spelled out here for clarity.
obj.sort_values(ascending=True)

d    0
a    1
b    2
c    3
dtype: int64

In [9]:
obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])

# Missing values are placed at the end of the sorted result;
# na_position='last' is the default, written out to make that explicit.
obj.sort_values(na_position='last')

4   -3.0
5    2.0
0    4.0
2    7.0
1    NaN
3    NaN
dtype: float64

In [17]:
# When sorting a DataFrame, you can use the data in one or more columns as the
# sort keys. To do so, pass one or more column names to the `by` option of
# sort_values.
#
# NOTE: this rebinds `df`; from here on the name refers to this two-column
# frame, not the frame used for the index-sorting examples.
df = pd.DataFrame({'b': [4, 7, -3, 2, 5], 'a': [0, 1, 0, 1, 0]})
df

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1
4,5,0


In [23]:
# Single sort key: order the rows by column 'b' alone. The result is bound to
# a name because a notebook cell only displays its final expression; as a bare
# expression on a non-final line it would be computed and silently discarded.
sorted_by_b = df.sort_values(by='b')

# Multiple sort keys: order by 'a' first, then by 'b' within each value of
# 'a'. As the last expression of the cell, this is the table that is displayed.
df.sort_values(by=['a', 'b'])

Unnamed: 0,b,a
2,-3,0
0,4,0
4,5,0
3,2,1
1,7,1


In [25]:
# Ranking assigns ranks from one through the number of valid data points in an
# array. The rank methods for Series and DataFrame are the place to look; by
# default rank breaks ties by assigning each tied group the mean rank.
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])

# Example of the tie rule: the two 7s would occupy ranks 6 and 7, so both
# receive 6.5; likewise the two 4s share 4.5.
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [26]:
# Ranks can also be assigned according to the order in which they're observed
# in the data: with method='first', tied values get distinct consecutive
# ranks, the earlier occurrence taking the lower one.
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [27]:
# Rank from largest to smallest; with method='max' every member of a tied
# group takes the highest rank number in that group.
obj.rank(method='max', ascending=False)

0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64

In [28]:
frame = pd.DataFrame({
    'b': [4.3, 7, -3, 2],
    'a': [0, 1, 0, 1],
    'c': [-2, 5, 8, -2.5],
})

# Rank across each row (comparing the b, a and c values within that row)
# rather than down each column.
frame.rank(axis='columns')

Unnamed: 0,b,a,c
0,3.0,2.0,1.0
1,3.0,1.0,2.0
2,1.0,2.0,3.0
3,3.0,2.0,1.0
