In [2]:
import numpy as np
import pandas as pd

# Arithmetic and Data Alignment

## Sorting and Ranking

### Sorting index

In [3]:
# Build a 3x4 integer frame (values 0..11) whose row and column labels are
# intentionally not in alphabetical order, so that sorting visibly changes it.
df1 = pd.DataFrame(
    data=np.arange(12).reshape(3, 4),
    index=['three', 'one', 'two'],
    columns=['c', 'a', 'b', 'd'],
)
df1

Unnamed: 0,c,a,b,d
three,0,1,2,3
one,4,5,6,7
two,8,9,10,11


Sort a DataFrame by its index:

In [4]:
# Return a new frame with the rows reordered so the index labels ascend
# alphabetically; df1 itself is left untouched.
df1.sort_index(axis=0, ascending=True)

Unnamed: 0,c,a,b,d
one,4,5,6,7
three,0,1,2,3
two,8,9,10,11


Sort a DataFrame by its column names in descending order:

In [5]:
# Sort along the column axis: the column labels are arranged in reverse
# alphabetical order, while the row order stays as it was.
df1.sort_index(axis='columns', ascending=False)

Unnamed: 0,d,c,b,a
three,3,0,2,1
one,7,4,6,5
two,11,8,10,9


### Sort values

Sort a DataFrame's rows by the values in one or more columns (later columns break ties in earlier ones):

In [6]:
# Order the rows by column 'd' first, using column 'c' to break ties.
df1.sort_values(by=['d', 'c'], axis=0, ascending=True)

Unnamed: 0,c,a,b,d
three,0,1,2,3
one,4,5,6,7
two,8,9,10,11


In [7]:
# Work on a copy of df1 and introduce a tie in column 'd', so that the
# secondary sort key becomes relevant.
df2 = df1.copy()

# Assign through a single .loc call. Chained indexing such as
# df2['b'][0] = 10 writes into an intermediate Series: it raises
# SettingWithCopyWarning and does not update df2 at all when pandas
# Copy-on-Write is enabled.
df2.loc['three', 'b'] = 10  # first row
df2.loc['one', 'd'] = 3     # second row

df2

Unnamed: 0,c,a,b,d
three,0,1,10,3
one,4,5,6,3
two,8,9,10,11


In [8]:
# Rows 'three' and 'one' tie on 'd', so column 'b' decides their order.
df2.sort_values(by=['d', 'b'], axis=0, ascending=True)

Unnamed: 0,c,a,b,d
one,4,5,6,3
three,0,1,10,3
two,8,9,10,11


### Ranking

A DataFrame can compute ranks either down the rows (within each column) or across the columns (within each row).

In [14]:
# Three integer columns that contain repeated values, so that ranking
# has ties to resolve.
df3 = pd.DataFrame(
    data={
        'foo': [7, -5, 7, 4, 2, 0, 4, 7],
        'bar': [-5, 4, 2, 0, 4, 7, 7, 8],
        'baz': [-1, 2, 3, 0, 5, 9, 9, 5],
    }
)
df3

Unnamed: 0,foo,bar,baz
0,7,-5,-1
1,-5,4,2
2,7,2,3
3,4,0,0
4,2,4,5
5,0,7,9
6,4,7,9
7,7,8,5


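Rank a DataFrame down the rows, i.e. rank the values within each column (the default, `axis=0`); tied values share the average of their ranks: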

In [16]:
# Rank the values within each column separately; tied values receive the
# mean of the ranks they span.
df3.rank(axis=0, method='average')

Unnamed: 0,foo,bar,baz
0,7.0,1.0,1.0
1,1.0,4.5,3.0
2,7.0,3.0,4.0
3,4.5,2.0,2.0
4,3.0,4.5,5.5
5,2.0,6.5,7.5
6,4.5,6.5,7.5
7,7.0,8.0,5.5


Rank a DataFrame across the columns, i.e. rank the values within each row (`axis=1`):

In [18]:
# Show df3 again so its raw values can be compared with the ranks computed
# by df3.rank(axis=1).
df3

Unnamed: 0,foo,bar,baz
0,7,-5,-1
1,-5,4,2
2,7,2,3
3,4,0,0
4,2,4,5
5,0,7,9
6,4,7,9
7,7,8,5


In [17]:
# Rank the three values of every row against each other (1 = smallest in
# that row); ties share the average rank.
df3.rank(axis='columns')

Unnamed: 0,foo,bar,baz
0,3.0,1.0,2.0
1,1.0,3.0,2.0
2,3.0,1.0,2.0
3,3.0,1.5,1.5
4,1.0,2.0,3.0
5,1.0,2.0,3.0
6,1.0,2.0,3.0
7,2.0,3.0,1.0


## Axis Indexes with Duplicate Values

Select rows from a DataFrame whose index contains duplicate labels; selecting a duplicated label returns every matching row:

In [26]:
# 5x4 frame of standard-normal samples with a deliberately duplicated index
# ('foo' and 'bar' each appear twice). The generator is seeded so that a
# fresh Restart & Run All reproduces exactly the same values.
df4 = pd.DataFrame(np.random.default_rng(seed=0).standard_normal((5, 4)),
                   index=['foo', 'foo', 'bar', 'bar', 'baz'])
df4

Unnamed: 0,0,1,2,3
foo,1.159627,0.237311,1.411635,-1.291075
foo,0.620218,-0.02574,-1.792992,-0.363275
bar,0.446185,0.637069,-1.574358,-0.45109
bar,-0.787725,2.0615,1.149316,-0.441779
baz,-0.475877,0.398953,0.054429,0.077788


In [27]:
# 'bar' occurs twice in the index, so this label lookup yields a two-row
# DataFrame.
df4.loc['bar', :]

Unnamed: 0,0,1,2,3
bar,0.446185,0.637069,-1.574358,-0.45109
bar,-0.787725,2.0615,1.149316,-0.441779
