Outline
* Data Alignment
* Rank and Sort

In [2]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Data Alignment

### Series

In [5]:
# Three-element integer Series labelled A, B, C.
ser1 = Series(data=[0, 1, 2], index=list('ABC'))
ser1

A    0
B    1
C    2
dtype: int64

In [6]:
# Four-element integer Series; shares labels A-C with ser1 and adds D.
ser2 = Series(data=[3, 4, 5, 6], index=list('ABCD'))
ser2

A    3
B    4
C    5
D    6
dtype: int64

In [7]:
# Addition aligns on index labels. 'D' exists only in ser2, so the result
# there is NaN and the whole result is upcast to float64.
ser1 + ser2

A    3.0
B    5.0
C    7.0
D    NaN
dtype: float64

### DataFrame

In [14]:
# 2x2 frame holding 0..3: rows NYC/LA, columns A/B.
dframe1 = DataFrame(
    data=np.arange(4).reshape(2, 2),
    index=['NYC', 'LA'],
    columns=['A', 'B'],
)
dframe1

Unnamed: 0,A,B
NYC,0,1
LA,2,3


In [15]:
# 3x3 frame holding 0..8: rows NYC/SF/LA, columns A/D/C (in that order).
dframe2 = DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['NYC', 'SF', 'LA'],
    columns=['A', 'D', 'C'],
)
dframe2

Unnamed: 0,A,D,C
NYC,0,1,2
SF,3,4,5
LA,6,7,8


In [16]:
# Arithmetic aligns on both row and column labels. Only cells present in both
# frames (NYC/A and LA/A) get a value; the rest of the label union is NaN.
dframe1 + dframe2

Unnamed: 0,A,B,C,D
LA,8.0,,,
NYC,0.0,,,
SF,,,,


In [17]:
# Add dframe1 and dframe2, substituting fill_value (0) for a cell that is
# missing from only one of the two frames.
dframe1.add(dframe2, fill_value=0)

# (SF, B) is missing from both dframe1 and dframe2, so it is still NaN.

Unnamed: 0,A,B,C,D
LA,8.0,3.0,8.0,7.0
NYC,0.0,1.0,2.0,1.0
SF,3.0,,5.0,4.0


In [34]:
# Take the first row (NYC) of dframe2 by position, as a Series indexed by the
# frame's column labels.
# `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0; `.iloc` is
# the explicit position-based indexer and gives the same row here.
ser3 = dframe2.iloc[0]
ser3

A    0
D    1
C    2
Name: NYC, dtype: int64

In [35]:
# The Series index (A, D, C) is matched against the frame's columns and the
# subtraction is applied to every row.
dframe2 - ser3

# every value in column A - 0
# every value in column D - 1
# every value in column C - 2

Unnamed: 0,A,D,C
NYC,0,0,0
SF,3,3,3
LA,6,6,6


In [36]:
# Show dframe2 again for reference; it still holds its original values.
dframe2

Unnamed: 0,A,D,C
NYC,0,1,2
SF,3,4,5
LA,6,7,8


In [37]:
# Select column A as a Series indexed by the frame's row labels.
ser4 = dframe2['A']
ser4

NYC    0
SF     3
LA     6
Name: A, dtype: int64

In [41]:
# Subtract ser4 along the rows: its index (NYC, SF, LA) is matched against
# the frame's row labels, so each row has its own value removed.
dframe2.sub(ser4, axis='index')

# every value in row NYC - 0
# every value in row SF  - 3
# every value in row LA  - 6

Unnamed: 0,A,D,C
NYC,0,1,2
SF,0,1,2
LA,0,1,2


# Rank and Sort

In [43]:
# Rebind ser1 to values 0..2 with deliberately unsorted labels C, A, B.
ser1 = Series(data=range(3), index=list('CAB'))
ser1

C    0
A    1
B    2
dtype: int64

### Sorting

In [44]:
# Sort by index label (ascending by default); returns a new, sorted Series.
ser1.sort_index()

A    1
B    2
C    0
dtype: int64

In [46]:
# Sort by value, ascending. ser1's values are already in increasing order,
# so the result has the same order as ser1 itself.
ser1.sort_values()

C    0
A    1
B    2
dtype: int64

### Ranking

In [47]:
from numpy.random import randn

In [54]:
# Ten draws from the standard normal distribution on the default 0..9 index.
# No seed is set in the cells shown, so the values differ from run to run.
ser2 = Series(data=randn(10))
ser2

0   -0.533031
1   -0.677057
2   -1.481686
3   -1.288890
4    1.100107
5   -1.353441
6    0.374450
7   -0.414744
8   -0.409029
9    2.645407
dtype: float64

In [55]:
# Rank each value from smallest (1.0) to largest (10.0); the result keeps
# ser2's index order rather than being sorted by rank.
ser2.rank()

0     5.0
1     4.0
2     1.0
3     3.0
4     9.0
5     2.0
6     8.0
7     6.0
8     7.0
9    10.0
dtype: float64

In [66]:
# Sort the values first (this rebinds ser2 to the sorted Series).
ser2 = ser2.sort_values()
ser2

2   -1.481686
5   -1.353441
3   -1.288890
1   -0.677057
0   -0.533031
7   -0.414744
8   -0.409029
6    0.374450
4    1.100107
9    2.645407
dtype: float64

In [67]:
# Check the ranking again: on the sorted Series the ranks read 1..10 in order,
# and each index label keeps the rank it had before sorting.
ser2.rank()

2     1.0
5     2.0
3     3.0
1     4.0
0     5.0
7     6.0
8     7.0
6     8.0
4     9.0
9    10.0
dtype: float64