In [1]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
from numpy.random import randn

In [3]:
#簡単なSeriesを作ってみましょう。
ser1 = Series([1,2,3,4],index=['A','B','C','D'])

In [4]:
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [5]:
#  「reindex」で「index」の値を変えれる。
ser2 = ser1.reindex(['A','B','C','D','E','F'])

In [6]:
#　E,Fは値がないので、自動的に、null。
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [7]:
# 新しいindexの値を埋めることもできます。
# 「fill.value=〇」で空白に数字を入れれる。
# E,Fは前文のコードでnullを自動的に導入されているので、空白ではない。
ser2.reindex(['A','B','C','D','E','F','G'],fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [8]:
ser3 = Series(['USA','Mexico','Canada'],index=[0,5,10])

ser3

0        USA
5     Mexico
10    Canada
dtype: object

In [9]:
# ffillは、forward fillの略
# range(15)なので、0.5，10以外はnull値が入る。
# しかし、「method="ffill"」で、埋められる。
ser3.reindex(range(15),method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

In [10]:
# indexは行名、columnsは列名。
# reshapeを使ってDataFrameを作ってみます。

dframe = DataFrame(randn(25).reshape((5,5)),index=['A','B','D','E','F'],columns=['col1','col2','col3','col4','col5'])
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.154394,1.039439,0.978693,0.576016,-1.186757
B,-0.059129,0.389652,-0.043793,0.696509,0.863489
D,-0.195142,-0.551297,-1.250391,0.380576,-0.251798
E,0.456363,0.869999,0.720872,-0.654378,-0.593442
F,1.19152,1.224578,1.598917,-1.289238,-1.408856


In [11]:
# Cは何も入れてなかったので、Null値が自動では入る。
new_index = ['A','B','C','D','E','F']
dframe2 = dframe.reindex(new_index)
dframe2

Unnamed: 0,col1,col2,col3,col4,col5
A,0.154394,1.039439,0.978693,0.576016,-1.186757
B,-0.059129,0.389652,-0.043793,0.696509,0.863489
C,,,,,
D,-0.195142,-0.551297,-1.250391,0.380576,-0.251798
E,0.456363,0.869999,0.720872,-0.654378,-0.593442
F,1.19152,1.224578,1.598917,-1.289238,-1.408856


In [30]:
#列にも同じような操作ができます。
new_columns = ['col1','col2','col3','col4','col5','col6']

dframe2.reindex(columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,2.009186,-0.416911,1.209801,0.933627,-0.328955,
B,1.378455,-1.249703,-0.787076,0.150592,-0.362649,
C,,,,,,
D,-0.970608,0.553002,-1.622929,0.344363,-1.229152,
E,-0.231687,-0.9826,-0.667134,1.32602,1.452672,
F,-1.043072,-0.232878,1.277972,0.974397,1.545201,


In [31]:
# ixを使うと、素早くReindexが可能です。
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,2.009186,-0.416911,1.209801,0.933627,-0.328955
B,1.378455,-1.249703,-0.787076,0.150592,-0.362649
D,-0.970608,0.553002,-1.622929,0.344363,-1.229152
E,-0.231687,-0.9826,-0.667134,1.32602,1.452672
F,-1.043072,-0.232878,1.277972,0.974397,1.545201


In [32]:
dframe.ix[new_index, new_columns]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Unnamed: 0,col1,col2,col3,col4,col5,col6
A,2.009186,-0.416911,1.209801,0.933627,-0.328955,
B,1.378455,-1.249703,-0.787076,0.150592,-0.362649,
C,,,,,,
D,-0.970608,0.553002,-1.622929,0.344363,-1.229152,
E,-0.231687,-0.9826,-0.667134,1.32602,1.452672,
F,-1.043072,-0.232878,1.277972,0.974397,1.545201,
