In [1]:
import numpy as np
import pandas as pd

In [2]:
from numpy.random import randn

### Re-arrange data to a new index

In [3]:
ser1 = pd.Series([1,2,3,4],index=['A','B','C','D'])
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [4]:
ser2 = ser1.reindex(['A','B','C','D','E','F'])
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

when you reindex ser1 to ser2, notice that E and F does'nt exist in ser1, it will automatically give them null values in ser2

If you want to fill a certain value, you can pass the argument

In [5]:
ser2.reindex(['A','B','C','D','E','F','G','H'],fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
H    0.0
dtype: float64

You can use particular methods for filling values:

In [6]:
ser3 = pd.Series(['USA','Mexico','Canada'],index=[0,5,10])
ser3

0        USA
5     Mexico
10    Canada
dtype: object

What if we want to reindex ser3 from 0 to 14, how to fill the null value

In [7]:
ranger = range(15)
ranger

range(0, 15)

In [9]:
ser3.reindex(index=ranger, method='ffill') # ffill = forward fill
# there are a lot other method that you can use, check the pandas documentation

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

### Reindex dataframe

In [10]:
randn(25)

array([-1.73019478,  0.2299739 ,  0.40253842,  0.24915913, -0.43314417,
        1.38098426, -0.73464189, -0.86162162,  1.18327149, -1.08255848,
       -1.54259696, -0.09692513, -1.2974857 , -0.28109486, -1.66564222,
       -1.14378108, -0.36339218,  0.12847718,  0.44114458, -1.24737383,
        0.83165095,  0.38407515, -0.1799437 ,  0.89183303,  0.86791026])

In [12]:
randn(5,5)

array([[ 0.04565274,  1.02646235,  1.63209007,  0.01534563, -2.0478782 ],
       [ 0.07991113,  0.59870759,  0.55905837, -0.98634555, -0.41195292],
       [-1.11208768, -1.01717154, -0.59339295, -1.53249528, -0.47345661],
       [-0.16372697,  2.07748961, -0.45123656, -0.15218516, -2.14987333],
       [ 0.07844536,  1.18019095,  0.35093867, -0.64725606, -1.41820845]])

In [13]:
df = pd.DataFrame(randn(5,5),index=['A','B','D','E','F'],
                  columns=['col1','col2','col3','col4','col5'])
df

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.492435,0.43522,1.120089,-0.077459,-0.239397
B,-0.487057,-0.965585,-2.293711,-0.243436,0.781226
D,-0.347844,0.511375,2.224218,-1.435547,0.635071
E,-0.84378,-1.070551,0.004086,0.722326,-0.596787
F,2.354108,-1.119675,-1.912513,-0.295928,0.174952


In [14]:
df2 = df.reindex(['A','B','C','D','E','F'])
df2

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.492435,0.43522,1.120089,-0.077459,-0.239397
B,-0.487057,-0.965585,-2.293711,-0.243436,0.781226
C,,,,,
D,-0.347844,0.511375,2.224218,-1.435547,0.635071
E,-0.84378,-1.070551,0.004086,0.722326,-0.596787
F,2.354108,-1.119675,-1.912513,-0.295928,0.174952


In [15]:
df2 = df.reindex(['A','B','D','C','E','F']) # Penempatan index baru menentukan letak barisnya
df2

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.492435,0.43522,1.120089,-0.077459,-0.239397
B,-0.487057,-0.965585,-2.293711,-0.243436,0.781226
D,-0.347844,0.511375,2.224218,-1.435547,0.635071
C,,,,,
E,-0.84378,-1.070551,0.004086,0.722326,-0.596787
F,2.354108,-1.119675,-1.912513,-0.295928,0.174952


In [16]:
df2 = df.reindex(['A','B','C','D','E','F'])
df2

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.492435,0.43522,1.120089,-0.077459,-0.239397
B,-0.487057,-0.965585,-2.293711,-0.243436,0.781226
C,,,,,
D,-0.347844,0.511375,2.224218,-1.435547,0.635071
E,-0.84378,-1.070551,0.004086,0.722326,-0.596787
F,2.354108,-1.119675,-1.912513,-0.295928,0.174952


In [17]:
new_columns = columns=['col1','col2','col3','col4','col5','col6']
new_columns

['col1', 'col2', 'col3', 'col4', 'col5', 'col6']

In [18]:
df2.reindex(columns=new_columns)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,-0.492435,0.43522,1.120089,-0.077459,-0.239397,
B,-0.487057,-0.965585,-2.293711,-0.243436,0.781226,
C,,,,,,
D,-0.347844,0.511375,2.224218,-1.435547,0.635071,
E,-0.84378,-1.070551,0.004086,0.722326,-0.596787,
F,2.354108,-1.119675,-1.912513,-0.295928,0.174952,


In [19]:
df

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.492435,0.43522,1.120089,-0.077459,-0.239397
B,-0.487057,-0.965585,-2.293711,-0.243436,0.781226
D,-0.347844,0.511375,2.224218,-1.435547,0.635071
E,-0.84378,-1.070551,0.004086,0.722326,-0.596787
F,2.354108,-1.119675,-1.912513,-0.295928,0.174952


In [20]:
df2

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.492435,0.43522,1.120089,-0.077459,-0.239397
B,-0.487057,-0.965585,-2.293711,-0.243436,0.781226
C,,,,,
D,-0.347844,0.511375,2.224218,-1.435547,0.635071
E,-0.84378,-1.070551,0.004086,0.722326,-0.596787
F,2.354108,-1.119675,-1.912513,-0.295928,0.174952


In [21]:
df2.iloc[['A','B','C','D','E','F'],new_columns]
# .ix tidak bisa diapply menjadi iloc; iloc spesifik untuk index location

IndexError: .iloc requires numeric indexers, got ['A' 'B' 'C' 'D' 'E' 'F']