Outline
* Index Objects
* Reindex

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

# Index Objects

In [2]:
# A four-element Series labelled 'A'..'D'; its index is examined in the next cells.
my_ser = Series(data=[1, 2, 3, 4], index=list('ABCD'))
my_ser

A    1
B    2
C    3
D    4
dtype: int64

In [3]:
# Keep a reference to the Series' Index object so it can be inspected on its own.
my_index = my_ser.index

In [4]:
# Display the Index object: it holds the row labels of my_ser.
my_index

Index([u'A', u'B', u'C', u'D'], dtype='object')

In [10]:
# Positional lookup on an Index returns the label stored at that position.
my_index[0]

'A'

In [7]:
# Slicing an Index yields another Index containing the selected labels.
my_index[2:]

Index([u'C', u'D'], dtype='object')

In [8]:
# Index objects are immutable: item assignment raises TypeError.
# The error is the point of this cell, so catch and print it instead of letting
# it abort a "Restart & Run All" of the notebook.
try:
    my_index[0] = 'Z'
except TypeError as err:
    print('TypeError: {}'.format(err))

TypeError: Index does not support mutable operations

# Reindex [API](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.reindex.html)

In [11]:
from numpy.random import randn

In [12]:
# Starting Series for the reindex examples: values 1..4 labelled 'A'..'D'.
ser1 = Series(data=[1, 2, 3, 4], index=list('ABCD'))
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [13]:
# Reindex onto a longer label list. Labels that ser1 does not have ('E', 'F')
# are added with NaN values, which is why the result is float64.
ser2 = ser1.reindex(index=list('ABCDEF'))
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [15]:
# Reindex again, supplying a default for labels that are new to ser2.
# Only 'G' is new, so only 'G' receives fill_value=0; 'E' and 'F' already exist
# in ser2 (holding NaN) and are therefore left untouched.
ser2.reindex(list('ABCDEFG'), fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [16]:
# A Series with gaps in its integer index (0, 5, 10), used to demonstrate
# forward filling below.
ser3 = Series(data=['USA', 'Mexico', 'Canada'], index=[0, 5, 10])
ser3

0        USA
5     Mexico
10    Canada
dtype: object

In [21]:
# Target index 0..14. Wrapping in list() makes this a concrete list on both
# Python 2 and Python 3 (on Python 3 a bare range(15) is a lazy range object
# and would display as `range(0, 15)` rather than the list of values).
ranger = list(range(15))
ranger

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

In [22]:
# Forward fill (method='ffill'): every label missing from ser3 takes the value
# of the closest preceding label that does exist.
ser3.reindex(index=ranger, method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

In [28]:
# 5x5 frame of standard-normal draws. Row label 'C' is left out on purpose so a
# later reindex can add it. No seed is set, so the values differ on every run.
dframe = DataFrame(
    randn(5, 5),
    index=list('ABDEF'),
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.844397,1.448835,-1.108635,1.298688,0.06401
B,-0.362694,-0.164023,-1.877601,-1.17858,-1.073468
D,0.32094,0.537644,1.180497,-0.734689,1.544703
E,1.17396,0.494967,0.455954,0.05927,-2.018397
F,-0.01345,0.32186,2.798681,-1.369663,0.35774


In [29]:
# Reindex the rows: 'C' is not present in dframe, so it appears as an all-NaN row.
dframe2 = dframe.reindex(index=list('ABCDEF'))
dframe2

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.844397,1.448835,-1.108635,1.298688,0.06401
B,-0.362694,-0.164023,-1.877601,-1.17858,-1.073468
C,,,,,
D,0.32094,0.537644,1.180497,-0.734689,1.544703
E,1.17396,0.494967,0.455954,0.05927,-2.018397
F,-0.01345,0.32186,2.798681,-1.369663,0.35774


In [30]:
# Reindex along the columns axis: 'col6' is new, so it is added as an all-NaN column.
dframe2.reindex(
    columns=['col1', 'col2', 'col3', 'col4', 'col5', 'col6'],
)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,-0.844397,1.448835,-1.108635,1.298688,0.06401,
B,-0.362694,-0.164023,-1.877601,-1.17858,-1.073468,
C,,,,,,
D,0.32094,0.537644,1.180497,-0.734689,1.544703,
E,1.17396,0.494967,0.455954,0.05927,-2.018397,
F,-0.01345,0.32186,2.798681,-1.369663,0.35774,


In [31]:
# Show the original frame again: the reindex calls above returned new objects,
# so dframe itself still has neither row 'C' nor column 'col6'.
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,-0.844397,1.448835,-1.108635,1.298688,0.06401
B,-0.362694,-0.164023,-1.877601,-1.17858,-1.073468
D,0.32094,0.537644,1.180497,-0.734689,1.544703
E,1.17396,0.494967,0.455954,0.05927,-2.018397
F,-0.01345,0.32186,2.798681,-1.369663,0.35774


In [36]:
# Reindex rows and columns in a single call.
# This cell originally used `.ix`, which was deprecated in pandas 0.20 and
# removed in pandas 1.0. `.loc` is not a drop-in replacement here because it
# raises KeyError for labels that do not exist ('C', 'col6'); `reindex` adds
# them as NaN, which matches the output shown for this cell.
dframe.reindex(
    index=['A', 'B', 'C', 'D', 'E', 'F'],
    columns=['col1', 'col2', 'col3', 'col4', 'col5', 'col6'],
)

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,-0.844397,1.448835,-1.108635,1.298688,0.06401,
B,-0.362694,-0.164023,-1.877601,-1.17858,-1.073468,
C,,,,,,
D,0.32094,0.537644,1.180497,-0.734689,1.544703,
E,1.17396,0.494967,0.455954,0.05927,-2.018397,
F,-0.01345,0.32186,2.798681,-1.369663,0.35774,
