In [36]:
#Now we'll learn about reindexing

In [37]:
import numpy as np
from pandas import Series, DataFrame
import pandas as pd
from numpy.random import randn

In [38]:
# Let's create a small Series with explicit string labels
ser1 = Series(data=[1, 2, 3, 4], index=list('ABCD'))

In [39]:
# Display the Series (a bare expression on the last line of a cell is rendered as its output)
ser1

A    1
B    2
C    3
D    4
dtype: int64

In [40]:
# reindex returns a new Series aligned to the requested labels;
# labels that ser1 does not have ('E' and 'F') come back as NaN
ser2 = ser1.reindex(index=['A', 'B', 'C', 'D', 'E', 'F'])

In [41]:
# Display the result: 'E' and 'F' are NaN, and the dtype is now float64
# (NaN is a floating-point value, so the integers were upcast)
ser2

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
dtype: float64

In [42]:
# We can also supply a fill value for the labels that reindex has to add.
# Note: fill_value is only used for labels missing from the source index ('G' here).
# 'E' and 'F' already exist in ser2 (holding NaN), so they stay NaN.
ser2.reindex(['A','B','C','D','E','F','G'],fill_value=0)

A    1.0
B    2.0
C    3.0
D    4.0
E    NaN
F    NaN
G    0.0
dtype: float64

In [43]:
# A Series whose integer index has gaps; used below to demonstrate a fill method
ser3 = Series({0: 'USA', 5: 'Mexico', 10: 'Canada'})

# Display it
ser3

0        USA
5     Mexico
10    Canada
dtype: object

In [44]:
# Forward fill: each newly added label takes the value of the closest preceding label in ser3
ser3.reindex(index=range(15), method='ffill')

0        USA
1        USA
2        USA
3        USA
4        USA
5     Mexico
6     Mexico
7     Mexico
8     Mexico
9     Mexico
10    Canada
11    Canada
12    Canada
13    Canada
14    Canada
dtype: object

In [45]:
# Reindexing rows, columns, or both

# Build a 5x5 DataFrame of standard-normal values. Row label 'C' is absent
# from the index so that it can be added by reindexing in the next step.
# The generator is seeded so the values are identical on every run
# (Restart Kernel -> Run All). It is a local generator, so NumPy's global
# random state is left untouched.
# NOTE: numbers shown in a previously saved output may come from an unseeded run.
dframe = DataFrame(
    np.random.RandomState(0).randn(5, 5),
    index=['A', 'B', 'D', 'E', 'F'],
    columns=['col1', 'col2', 'col3', 'col4', 'col5'],
)

# Display it
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.55898,-2.272321,0.204348,-0.771805,-0.160506
B,-0.438774,0.948207,-0.157986,-0.179256,0.964703
D,-1.466389,0.132449,0.134669,-0.819419,1.08604
E,-0.556057,-1.215295,0.267768,1.716307,-0.380185
F,-0.158158,-0.267589,-2.004589,-0.919007,0.99587


In [46]:
# Row 'C' is missing from dframe; reindexing the rows inserts it, filled with NaN
dframe2 = dframe.reindex(index=['A', 'B', 'C', 'D', 'E', 'F'])
dframe2


Unnamed: 0,col1,col2,col3,col4,col5
A,0.55898,-2.272321,0.204348,-0.771805,-0.160506
B,-0.438774,0.948207,-0.157986,-0.179256,0.964703
C,,,,,
D,-1.466389,0.132449,0.134669,-0.819419,1.08604
E,-0.556057,-1.215295,0.267768,1.716307,-0.380185
F,-0.158158,-0.267589,-2.004589,-0.919007,0.99587


In [47]:
# BP: What if I want a row 'C' without null values? Then the existing rows
# have to be relabelled rather than reindexed. rename() maps old labels to
# new ones and returns a new frame, so dframe keeps its original labels.
# (Writing into dframe.index.values instead would mutate the Index's backing
# array in place. An Index is meant to be immutable, and label lookups may
# keep using stale cached data afterwards, so that approach is unsafe.)
dframe.rename(index={'D': 'C', 'E': 'D', 'F': 'E'})


Unnamed: 0,col1,col2,col3,col4,col5
A,0.55898,-2.272321,0.204348,-0.771805,-0.160506
B,-0.438774,0.948207,-0.157986,-0.179256,0.964703
C,-1.466389,0.132449,0.134669,-0.819419,1.08604
D,-0.556057,-1.215295,0.267768,1.716307,-0.380185
E,-0.158158,-0.267589,-2.004589,-0.919007,0.99587


In [48]:
# BP: Let's try something else - set_index with an explicit Index is a clean
# way to relabel rows.
# First make sure dframe carries its original labels. set_index assigns the
# labels by position, so this works whatever the current labels are
# (reindex would align on the current labels instead of relabelling).
dframe = dframe.set_index(pd.Index(['A', 'B', 'D', 'E', 'F']))
# set_index returns a new frame; it is only displayed here, not assigned,
# so dframe itself keeps the labels A, B, D, E, F.
dframe.set_index(pd.Index(['A', 'B', 'C', 'D', 'E']))


Unnamed: 0,col1,col2,col3,col4,col5
A,0.55898,-2.272321,0.204348,-0.771805,-0.160506
B,-0.438774,0.948207,-0.157986,-0.179256,0.964703
C,-1.466389,0.132449,0.134669,-0.819419,1.08604
D,-0.556057,-1.215295,0.267768,1.716307,-0.380185
E,-0.158158,-0.267589,-2.004589,-0.919007,0.99587


In [49]:
# Columns can be reindexed as well; a column the frame lacks ('col6') is added as NaN
new_columns = ['col1', 'col2', 'col3', 'col4', 'col5', 'col6']

dframe2.reindex(new_columns, axis='columns')

Unnamed: 0,col1,col2,col3,col4,col5,col6
A,0.55898,-2.272321,0.204348,-0.771805,-0.160506,
B,-0.438774,0.948207,-0.157986,-0.179256,0.964703,
C,,,,,,
D,-1.466389,0.132449,0.134669,-0.819419,1.08604,
E,-0.556057,-1.215295,0.267768,1.716307,-0.380185,
F,-0.158158,-0.267589,-2.004589,-0.919007,0.99587,


In [50]:
# Label-based indexing can also be used to select rows and columns quickly
# (we'll see this more in the future).
# BP: the lesson originally used DataFrame.ix here; .ix is no longer supported
# in pandas, so DataFrame.loc is used instead.

# Show the original frame
dframe

Unnamed: 0,col1,col2,col3,col4,col5
A,0.55898,-2.272321,0.204348,-0.771805,-0.160506
B,-0.438774,0.948207,-0.157986,-0.179256,0.964703
D,-1.466389,0.132449,0.134669,-0.819419,1.08604
E,-0.556057,-1.215295,0.267768,1.716307,-0.380185
F,-0.158158,-0.267589,-2.004589,-0.919007,0.99587


In [51]:
# BP: .loc only selects labels that already exist (asking for a missing label
# raises KeyError in current pandas), so 'col6' has to be added with reindex
# before the selection. The reindexed frame is assigned back to dframe2
# because the earlier column reindex was only displayed, not stored.
dframe2 = dframe2.reindex(columns=new_columns)
dframe2.loc[['A', 'B', 'C', 'D', 'E', 'F'], new_columns]

Unnamed: 0,col1,col2,col3,col4,col5
A,0.55898,-2.272321,0.204348,-0.771805,-0.160506
B,-0.438774,0.948207,-0.157986,-0.179256,0.964703
C,,,,,
D,-1.466389,0.132449,0.134669,-0.819419,1.08604
E,-0.556057,-1.215295,0.267768,1.716307,-0.380185
F,-0.158158,-0.267589,-2.004589,-0.919007,0.99587
