In [2]:
import pandas as pd
import numpy as np

# Reindexing and altering labels
https://pandas.pydata.org/pandas-docs/stable/basics.html#basics-reindexing

reindex() is the fundamental data alignment method in pandas. It is used to implement nearly all other features relying on label-alignment functionality. To reindex means to conform the data to match a given set of labels along a particular axis. This accomplishes several things:

    - Reorders the existing data to match a new set of labels
    - Inserts missing value (NA) markers in label locations where no data for that label existed
    - If specified, fill data for missing labels using logic (highly relevant to working with time series data)

Example from the pandas documentation: https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reindex.html

In [3]:
# Row labels: one browser name per observation.
index = ['Firefox', 'Chrome', 'Safari', 'IE10', 'Konqueror']

# Two measurements per browser, keyed by column name.
df = pd.DataFrame(
    data={
        'http_status': [200, 200, 404, 404, 301],
        'response_time': [0.04, 0.02, 0.07, 0.08, 1.0],
    },
    index=index,
)

In [4]:
# Show the frame as constructed, indexed by browser name.
df

Unnamed: 0,http_status,response_time
Firefox,200,0.04
Chrome,200,0.02
Safari,404,0.07
IE10,404,0.08
Konqueror,301,1.0


In [5]:
# Target row labels: 'Safari', 'IE10' and 'Chrome' already exist in df,
# while 'Iceweasel' and 'Comodo Dragon' do not.
new_index = [
    'Safari',
    'Iceweasel',
    'Comodo Dragon',
    'IE10',
    'Chrome',
]

In [6]:
# Echo the list of target labels.
new_index

['Safari', 'Iceweasel', 'Comodo Dragon', 'IE10', 'Chrome']

In [7]:
# Show df once more so it can be compared with the reindexed result.
df

Unnamed: 0,http_status,response_time
Firefox,200,0.04
Chrome,200,0.02
Safari,404,0.07
IE10,404,0.08
Konqueror,301,1.0


In [8]:
# Conform df's rows to new_index. Labels that df does not contain
# ('Iceweasel', 'Comodo Dragon') come back as all-NaN rows, and the
# http_status column is rendered as float because of those NaNs.
df.reindex(index=new_index)

Unnamed: 0,http_status,response_time
Safari,404.0,0.07
Iceweasel,,
Comodo Dragon,,
IE10,404.0,0.08
Chrome,200.0,0.02


# df.reindex() - Time Series Example

In [9]:
# Six consecutive daily timestamps starting on 2010-01-01.
date_index = pd.date_range('1/1/2010', periods=6, freq='D')

# Daily prices with one missing observation (the third day, 2010-01-03).
df2 = pd.DataFrame(
    {"prices": [100, 101, np.nan, 100, 89, 88]},
    index=date_index,
)
df2

Unnamed: 0,prices
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,
2010-01-04,100.0
2010-01-05,89.0
2010-01-06,88.0


In [10]:
# Ten daily timestamps starting on 2009-12-29: the range begins three days
# before df2's first date and ends one day after its last (2010-01-07).
date_index2 = pd.date_range('12/29/2009', periods=10, freq='D')

# Without a fill method, every date that df2 does not contain becomes NaN.
df2.reindex(date_index2)

Unnamed: 0,prices
2009-12-29,
2009-12-30,
2009-12-31,
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,
2010-01-04,100.0
2010-01-05,89.0
2010-01-06,88.0
2010-01-07,


In [11]:
# Back-fill the newly introduced dates from the next valid observation.
# limit=2 caps the fill at two consecutive labels, so 2009-12-29 stays NaN.
# 2010-01-07 stays NaN because there is no later observation to fill from.
# The NaN already stored at 2010-01-03 is untouched: filling during reindex
# compares the old and new indexes only, it does not inspect existing values.
df2.reindex(date_index2, method='bfill', limit=2)


Unnamed: 0,prices
2009-12-29,
2009-12-30,100.0
2009-12-31,100.0
2010-01-01,100.0
2010-01-02,101.0
2010-01-03,
2010-01-04,100.0
2010-01-05,89.0
2010-01-06,88.0
2010-01-07,


# Series.align() - aligning with another object

In [12]:
import matplotlib.pyplot as plt

In [13]:
# Two overlapping integer ranges: 1..10 and 3..11.
x1, x2 = np.arange(1, 11), np.arange(3, 12)
# Element-wise sine of each range.
y1, y2 = np.sin(x1), np.sin(x2)

In [14]:
# Wrap each sine array in a Series labelled by the x values it was computed from.
s1 = pd.Series(data=y1, index=x1)
s2 = pd.Series(data=y2, index=x2)

In [23]:
# Render both Series, one after the other, before they are aligned.
display(s1, s2)

1     0.841471
2     0.909297
3     0.141120
4    -0.756802
5    -0.958924
6    -0.279415
7     0.656987
8     0.989358
9     0.412118
10   -0.544021
dtype: float64

3     0.141120
4    -0.756802
5    -0.958924
6    -0.279415
7     0.656987
8     0.989358
9     0.412118
10   -0.544021
11   -0.999990
dtype: float64

In [20]:
# Align the two Series on the union of their labels ('outer' is the default
# join); each result carries NaN where its source had no such label.
s1a, s2a = s1.align(s2, join='outer')

In [21]:
# Render the aligned pair; both now share the same index.
display(s1a, s2a)

1     0.841471
2     0.909297
3     0.141120
4    -0.756802
5    -0.958924
6    -0.279415
7     0.656987
8     0.989358
9     0.412118
10   -0.544021
11         NaN
dtype: float64

1          NaN
2          NaN
3     0.141120
4    -0.756802
5    -0.958924
6    -0.279415
7     0.656987
8     0.989358
9     0.412118
10   -0.544021
11   -0.999990
dtype: float64