In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pylab as plt
import scipy as sp

In [5]:
# Build the demo DataFrame: five rows labelled r0..r4, an integer column
# `c1` (0..4) and a float column `c2` of standard-normal draws.
# The seed is fixed first so the random column is reproducible on re-run.
np.random.seed(0)
idx = ["r0", "r1", "r2", "r3", "r4"]
df = pd.DataFrame(
    {"c1": np.arange(5), "c2": np.random.randn(5)},
    index=idx,
)
df

Unnamed: 0,c1,c2
r0,0,1.764052
r1,1,0.400157
r2,2,0.978738
r3,3,2.240893
r4,4,1.867558


In [10]:
# reindex: conform the frame to a new set of row labels.
# Goal here: drop rows r3 and r4 and introduce rows r5 and r6 instead.
# Labels that do not exist in `df` (r5, r6) come back as all-NaN rows.
new_idx = ["r0", "r1", "r2", "r5", "r6"]
df1 = df.reindex(index=new_idx)
df1

Unnamed: 0,c1,c2
r0,0.0,1.764052
r1,1.0,0.400157
r2,2.0,0.978738
r5,,
r6,,


In [16]:
# Fill the gaps that reindexing introduces.
# fill_value: the value written into rows for labels missing from `df`
# (0 here), instead of the default NaN.
df2 = df.reindex(index=new_idx, fill_value=0)
df2

Unnamed: 0,c1,c2
r0,0,1.764052
r1,1,0.400157
r2,2,0.978738
r5,0,0.0
r6,0,0.0


In [20]:
# fill_value is not limited to numbers: here the new rows (r5, r6) are
# filled with the string "missing" in every column.
df.reindex(index=new_idx, fill_value="missing")

Unnamed: 0,c1,c2
r0,0,1.76405
r1,1,0.400157
r2,2,0.978738
r5,missing,missing
r6,missing,missing


In [22]:
# Same idea with a different placeholder: the new rows (r5, r6) are
# filled with "-" in every column.
df.reindex(index=new_idx, fill_value="-")

Unnamed: 0,c1,c2
r0,0,1.76405
r1,1,0.400157
r2,2,0.978738
r5,-,-
r6,-,-


In [25]:
# Time-series example: a daily DatetimeIndex of five dates starting at
# 2017-08-01, used as the row index of a small two-column frame.
# NOTE: this cell rebinds `idx` and `df`, replacing the string-labelled
# frame built earlier; the reindex cells that follow operate on this
# time-series frame.
idx = pd.date_range("2017/08/01", periods=5, freq='D')
print(idx)

df = pd.DataFrame(
    {
        # list(range(...)) yields the same [10, 20, 30, 40, 50] list as the
        # former identity comprehension, without the redundant loop.
        "c1": list(range(10, 60, 10)),
        "c2": np.arange(5),
    },
    index=idx,
)
df

DatetimeIndex(['2017-08-01', '2017-08-02', '2017-08-03', '2017-08-04',
               '2017-08-05'],
              dtype='datetime64[ns]', freq='D')


Unnamed: 0,c1,c2
2017-08-01,10,0
2017-08-02,20,1
2017-08-03,30,2
2017-08-04,40,3
2017-08-05,50,4


In [28]:
# Reindex the time-series frame onto a wider daily range: ten dates
# starting 2017-07-30. Dates that are not in the original index get
# NaN in every column.
idx2 = pd.date_range(start="07/30/2017", periods=10, freq="D")
df.reindex(index=idx2)

Unnamed: 0,c1,c2
2017-07-30,,
2017-07-31,,
2017-08-01,10.0,0.0
2017-08-02,20.0,1.0
2017-08-03,30.0,2.0
2017-08-04,40.0,3.0
2017-08-05,50.0,4.0
2017-08-06,,
2017-08-07,,
2017-08-08,,


In [34]:
# method="ffill": forward fill. New dates take the values of the last
# earlier date present in the original index. Dates before the first
# original date have nothing to carry forward and stay NaN.
df.reindex(index=idx2, method="ffill")

Unnamed: 0,c1,c2
2017-07-30,,
2017-07-31,,
2017-08-01,10.0,0.0
2017-08-02,20.0,1.0
2017-08-03,30.0,2.0
2017-08-04,40.0,3.0
2017-08-05,50.0,4.0
2017-08-06,50.0,4.0
2017-08-07,50.0,4.0
2017-08-08,50.0,4.0


In [35]:
# method="bfill": backward fill. New dates take the values of the next
# later date present in the original index. Dates after the last
# original date have nothing to pull back and stay NaN.
df.reindex(index=idx2, method="bfill")

Unnamed: 0,c1,c2
2017-07-30,10.0,0.0
2017-07-31,10.0,0.0
2017-08-01,10.0,0.0
2017-08-02,20.0,1.0
2017-08-03,30.0,2.0
2017-08-04,40.0,3.0
2017-08-05,50.0,4.0
2017-08-06,,
2017-08-07,,
2017-08-08,,


In [36]:
# method="nearest": each new date takes the values of the closest date
# in the original index, so both the leading and the trailing new dates
# are filled and no NaN rows remain.
df.reindex(index=idx2, method="nearest")

Unnamed: 0,c1,c2
2017-07-30,10,0
2017-07-31,10,0
2017-08-01,10,0
2017-08-02,20,1
2017-08-03,30,2
2017-08-04,40,3
2017-08-05,50,4
2017-08-06,50,4
2017-08-07,50,4
2017-08-08,50,4
