In [21]:
import pandas as pd

# Series重置索引

## 创建Series对象

In [22]:
# Build a three-element integer Series; the dict keys (1, 2, 3) become
# the index labels and the dict values become the data.
s1 = pd.Series({1: 88, 2: 60, 3: 75})
s1

1    88
2    60
3    75
dtype: int64

## 重置索引操作
- 新索引：[1, 2, 3, 4, 5]
- 新索引中原索引没有的标签（这里是4和5），对应的值以NaN填充；由于NaN是浮点数，结果的dtype会由int64变为float64

In [23]:
# Conform s1 to the labels 1..5. Labels that s1 does not have (4 and 5)
# are returned as NaN.
s2 = s1.reindex(index=[1, 2, 3, 4, 5])
s2

1    88.0
2    60.0
3    75.0
4     NaN
5     NaN
dtype: float64

In [24]:
# Same target labels as before, but positions with no matching label in
# s1 receive 0 instead of NaN.
s2 = s1.reindex(
    index=[1, 2, 3, 4, 5],
    fill_value=0,
)
s2

1    88
2    60
3    75
4     0
5     0
dtype: int64

In [25]:
# Forward fill: a label missing from s1 takes the value of the closest
# preceding label that s1 does have (here 4 and 5 both copy label 3).
s2 = s1.reindex(
    index=[1, 2, 3, 4, 5],
    method='ffill',
)
s2

1    88
2    60
3    75
4    75
5    75
dtype: int64

In [26]:
# Backward fill: a label missing from s1 takes the value of the next
# label that s1 does have. Nothing follows 4 or 5, so they stay NaN.
s2 = s1.reindex(
    index=[1, 2, 3, 4, 5],
    method='bfill',
)
s2

1    88.0
2    60.0
3    75.0
4     NaN
5     NaN
dtype: float64

In [27]:
# Nearest fill: a label missing from s1 takes the value at the closest
# existing label (for both 4 and 5 that is label 3).
s2 = s1.reindex(
    index=[1, 2, 3, 4, 5],
    method='nearest',
)
s2

1    88
2    60
3    75
4    75
5    75
dtype: int64

# DataFrame重置索引

## 创建DataFrame对象

In [18]:
# Column labels, row labels and the 3x3 value grid for the demo frame.
columns = ['语文', '数学', '英语']
index = ['A001', 'A003', 'A005']
data = [
    [110, 105, 99],
    [105, 88, 115],
    [109, 120, 130],
]

# Each inner list of `data` is one row, labelled by the matching entry
# of `index`.
df = pd.DataFrame(data, index=index, columns=columns)
df

Unnamed: 0,语文,数学,英语
A001,110,105,99
A003,105,88,115
A005,109,120,130


## 重置行列索引

In [20]:
# Target labels for both axes: two extra rows (A002, A004) and one extra
# column (化学) that df does not contain.
columns = ['语文', '数学', '英语', '化学']
index = ['A001', 'A002', 'A003', 'A004', 'A005']

# Reindex rows and columns in a single call; every cell that has no
# counterpart in df is filled with 0.
tb = df.reindex(
    index=index,
    columns=columns,
    fill_value=0,
)
tb

Unnamed: 0,语文,数学,英语,化学
A001,110,105,99,0
A002,0,0,0,0
A003,105,88,115,0
A004,0,0,0,0
A005,109,120,130,0


In [31]:
# set_index(): load the workbook and use the '用户ID' column as the row
# index in place of the default integer index.
df = pd.read_excel('./data/demo_08.xlsx').set_index('用户ID')
df

Unnamed: 0_level_0,付款金额,产品数量,产品名称,类别,订单付款时间
用户ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A001,24179.75,2.0,产品A,优选,2021-10-09 22:54:26
A002,13277.8,1.0,产品B,,2021-10-09 22:52:42
A003,8232.91,1.0,产品C,优选,2021-01-19 12:53:01
A004,13774.875,,产品D,优选,2021-06-30 11:46:14
A005,50381.5,1.0,产品E,,2021-03-23 18:25:45
A006,7053.41,1.0,产品F,普通,2021-03-24 19:25:45
A007,9412.41,1.0,产品G,普通,2021-03-25 11:00:45
A008,7053.41,,产品H,优选,2021-03-26 23:11:11
A009,7053.41,1.0,产品K,优选,2021-03-27 07:25:30
A010,5873.91,1.0,产品Q,优选,2021-03-28 18:09:12


In [35]:
# reset_index(): reload the workbook, drop every row that contains a
# missing value, then renumber the surviving rows from 0. drop=True
# discards the old index instead of keeping it as a column.
df = (
    pd.read_excel('./data/demo_08.xlsx')
    .dropna()
    .reset_index(drop=True)
)
df

Unnamed: 0,用户ID,付款金额,产品数量,产品名称,类别,订单付款时间
0,A001,24179.75,2.0,产品A,优选,2021-10-09 22:54:26
1,A003,8232.91,1.0,产品C,优选,2021-01-19 12:53:01
2,A006,7053.41,1.0,产品F,普通,2021-03-24 19:25:45
3,A007,9412.41,1.0,产品G,普通,2021-03-25 11:00:45
4,A009,7053.41,1.0,产品K,优选,2021-03-27 07:25:30
5,A010,5873.91,1.0,产品Q,优选,2021-03-28 18:09:12
6,A001,24179.75,2.0,产品A,优选,2021-10-09 22:54:26


# 总结

- reindex: 按新索引重新对齐数据；原索引中已有的标签保留原值，新增的标签以NaN填充（可用fill_value或method指定填充方式）
- set_index: 设置某列为索引
- reset_index: 将索引重置为从0开始的默认整数索引；drop=True时丢弃原索引，否则原索引会变成普通列