In [1]:
import numpy as np
import pandas as pd

# 一.索引对象Index

   1.Series和DataFrame的索引都是Index对象

In [5]:
# Build a five-element Series labelled with the letters a-e.
series_labels = list("abcde")
ps = pd.Series(range(len(series_labels)), index=series_labels)
ps

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [8]:
# 3x3 frame holding 0..8, rows labelled a-c and columns labelled A-C.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=list("ABC"),
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [12]:
# Show the type of each index object: Series index, DataFrame row index,
# and DataFrame column index.
for label, index_obj in (
    ("<Series Index Type> ", ps.index),
    ("<DataFrame Index Type> ", df.index),
    ("<DataFrame Column Index Type> ", df.columns),
):
    print(label, type(index_obj))

<Series Index Type>  <class 'pandas.core.indexes.base.Index'>
<DataFrame Index Type>  <class 'pandas.core.indexes.base.Index'>
<DataFrame Column Index Type>  <class 'pandas.core.indexes.base.Index'>


2.索引对象具有不可变性

In [13]:
# Index objects are immutable: assigning to an element of an Index raises
# TypeError. The error is caught and printed so that the demonstration is
# still visible while the notebook keeps running top to bottom.
ps = pd.Series(range(5), index=list("abcde"))
try:
    ps.index[0] = 'd'
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: Index does not support mutable operations

3.常见索引类型

(1) 索引 - Index

(2) 整数索引 - Int64Index（pandas 2.0 起已移除，整数索引统一为 dtype 为 int64 的 Index）

(3) 层级索引 - MultiIndex

(4) 时间戳索引 - DatetimeIndex

# 二.索引基本操作

### 1.重新索引

In [25]:
# Reindexing a Series with reindex
#
# Notes:
#   [1] reindex does not change the original Series; it returns a new object
#   [2] reindex is not a blind relabelling: values whose labels also appear in
#       the new index are kept, labels with no match are filled with NaN
#   [3] the result here is float64 because NaN was introduced for the
#       unmatched labels
new_labels = list("edcbaABC")
ps = pd.Series(range(5), index=list("abcde"))
reindex_series = ps.reindex(new_labels)

print("<src  series>\n", ps)
print("<dest series>\n", reindex_series)

<src  series>
 a    0
b    1
c    2
d    3
e    4
dtype: int64
<dest series>
 e    4.0
d    3.0
c    2.0
b    1.0
a    0.0
A    NaN
B    NaN
C    NaN
dtype: float64


In [26]:
# Reindex the DataFrame's row index; the same rules apply as for a Series.
row_order = list("cbaX")
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=list("ABC"),
)
target_df = df.reindex(row_order)

In [27]:
# Display the source frame: reindex returned a new object, so df is unchanged.
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [28]:
# Display the reindexed frame: rows follow the requested order c, b, a, X and
# the unmatched label X is filled with NaN.
target_df

Unnamed: 0,A,B,C
c,6.0,7.0,8.0
b,3.0,4.0,5.0
a,0.0,1.0,2.0
X,,,


In [29]:
# Reindex the DataFrame's columns; the same rules apply as for a Series.
column_order = list("CBAX")
target_df = df.reindex(columns=column_order)
target_df

Unnamed: 0,C,B,A,X
a,2,1,0,
b,5,4,3,
c,8,7,6,


### 2.增

In [49]:
# Add an element to a Series (modifies the existing object).
ps = pd.Series(range(5), index=list("abcde"))
# Assigning to the new row label "g" adds it with the value 9.
ps["g"] = 9
ps

a    0
b    1
c    2
d    3
e    4
g    9
dtype: int64

In [50]:
# Add an element to a Series without modifying the original (a new object is
# produced).
# Series.append was removed in pandas 2.0; pd.concat is the replacement and
# likewise returns a new Series, leaving ps untouched.
ps1 = pd.concat([ps, pd.Series({'f': 10})])
print("<ps>\n", ps)
print("<ps1>\n", ps1)

<ps>
 a    0
b    1
c    2
d    3
e    4
g    9
dtype: int64
<ps1>
 a     0
b     1
c     2
d     3
e     4
g     9
f    10
dtype: int64


In [62]:
# Adding elements to a DataFrame: start from a fresh 3x3 frame.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=list("ABC"),
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [63]:
# Add a column to the DataFrame.
# Notes:
#   [1] df[key] = value adds a column by default
#   [2] the new column is appended after the existing ones
df["X"] = 100
df

Unnamed: 0,A,B,C,X
a,0,1,2,100
b,3,4,5,100
c,6,7,8,100


In [64]:
# Add a column and place it first (position 0).
# Notes:
#   [1] insert cannot insert a column that already exists
#   [2] insert modifies the existing object
df.insert(loc=0, column="Y", value=[-1, -1, -1])
df

Unnamed: 0,Y,A,B,C,X
a,-1,0,1,2,100
b,-1,3,4,5,100
c,-1,6,7,8,100


In [65]:
# Add a row to the DataFrame.
# Option 1: assign through .loc with a new row label (modifies the existing
# object). One value is supplied per column.
df.loc["d"] = [0] * 5
df

Unnamed: 0,Y,A,B,C,X
a,-1,0,1,2,100
b,-1,3,4,5,100
c,-1,6,7,8,100
d,0,0,0,0,0


In [67]:
# Add a row to the DataFrame.
# Option 2: concatenate a one-row frame built from a dict of column values.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# Notes:
#   [1] the original object is not modified; a new frame is returned
#   [2] ignore_index=True discards the custom row labels and renumbers the
#       rows from 0, as the original append call did
new_row = pd.DataFrame([{'Y': 1, "A": 1, "B": 1, "C": 1, "X": 1}])
pd.concat([df, new_row], ignore_index=True)

Unnamed: 0,Y,A,B,C,X
0,-1,0,1,2,100
1,-1,3,4,5,100
2,-1,6,7,8,100
3,0,0,0,0,0
4,1,1,1,1,1


### 3.删

In [72]:
# Delete an element from a Series (modifies the existing object).
ps = pd.Series(range(5), index=list("abcde"))
del ps["e"]
ps

a    0
b    1
c    2
d    3
dtype: int64

In [73]:
# Deleting elements from a DataFrame: start from a fresh 3x3 frame.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=list("ABC"),
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [74]:
# Delete a column: del df[key] targets columns by default and modifies the
# existing object.
del df["C"]
df

Unnamed: 0,A,B
a,0,1
b,3,4
c,6,7


#### 通过drop函数删除元素

⚠️ drop 默认不修改原有对象，而是生成并返回新对象

In [76]:
# Drop the element stored under a single label; the result is a new Series.
ps = pd.Series(range(5), index=list("abcde"))
ps.drop(labels="e")

a    0
b    1
c    2
d    3
dtype: int64

In [78]:
# Drop the elements stored under several labels at once.
ps.drop(labels=["a", "b"])

c    2
d    3
e    4
dtype: int64

In [79]:
# Dropping rows from a DataFrame: start from a fresh 3x3 frame.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=list("ABC"),
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [80]:
# axis=0 is the default and means a row is dropped; it is spelled out here.
df.drop("c", axis=0)

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5


In [82]:
# Passing axis=1 means columns are dropped instead of rows.
df.drop(labels=["A", "B"], axis=1)

Unnamed: 0,C
a,2
b,5
c,8


In [84]:
# How to make drop modify the existing object:
# Note:
#    pass inplace=True
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=list("ABC"),
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [85]:
# With inplace=True the rows are removed from df itself.
df.drop(labels=["a", "b"], inplace=True)
df

Unnamed: 0,A,B,C
c,6,7,8


### 4.改

In [87]:
# Modifying Series elements: start from a fresh Series labelled a-e.
series_labels = list("abcde")
ps = pd.Series(range(len(series_labels)), index=series_labels)
ps

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [88]:
# Modify by label name (modifies the existing object).
ps["a"] = 99
ps

a    99
b     1
c     2
d     3
e     4
dtype: int64

In [89]:
# Modify by integer position (modifies the existing object).
# .iloc is used for positional access: a bare ps[4] on a label-indexed Series
# relies on a positional fallback that is deprecated (FutureWarning since
# pandas 2.1), so the position is made explicit here.
ps.iloc[4] = 99
ps

a    99
b     1
c     2
d     3
e    99
dtype: int64

In [90]:
# Modifying DataFrame elements: start from a fresh 3x3 frame.
df = pd.DataFrame(
    np.arange(9).reshape(3, 3),
    index=list("abc"),
    columns=list("ABC"),
)
df

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [93]:
# Overwrite a whole column (modifies the existing object).
df["A"] = [-1, -2, -3]
df

Unnamed: 0,A,B,C
a,-1,1,2
b,-2,4,5
c,-3,7,8


In [94]:
# Overwrite a whole row (modifies the existing object).
# The row is addressed by its label through .loc.
df.loc["c", :] = [0, 0, 0]
df

Unnamed: 0,A,B,C
a,-1,1,2
b,-2,4,5
c,0,0,0


In [95]:
# Overwrite a single value.
# The cell is addressed by (row label, column label) through .loc.
row_label, column_label = "c", "C"
df.loc[row_label, column_label] = 100
df

Unnamed: 0,A,B,C
a,-1,1,2
b,-2,4,5
c,0,0,100
