# 一.索引对象Index

In [12]:
import numpy as np
import pandas as pd

### 1. Series和DataFrame中的索引都是Index对象

In [13]:
ps1 = pd.Series(range(5),index=['a','b','c','d','e'])
print(type(ps1.index))
ps1

<class 'pandas.core.indexes.base.Index'>


a    0
b    1
c    2
d    3
e    4
dtype: int32

In [14]:
pd1 = pd.DataFrame(np.arange(9).reshape(3,3),index = ['a','b','c'],columns = ['A','B','C'])
pd1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


### 2. 索引对象不可变，保证了数据的安全

In [15]:
ps.index[0] = 2
ps

NameError: name 'ps' is not defined

In [16]:
pd1.index[1] = 2

TypeError: Index does not support mutable operations

### 3.常见的Index种类
Index，索引

Int64Index，整数索引

MultiIndex，层级索引

DatetimeIndex，时间戳类型

# 二.索引的 一些基本操作
### 1.重新索引
### 2.增
### 3.删
### 4.改
### 5.查

In [17]:
#1.reindex 创建一个符合新索引的新对象
ps2 = ps1.reindex(['a','b','c','d','e','f'])
ps2

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
f    NaN
dtype: float64

In [18]:
#行索引重建
pd2 = pd1.reindex(['a','b','c','d'])
pd2

Unnamed: 0,A,B,C
a,0.0,1.0,2.0
b,3.0,4.0,5.0
c,6.0,7.0,8.0
d,,,


In [19]:
#列索引重建
pd3 = pd1.reindex(columns = ['C','B','A'])
pd3

Unnamed: 0,C,B,A
a,2,1,0
b,5,4,3
c,8,7,6


## 增

In [20]:
ps1

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [21]:
ps1['g'] = 9
ps1

a    0
b    1
c    2
d    3
e    4
g    9
dtype: int64

In [22]:
s1 = pd.Series({'f':999})
ps3 = ps1.append(s1)
ps3

a      0
b      1
c      2
d      3
e      4
g      9
f    999
dtype: int64

In [23]:
pd1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [24]:
#增加列
pd1[4] = [10,11,12]
pd1

Unnamed: 0,A,B,C,4
a,0,1,2,10
b,3,4,5,11
c,6,7,8,12


In [25]:
# 插入
pd1.insert(0,'E',[9,99,999])
pd1

Unnamed: 0,E,A,B,C,4
a,9,0,1,2,10
b,99,3,4,5,11
c,999,6,7,8,12


In [26]:
#增加行

In [27]:
#标签索引loc
pd1.loc['d'] = [1,1,1,1,1]
pd1

Unnamed: 0,E,A,B,C,4
a,9,0,1,2,10
b,99,3,4,5,11
c,999,6,7,8,12
d,1,1,1,1,1


In [28]:
row = {'E':6,'A':6,'B':6,'C':6,4:6}
pd5 = pd1.append(row,ignore_index=True)
#ignore_index 参数默认值为False，如果为True，会对新生成的dataframe使用新的索引（自动产生），忽略原来数据的索引。
pd5

Unnamed: 0,E,A,B,C,4
0,9,0,1,2,10
1,99,3,4,5,11
2,999,6,7,8,12
3,1,1,1,1,1
4,6,6,6,6,6


## 删

In [29]:
#del
ps1

a    0
b    1
c    2
d    3
e    4
g    9
dtype: int64

In [30]:
del ps1['b']
ps1

a    0
c    2
d    3
e    4
g    9
dtype: int64

In [31]:
pd1

Unnamed: 0,E,A,B,C,4
a,9,0,1,2,10
b,99,3,4,5,11
c,999,6,7,8,12
d,1,1,1,1,1


In [32]:
del pd1['E']
pd1

Unnamed: 0,A,B,C,4
a,0,1,2,10
b,3,4,5,11
c,6,7,8,12
d,1,1,1,1


In [33]:
#drop  删除轴上数据
#删除一条
ps6 = ps1.drop('g')
ps6

a    0
c    2
d    3
e    4
dtype: int64

In [34]:
#删除多条
ps1.drop(['c','d'])

a    0
e    4
g    9
dtype: int64

In [35]:
#dataframe
#删除行
pd1.drop('a')

Unnamed: 0,A,B,C,4
b,3,4,5,11
c,6,7,8,12
d,1,1,1,1


In [36]:
pd1.drop(['a','d'])

Unnamed: 0,A,B,C,4
b,3,4,5,11
c,6,7,8,12


In [37]:
#删除列
pd1.drop('A',axis=1)  #1列  0 行

Unnamed: 0,B,C,4
a,1,2,10
b,4,5,11
c,7,8,12
d,1,1,1


In [38]:
pd1.drop('A',axis='columns')

Unnamed: 0,B,C,4
a,1,2,10
b,4,5,11
c,7,8,12
d,1,1,1


In [39]:
#inplace属性   在原对象上删除，并不会返回新的对象
ps1

a    0
c    2
d    3
e    4
g    9
dtype: int64

In [40]:
ps1.drop('d',inplace=True)
ps1

a    0
c    2
e    4
g    9
dtype: int64

# 改

In [48]:
ps1 = pd.Series(range(5),index=['a','b','c','d','e'])
print(type(ps1.index))
ps1

<class 'pandas.core.indexes.base.Index'>


a    0
b    1
c    2
d    3
e    4
dtype: int32

In [49]:
pd1 = pd.DataFrame(np.arange(9).reshape(3,3),index = ['a','b','c'],columns = ['A','B','C'])
pd1

Unnamed: 0,A,B,C
a,0,1,2
b,3,4,5
c,6,7,8


In [50]:
ps1['a'] = 999
ps1

a    999
b      1
c      2
d      3
e      4
dtype: int32

In [51]:
ps1[0] = 888
ps1

a    888
b      1
c      2
d      3
e      4
dtype: int32

In [53]:
#直接使用索引
pd1['A'] = [9,10,11]
pd1

Unnamed: 0,A,B,C
a,9,1,2
b,10,4,5
c,11,7,8


In [54]:
#对象.列
pd1.A = 6
pd1

Unnamed: 0,A,B,C
a,6,1,2
b,6,4,5
c,6,7,8


In [55]:
# 变成增加列的操作
pd1['a'] = 777
pd1

Unnamed: 0,A,B,C,a
a,6,1,2,777
b,6,4,5,777
c,6,7,8,777


In [56]:
#loc 标签索引
pd1.loc['a'] =777
pd1

Unnamed: 0,A,B,C,a
a,777,777,777,777
b,6,4,5,777
c,6,7,8,777


In [57]:
pd1.loc['a','A'] = 1000
pd1

Unnamed: 0,A,B,C,a
a,1000,777,777,777
b,6,4,5,777
c,6,7,8,777


# 查

In [59]:
#Series
# 1.行索引
ps1

a    888
b      1
c      2
d      3
e      4
dtype: int32

In [60]:
ps1['a']

888

In [61]:
ps1[0]

888

In [62]:
#2.切片
# 位置切片索引
ps1[1:4]

b    1
c    2
d    3
dtype: int32

In [63]:
#标签切片   按照水印名切片操作  是包含终止索引的
ps1['b':'e']

b    1
c    2
d    3
e    4
dtype: int32

In [66]:
# 3.不连续索引
ps1[['b','e']]

b    1
e    4
dtype: int32

In [67]:
ps1[[0,2,3]]

a    888
c      2
d      3
dtype: int32

In [69]:
# 布尔索引
ps1[ps1>2]

a    888
d      3
e      4
dtype: int32

In [70]:
#dataframe
pd1

Unnamed: 0,A,B,C,a
a,1000,777,777,777
b,6,4,5,777
c,6,7,8,777


In [71]:
#1.列索引
pd1['A']

a    1000
b       6
c       6
Name: A, dtype: int64

In [80]:
#取多列
pd1[['A','C']]

Unnamed: 0,A,C
a,1000,777
b,6,5
c,6,8


In [84]:
#选取一个值
pd1['A']['a']

1000

In [82]:
#2.切片
pd1[:2]  #获取行

Unnamed: 0,A,B,C,a
a,1000,777,777,777
b,6,4,5,777


## 高级索引
1. loc 标签索引
2. iloc 位置索引
3. ix 标签与位置混合索引

In [86]:
#loc 标签索引
#loc是基于标签名的索引 自定义的索引名
ps1['a':'c']

a    888
b      1
c      2
dtype: int32

In [87]:
ps1.loc['a':'c']

a    888
b      1
c      2
dtype: int32

In [88]:
pd1

Unnamed: 0,A,B,C,a
a,1000,777,777,777
b,6,4,5,777
c,6,7,8,777


In [91]:
pd1.loc['a':'b','A':'C']  #第一个参数索引行 第二个是列

Unnamed: 0,A,B,C
a,1000,777,777
b,6,4,5


In [92]:
#2.iloc位置索引
ps1[1:3]

b    1
c    2
dtype: int32

In [93]:
ps1.iloc[1:3]

b    1
c    2
dtype: int32

In [95]:
pd1.iloc[0:2,0:3]

Unnamed: 0,A,B,C
a,1000,777,777
b,6,4,5


In [97]:
#3.ix标签与位置混合索引
ps1.ix[1:3]
ps1.ix['b':'c']

b    1
c    2
dtype: int32

In [98]:
pd1.ix[0:2,0]

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


a    1000
b       6
Name: A, dtype: int64