In [1]:
# coding:utf-8
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
# Two-level (hierarchical) index made of the labels "a"/"b" and 0/1.
# Passing several parallel arrays as `index` makes pandas build a MultiIndex.
s = Series(
    data=[1, 2, 3, 4],
    index=[
        ["a", "a", "b", "b"],  # outer level
        [0, 1, 0, 1],          # inner level
    ],
)
s

a  0    1
   1    2
b  0    3
   1    4
dtype: int64

In [4]:
# Implicit MultiIndex construction: `index` receives two parallel label
# arrays and pandas combines them into a two-level row index.
# The values come from the unseeded global NumPy RNG, so they change per run.
df = DataFrame(
    data=np.random.randint(low=0, high=100, size=(6, 3)),
    index=[
        ["张三", "张三", "李四", "李四", "王五", "王五"],  # outer level
        ["期中", "期末", "期中", "期末", "期中", "期末"],  # inner level
    ],
    columns=["语文", "数学", "Python"],
)
df

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,43,85,1
张三,期末,60,41,46
李四,期中,27,99,35
李四,期末,22,88,45
王五,期中,27,69,58
王五,期末,56,18,54


In [5]:
# Explicit row MultiIndex via MultiIndex.from_arrays: one array per level,
# matched up position by position.
# The values come from the unseeded global NumPy RNG, so they change per run.
df1 = DataFrame(
    data=np.random.randint(low=0, high=150, size=(6, 3)),
    index=pd.MultiIndex.from_arrays(
        [
            ["张三", "张三", "李四", "李四", "王五", "王五"],
            ["期中", "期末", "期中", "期末", "期中", "期末"],
        ]
    ),
    columns=["语文", "数学", "Python"],
)
df1

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,53,119,52
张三,期末,115,13,73
李四,期中,145,120,99
李四,期末,135,77,76
王五,期中,98,37,21
王五,期末,97,128,24


In [8]:
# Explicit row MultiIndex via MultiIndex.from_tuples: one (outer, inner)
# tuple per row.
# The values come from the unseeded global NumPy RNG, so they change per run.
df2 = DataFrame(
    data=np.random.randint(low=0, high=150, size=(6, 3)),
    index=pd.MultiIndex.from_tuples(
        [
            ("张三", "期中"),
            ("张三", "期末"),
            ("李四", "期中"),
            ("李四", "期末"),
            ("王五", "期中"),
            ("王五", "期末"),
        ]
    ),
    columns=["语文", "数学", "Python"],
)
df2

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,72,129,27
张三,期末,80,126,148
李四,期中,73,16,101
李四,期末,71,75,20
王五,期中,109,124,16
王五,期末,103,89,61


In [9]:
# Explicit row MultiIndex via MultiIndex.from_product: the index is the
# Cartesian product of the outer labels and the inner labels.
# The values come from the unseeded global NumPy RNG, so they change per run.
df3 = DataFrame(
    data=np.random.randint(low=0, high=150, size=(6, 3)),
    index=pd.MultiIndex.from_product(
        [
            ["张三", "李四", "王五"],
            ["期中", "期末"],
        ]
    ),
    columns=["语文", "数学", "Python"],
)
df3

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,100,74,17
张三,期末,93,74,122
李四,期中,36,63,129
李四,期末,106,63,90
王五,期中,147,105,141
王五,期末,33,85,135


In [10]:
# Build a two-level *column* index: the MultiIndex goes to `columns`,
# while the rows keep a plain single-level index.
# The values come from the unseeded global NumPy RNG, so they change per run.
df4 = DataFrame(
    data=np.random.randint(low=0, high=150, size=(3, 6)),
    index=["张三", "李四", "王五"],
    columns=pd.MultiIndex.from_product(
        [
            ["语文", "数学", "Python"],
            ["期中", "期末"],
        ]
    ),
)
df4

Unnamed: 0_level_0,语文,语文,数学,数学,Python,Python
Unnamed: 0_level_1,期中,期末,期中,期末,期中,期末
张三,44,145,149,77,122,110
李四,11,118,144,76,123,144
王五,36,101,31,119,94,75


In [11]:
# Show the two-level Series again before indexing into it.
s

a  0    1
   1    2
b  0    3
   1    4
dtype: int64

In [12]:
# Select a single element by giving one label per index level.
s.loc[("a", 0)]

1

In [13]:
# Label-based slice on the outer level; both end labels are included.
s.loc["a":"b"]

a  0    1
   1    2
b  0    3
   1    4
dtype: int64

In [14]:
# Position-based slice: the first three rows (the stop position is excluded).
s.iloc[:3]

a  0    1
   1    2
b  0    3
dtype: int64

In [15]:
# Show df1 (two-level row index) again before slicing it.
df1

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,53,119,52
张三,期末,115,13,73
李四,期中,145,120,99
李四,期末,135,77,76
王五,期中,98,37,21
王五,期末,97,128,24


In [16]:
# DataFrame slicing by outer-level label; both end labels are included.
# The trailing `:` explicitly selects every column.
df1.loc["张三":"李四", :]

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,53,119,52
张三,期末,115,13,73
李四,期中,145,120,99
李四,期末,135,77,76


In [18]:
# Position-based slice: the first three rows (the stop position is excluded).
df1.iloc[:3]

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,53,119,52
张三,期末,115,13,73
李四,期中,145,120,99


In [19]:
# Select one row by its full (outer, inner) key.
# On a DataFrame, a bare `.loc[a, b]` can be read either as a two-level row
# key or as (row label, column label). Passing the row key as a tuple and
# spelling out the column axis with `:` removes that ambiguity.
df1.loc[("张三", "期中"), :]

语文         53
数学        119
Python     52
Name: (张三, 期中), dtype: int32

In [20]:
# Stacking index levels: start from df4, the frame with two-level columns.
df4

Unnamed: 0_level_0,语文,语文,数学,数学,Python,Python
Unnamed: 0_level_1,期中,期末,期中,期末,期中,期末
张三,44,145,149,77,122,110
李四,11,118,144,76,123,144
王五,36,101,31,119,94,75


In [23]:
# stack() and unstack() move one index level between the two axes; this is
# not a full transpose. Here stack() moves the inner column level
# (期中/期末) into the row index, leaving the subjects as columns.
df4.stack()

Unnamed: 0,Unnamed: 1,Python,数学,语文
张三,期中,122,149,44
张三,期末,110,77,145
李四,期中,123,144,11
李四,期末,144,76,118
王五,期中,94,31,36
王五,期末,75,119,101


In [25]:
# unstack() is the reverse of stack(): it moves the inner row level
# (期中/期末) into the columns, giving two-level columns.
df2.unstack()

Unnamed: 0_level_0,语文,语文,数学,数学,Python,Python
Unnamed: 0_level_1,期中,期末,期中,期末,期中,期末
张三,72,80,129,126,27,148
李四,73,71,16,75,101,20
王五,109,103,124,89,16,61


In [30]:
# Aggregation.
# axis=0 (the default for sum()) adds down the rows, giving one total per column.
# axis=1 would instead add across the columns, giving one total per row.
df1.sum(axis=0)

语文        643
数学        494
Python    345
dtype: int64

In [31]:
# Show df1 again: the values match the earlier display, so sum() returned a
# new object and left df1 unchanged.
df1

Unnamed: 0,Unnamed: 1,语文,数学,Python
张三,期中,53,119,52
张三,期末,115,13,73
李四,期中,145,120,99
李四,期末,135,77,76
王五,期中,98,37,21
王五,期末,97,128,24
