In [6]:
import numpy as np
import pandas as pd

### 什么是Series？

Series是一种带标签的一维数组型对象，能够保存任何数据类型（int、float、str、Python object等类型），它包含了数据的标签，称之为索引。

Series主要由数据和索引组成，索引（index）在左，数据（values）在右；如果创建时没有显式指定索引，pandas会自动创建从0开始的整数索引。例如下面这个样子：

```text
     Series
-----------------
index     element
[0]       10
[1]       20
[2]       50
[3]       70
[4]       80
```

### 创建Series

In [5]:
# Method 1: build a Series from a plain Python list.
# No index is passed, so pandas assigns the default integer labels 0..n-1.
s = pd.Series(data=[1.1, 2.1, 3.2, 4.5])
print("<s>\n", s)
# The resulting object is a pandas.core.series.Series.
print("<s type>\n", type(s))

<s>
 0    1.1
1    2.1
2    3.2
3    4.5
dtype: float64
<s type>
 <class 'pandas.core.series.Series'>


In [7]:
# Build a Series from a NumPy ndarray (the index is again created automatically).
pd.Series(np.arange(4))

0    0
1    1
2    2
3    3
dtype: int64

In [10]:
# Method 2: supply explicit index labels alongside the data.
s = pd.Series(data=[1.1, 2.1, 3.2, 4.5], index=["a", "b", "c", "d"])
print("<s>\n", s)
# The labels are exposed through .index ...
print("<s index>\n", s.index)
print("<s index type>\n", type(s.index))   # pandas.core.indexes.base.Index
# ... and the underlying data through .values.
print("<s values>\n", s.values)
print("<s values type>\n", type(s.values)) # numpy.ndarray

<s>
 a    1.1
b    2.1
c    3.2
d    4.5
dtype: float64
<s index>
 Index(['a', 'b', 'c', 'd'], dtype='object')
<s index type>
 <class 'pandas.core.indexes.base.Index'>
<s values>
 [1.1 2.1 3.2 4.5]
<s values type>
 <class 'numpy.ndarray'>


In [11]:
# Method 3: build a Series from a dict; the keys become the index labels.
pd.Series(data={"Name": "airtosupply", "Age": 24, "Location": "NJ"})

Name        airtosupply
Age                  24
Location             NJ
dtype: object

In [14]:
# Caution: every label listed in `index` that has no matching key in the dict
# ("Sex" here) is filled with NaN.
pd.Series(
    data={"Name": "airtosupply", "Age": 24, "Location": "NJ"},
    index=["Name", "Age", "Location", "Sex"],
)

Name        airtosupply
Age                  24
Location             NJ
Sex                 NaN
dtype: object

### 基本使用

#### 检查是否有缺失值

In [20]:
# "Z" is requested in the index but absent from the dict, so it becomes a missing value.
location = pd.Series(data={"X": 3, "Y": 4}, index=["X", "Y", "Z"])
print("<location>\n", location)

# isnull() flags each element that is a missing value.
print("<isnull>\n", location.isnull())

# notnull() is the opposite: it flags each element that is not missing.
print("<notnull>\n", location.notnull())

<location>
 X    3.0
Y    4.0
Z    NaN
dtype: float64
<isnull>
 X    False
Y    False
Z     True
dtype: bool
<notnull>
 X     True
Y     True
Z    False
dtype: bool


#### 获取索引和数据

In [21]:
# Index labels of the Series.
print("<location index>: ", location.index)
# Underlying data of the Series.
print("<location value>: ", location.values)

<location index>:  Index(['X', 'Y', 'Z'], dtype='object')
<location value>:  [ 3.  4. nan]


#### 切片和索引

In [23]:
# Six float weights keyed by the string labels w1..w6.
weight = pd.Series(
    data={
        "w1": 0.3,
        "w2": 0.4,
        "w3": 0.2,
        "w4": 0.7,
        "w5": 0.1,
        "w6": 0.3,
    }
)
print("<weight>\n", weight)

<weight>
 w1    0.3
w2    0.4
w3    0.2
w4    0.7
w5    0.1
w6    0.3
dtype: float64


In [31]:
# Select by integer position (not by label).
# `.iloc` is used explicitly: this Series is indexed by string labels, and
# plain integer keys such as weight[1] or weight[[2, 4]] only worked through a
# positional fallback of Series.__getitem__ that pandas has deprecated.
# The printed labels are left unchanged so they still match the saved output.

# Value at position 1.
print("<weight[1]>\n", weight.iloc[1])

# Values at positions 2 and 4.
print("<weight[[2, 4]]>\n", weight.iloc[[2, 4]])

# Values at positions in the half-open range [2, 5).
print("<weight[2:5]>\n", weight.iloc[2:5])

<weight[1]>
 0.4
<weight[[2, 4]]>
 w3    0.2
w5    0.1
dtype: float64
<weight[2:5]>
 w3    0.2
w4    0.7
w5    0.1
dtype: float64


In [33]:
# Select by label name.

# Value stored under a single label.
print("<weight['w2']>\n", weight['w2'])

# Values stored under several explicitly listed labels.
print("weight[['w2', 'w4']]>\n", weight[['w2', 'w4']])

# Values for a range of labels.
# Caution: unlike an integer-position slice such as 2:5 (end excluded), a label
# slice 'w1':'w5' includes its end label, so 'w5' is part of the result.
print("weight['w1':'w5']>\n", weight['w1':'w5'])

<weight['w2']>
 0.4
weight[['w2', 'w4']]>
 w2    0.4
w4    0.7
dtype: float64
weight['w1':'w5']>
 w1    0.3
w2    0.4
w3    0.2
w4    0.7
w5    0.1
dtype: float64


#### 布尔索引

In [35]:
# Same six labelled weights, used for the boolean-indexing examples.
W = pd.Series(
    data={
        "w1": 0.3,
        "w2": 0.4,
        "w3": 0.2,
        "w4": 0.7,
        "w5": 0.1,
        "w6": 0.3,
    }
)
print("<W>\n", W)

# Boolean indexing: keep only the elements strictly greater than 0.3.
print("<W[W > 0.3]>\n", W[W > 0.3])

<W>
 w1    0.3
w2    0.4
w3    0.2
w4    0.7
w5    0.1
w6    0.3
dtype: float64
<W[W > 0.3]>
 w2    0.4
w4    0.7
dtype: float64


#### 索引和对应数据的关系不被运算所影响

In [37]:
# Arithmetic and comparisons are applied element by element, and every result
# keeps the same index labels as W.
print("<W>\n", W)
print("<W * 2>\n", W * 2)
print("<W > 0.2>\n", W > 0.2)

<W>
 w1    0.3
w2    0.4
w3    0.2
w4    0.7
w5    0.1
w6    0.3
dtype: float64
<W * 2>
 w1    0.6
w2    0.8
w3    0.4
w4    1.4
w5    0.2
w6    0.6
dtype: float64
<W > 0.2>
 w1     True
w2     True
w3    False
w4     True
w5    False
w6     True
dtype: bool


#### 快速探查数据

In [40]:
# Small integer Series with the default 0..6 index; displayed as the cell output.
s = pd.Series(data=[1, 4, 12, 56, 34, 12, 2])
s

0     1
1     4
2    12
3    56
4    34
5    12
6     2
dtype: int64

In [41]:
# Show the first 3 rows; head() returns the first 5 when no count is given.
s.head(n=3)

0     1
1     4
2    12
dtype: int64

In [43]:
# Show the last 5 rows; tail() returns the last 5 when no count is given.
s.tail()

2    12
3    56
4    34
5    12
6     2
dtype: int64