In [18]:
print("""
@Description: The Series object
@Author(s): Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime: 2023-06-10 15:39:11
""")


@Description: The Series object
@Author(s): Stephen CUI
@LastEditor(s): Stephen CUI
@CreatedTime: 2023-06-10 15:39:11



# The Series object

- Instantiating Series objects from lists, dictionaries, tuples, and more
- Setting a custom index on a Series
- Accessing attributes and invoking methods on a Series
- Performing mathematical operations on one or more Series
- Passing the Series to Python’s built-in functions

In [19]:
import pandas as pd
import numpy as np

In [20]:
pd.Series()

Series([], dtype: object)

In [21]:
ice_cream_flavors = [
    'Chocolate', 'Vanilla', 'Strawberry', 'Rum Raisin'
]
pd.Series(ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [22]:
day_of_week = ['Monday', 'Wednesday', 'Friday', 'Saturday']
pd.Series(ice_cream_flavors, day_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Saturday     Rum Raisin
dtype: object

In [23]:
pd.Series(data=ice_cream_flavors, index=day_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Saturday     Rum Raisin
dtype: object

In [24]:
day_of_week = ['Monday', 'Wednesday', 'Friday', 'Wednesday']
pd.Series(ice_cream_flavors, day_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

In [25]:
pd.Series(index=day_of_week, data=ice_cream_flavors)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

In [26]:
bunch_of_bools = [True, False, False]
pd.Series(bunch_of_bools)

0     True
1    False
2    False
dtype: bool

In [27]:
stock_prices = [985.5, 950.55]
time_of_day = ['Open', 'Close']
pd.Series(data=stock_prices, index=time_of_day)

Open     985.50
Close    950.55
dtype: float64

In [28]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers)

0     4
1     8
2    15
3    16
4    23
5    42
dtype: int64

In [29]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers, dtype='float')

0     4.0
1     8.0
2    15.0
3    16.0
4    23.0
5    42.0
dtype: float64

In [30]:
temperatures = [94, 88, np.nan, 91]
pd.Series(temperatures)

0    94.0
1    88.0
2     NaN
3    91.0
dtype: float64

In [31]:
calorie_info = {
    'Cereal': 125,
    'Chocolate Bar': 406,
    'Ice Cream Sundae': 342
}
diet = pd.Series(calorie_info)
diet

Cereal              125
Chocolate Bar       406
Ice Cream Sundae    342
dtype: int64

In [32]:
pd.Series(data=('Red', 'Blue', 'Green'))

0      Red
1     Blue
2    Green
dtype: object

In [33]:
rgb_colors = [(120, 41, 26), (196, 165, 45)]
pd.Series(data=rgb_colors)

0     (120, 41, 26)
1    (196, 165, 45)
dtype: object

In [34]:
my_set = {'Ricky', 'Bobby'}
pd.Series(my_set)

TypeError: 'set' type is unordered

In [35]:
pd.Series(list(my_set))

0    Bobby
1    Ricky
dtype: object

In [36]:
random_data = np.random.randint(1, 101, 10)
random_data

array([66, 30, 56, 65, 30, 64,  8, 81, 56,  3])

In [37]:
pd.Series(random_data)

0    66
1    30
2    56
3    65
4    30
5    64
6     8
7    81
8    56
9     3
dtype: int32

In [38]:
diet.values

array([125, 406, 342], dtype=int64)

In [39]:
type(diet.values)

numpy.ndarray

In [40]:
diet.index

Index(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object')

In [41]:
type(diet.index)

pandas.core.indexes.base.Index

In [42]:
diet.dtype

dtype('int64')

In [43]:
diet.size

3

In [44]:
diet.shape

(3,)

In [45]:
diet.is_unique

True

In [46]:
pd.Series(data=[3, 3]).is_unique

False

In [47]:
pd.Series(data=[2, 3, 6]).is_monotonic_increasing

True

In [48]:
pd.Series(data=[2, 6, 3]).is_monotonic_decreasing

False

In [49]:
values = range(0, 500, 5)
nums = pd.Series(data=values)
nums

0       0
1       5
2      10
3      15
4      20
     ... 
95    475
96    480
97    485
98    490
99    495
Length: 100, dtype: int64

In [50]:
nums.head(3)

0     0
1     5
2    10
dtype: int64

In [51]:
nums.tail(3)

97    485
98    490
99    495
dtype: int64

In [52]:
nums.tail(n=3)

97    485
98    490
99    495
dtype: int64

## 数学操作

### 统计运算

In [53]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [54]:
numbers.count()

5

In [55]:
numbers.sum()

15.0

In [56]:
numbers.sum(skipna=False)

nan

In [57]:
numbers.sum(min_count=3)

15.0

In [58]:
numbers.sum(min_count=6)

nan

In [59]:
numbers.product(), numbers.product(skipna=False), numbers.product(min_count=3)

(120.0, nan, 120.0)

In [60]:
numbers.cumsum()

0     1.0
1     3.0
2     6.0
3     NaN
4    10.0
5    15.0
dtype: float64

In [61]:
numbers.cumsum(skipna=False)

0    1.0
1    3.0
2    6.0
3    NaN
4    NaN
5    NaN
dtype: float64

In [62]:
numbers.pct_change()

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [63]:
numbers.pct_change(fill_method='pad')

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [64]:
numbers.pct_change(fill_method='ffill')

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [65]:
numbers.pct_change(fill_method='bfill')

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64

In [66]:
numbers.pct_change(fill_method='backfill')

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64

In [67]:
numbers.mean(), numbers.median(), numbers.std(), numbers.min(), numbers.max()

(3.0, 3.0, 1.5811388300841898, 1.0, 5.0)

In [68]:
animals = pd.Series(['koala', 'aardvark', 'zebra'])
animals

0       koala
1    aardvark
2       zebra
dtype: object

In [69]:
numbers.describe()

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64

In [70]:
numbers.sample(3)

1    2.0
2    3.0
5    5.0
dtype: float64

In [71]:
authors = pd.Series(
    ['Hemingway', 'Orwell', 'Dostoevsky', 'Fitzgerald', 'Orwell']
)
authors.unique()

array(['Hemingway', 'Orwell', 'Dostoevsky', 'Fitzgerald'], dtype=object)

In [72]:
authors.nunique()

4

### 算术运算

In [73]:
s1 = pd.Series(data=[5, np.nan, 15], index=['A', 'B', 'C'])
s1

A     5.0
B     NaN
C    15.0
dtype: float64

In [74]:
s1 + 3

A     8.0
B     NaN
C    18.0
dtype: float64

In [75]:
s1.add(3)

A     8.0
B     NaN
C    18.0
dtype: float64

In [76]:
s1 - 5

A     0.0
B     NaN
C    10.0
dtype: float64

In [77]:
s1.sub(5)

A     0.0
B     NaN
C    10.0
dtype: float64

In [78]:
s1.subtract(5)

A     0.0
B     NaN
C    10.0
dtype: float64

In [79]:
s1 * 2

A    10.0
B     NaN
C    30.0
dtype: float64

In [80]:
s1.mul(3)

A    15.0
B     NaN
C    45.0
dtype: float64

In [81]:
s1.multiply(3)

A    15.0
B     NaN
C    45.0
dtype: float64

In [82]:
s1 / 3

A    1.666667
B         NaN
C    5.000000
dtype: float64

In [83]:
s1.div(3)

A    1.666667
B         NaN
C    5.000000
dtype: float64

In [84]:
s1.divide(3)

A    1.666667
B         NaN
C    5.000000
dtype: float64

In [85]:
s1 // 3

A    1.0
B    NaN
C    5.0
dtype: float64

In [86]:
s1.floordiv(3)

A    1.0
B    NaN
C    5.0
dtype: float64

In [87]:
s1 % 3

A    2.0
B    NaN
C    0.0
dtype: float64

In [88]:
s1.mod(3)

A    2.0
B    NaN
C    0.0
dtype: float64

In [89]:
s1 = pd.Series([1, 2, 3], index=['A', 'B', 'C'])
s2 = pd.Series([4, 5, 6], index=['A', 'B', 'C'])

In [90]:
s1 + s2

A    5
B    7
C    9
dtype: int64

In [93]:
s1 = pd.Series([3, 6, np.nan, 12])
s2 = pd.Series([2, 6, np.nan, 12])
s1 == s2

0    False
1     True
2    False
3     True
dtype: bool

In [94]:
s1.eq(s2)

0    False
1     True
2    False
3     True
dtype: bool

In [95]:
s1 != s2

0     True
1    False
2     True
3    False
dtype: bool

In [96]:
s1.ne(s2)

0     True
1    False
2     True
3    False
dtype: bool

In [100]:
s1 = pd.Series(data=[5, 10, 15], index=['A', 'B', 'C'])
s2 = pd.Series(data=[4, 8, 12, 16], index=['B', 'C', 'D', 'E'])
s1 + s2

A     NaN
B    14.0
C    23.0
D     NaN
E     NaN
dtype: float64

## Passing the Series to Python’s built-in functions

In [101]:
cities = pd.Series(data=['San Francisco', 'Los Angeles', 'Las Vegas', np.nan])
cities

0    San Francisco
1      Los Angeles
2        Las Vegas
3              NaN
dtype: object

In [102]:
len(cities), type(cities)

(4, pandas.core.series.Series)

In [103]:
dir(cities)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__reduce__',
 '__reduce_ex__',
 '__rep

In [104]:
list(cities)

['San Francisco', 'Los Angeles', 'Las Vegas', nan]

In [106]:
dict(cities)

{0: 'San Francisco', 1: 'Los Angeles', 2: 'Las Vegas', 3: nan}

In [107]:
cities

0    San Francisco
1      Los Angeles
2        Las Vegas
3              NaN
dtype: object

In [109]:
assert 'Las Vegas' not in cities

In [110]:
assert 2 in cities

In [111]:
assert 'Las Vegas' in cities.values

In [112]:
assert 100 not in cities
assert 'Paris' not in cities.values