## 2.1 Overview of a Series

In [2]:
import pandas as pd
import numpy as np
#

### 2.1.1 Classes, and Instances

In [3]:
pd.Series()

Unnamed: 0,0


### 2.1.2 Populating the Series with Values

In [4]:
pd.Series(
    [ "Chocolate", "Vanilla", "Strawberry", "Rum Raisin"]
)

Unnamed: 0,0
0,Chocolate
1,Vanilla
2,Strawberry
3,Rum Raisin


In [5]:
pd.Series(
    data= [ 11, 12, 13, 14],
    name = "col-name",
    dtype=str
)

Unnamed: 0,col-name
0,11
1,12
2,13
3,14


### 2.1.3 Customizing the Series Index

In [6]:
pd.Series(
    data = ["Chocolate", "Vanilla", "Strawberry", "Rum Raisin"],
    index =["Monday", "Wednesday", "Friday", "Saturday"],
    name='col-name'
)

Unnamed: 0,col-name
Monday,Chocolate
Wednesday,Vanilla
Friday,Strawberry
Saturday,Rum Raisin


In [7]:
s = pd.Series(
    data = ["Chocolate", "Vanilla", "Strawberry", "Rum Raisin"],
    index =["Monday", "Wednesday", "Friday", "Saturday"],
    name='col-name'
)

In [8]:
s.describe()

Unnamed: 0,col-name
count,4
unique,4
top,Chocolate
freq,1


In [9]:
s.info()

<class 'pandas.core.series.Series'>
Index: 4 entries, Monday to Saturday
Series name: col-name
Non-Null Count  Dtype 
--------------  ----- 
4 non-null      object
dtypes: object(1)
memory usage: 64.0+ bytes


In [10]:
bunch_of_bools = [True, False, False]
pd.Series(bunch_of_bools)

Unnamed: 0,0
0,True
1,False
2,False


In [11]:
stock_prices = [985.32, 950.44]
time_of_day = ["Open", "Close"]
pd.Series(data = stock_prices, index = time_of_day)

Unnamed: 0,0
Open,985.32
Close,950.44


In [12]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers)

Unnamed: 0,0
0,4
1,8
2,15
3,16
4,23
5,42


In [13]:
pd.Series(
    data=[4, 8, 15, 16, 23, 42],
    name='col-name',
    dtype = "float"
)

Unnamed: 0,col-name
0,4.0
1,8.0
2,15.0
3,16.0
4,23.0
5,42.0


### 2.1.4 Creating a Series with Missing Values

In [14]:
pd.Series(
    data = [94, 88, np.nan, 91],
    name='col-name'
)

Unnamed: 0,col-name
0,94.0
1,88.0
2,
3,91.0


## 2.2 Create a Series from Python Objects

In [15]:
pd.Series(
    {
      "Cereal": 125,
      "Chocolate Bar": 406,
      "Ice Cream Sundae": 342,
    },
    name='col-name',
    dtype=float
)

Unnamed: 0,col-name
Cereal,125.0
Chocolate Bar,406.0
Ice Cream Sundae,342.0


In [16]:
s = pd.Series(
    data = [
        [120, 41, 26],
        [196, 165, 45]
    ],
    name='col-name',
    dtype=float
)
s

Unnamed: 0,col-name
0,"(120, 41, 26)"
1,"(196, 165, 45)"


In [17]:
type(s[0])

tuple

**NOTE**: I've commented out the code below so that the Notebook can run without raising an error.

In [18]:
my_set = {"Ricky", "Bobby"}
# pd.Series(my_set)

In [19]:
pd.Series(
    data = list( {"Ricky", "Bobby"}) ,
    name='col-name',
)

Unnamed: 0,col-name
0,Ricky
1,Bobby


In [20]:
random_data = np.random.randint(1, 101, 10)
random_data

array([38, 21, 52, 76, 39, 50,  9, 15, 31, 86])

In [21]:
pd.Series(
    data=np.random.randint(1, 101, 10),
    name='col-name',
    dtype=float
)

Unnamed: 0,col-name
0,49.0
1,74.0
2,83.0
3,79.0
4,56.0
5,68.0
6,66.0
7,45.0
8,51.0
9,12.0


## 2.3 Series Attributes

In [22]:
diet = pd.Series(
    {
      "Cereal": 125,
      "Chocolate Bar": 406,
      "Ice Cream Sundae": 342,
    },
    name='col-name',
    dtype=float
)
diet.values

array([125., 406., 342.])

In [23]:
type(diet.values)

numpy.ndarray

In [24]:
diet.index

Index(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object')

In [25]:
type(diet.index)

pandas.core.indexes.base.Index

In [26]:
diet.dtype

dtype('float64')

In [27]:
diet.size

3

In [28]:
diet.shape

(3,)

In [29]:
diet.is_unique

True

In [30]:
pd.Series(data = [3, 3]).is_unique

False

In [33]:
# pd.Series(data = [1, 3, 6]).is_monotonic

In [34]:
# pd.Series(data = [1, 6, 3]).is_monotonic

## 2.4 Retrieving the First and Last Rows

In [35]:
nums = pd.Series(
    data = range(0, 500, 5),
    name='col-name',
    dtype=float)
nums.head(10)

Unnamed: 0,col-name
0,0.0
1,5.0
2,10.0
3,15.0
4,20.0
5,25.0
6,30.0
7,35.0
8,40.0
9,45.0


In [36]:
nums.dtype

dtype('float64')

In [37]:
nums.tail()

Unnamed: 0,col-name
95,475.0
96,480.0
97,485.0
98,490.0
99,495.0


In [38]:
diet = pd.Series(
    {
      "Cereal": 125,
      "Chocolate Bar": 406,
      "Ice Cream Sundae": 342,
    },
    name='col-name',
    dtype=float
)

In [39]:
diet.iloc[0]

np.float64(125.0)

In [40]:
diet.loc['Cereal']

np.float64(125.0)

## 2.5 Mathematical Operations

### 2.5.1 Statistical Operations

In [43]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
numbers

Unnamed: 0,0
0,1.0
1,2.0
2,3.0
3,
4,4.0
5,5.0


In [44]:
numbers.count()

np.int64(5)

In [None]:
numbers.sum()

15.0

In [None]:
numbers.sum(skipna = False)

nan

In [None]:
numbers.sum(min_count = 3)

15.0

In [None]:
numbers.sum(min_count = 6)

nan

In [None]:
numbers.product()

120.0

In [None]:
numbers.product(skipna = False)

nan

In [None]:
numbers.product(min_count = 3)

120.0

In [None]:
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [46]:
numbers.cumsum()

Unnamed: 0,0
0,1.0
1,3.0
2,6.0
3,
4,10.0
5,15.0


In [None]:
numbers.cumsum(skipna = False)

0    1.0
1    3.0
2    6.0
3    NaN
4    NaN
5    NaN
dtype: float64

In [None]:
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [None]:
numbers.pct_change()

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [None]:
# The three lines below are equivalent
numbers.pct_change()
numbers.pct_change(fill_method = "pad")
numbers.pct_change(fill_method = "ffill")

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [None]:
# The two lines below are equivalent
numbers.pct_change(fill_method = "bfill")
numbers.pct_change(fill_method = "backfill")

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64

In [None]:
numbers.mean()

3.0

In [None]:
numbers.median()

3.0

In [None]:
numbers.std()

1.5811388300841898

In [None]:
numbers.max()

5.0

In [None]:
numbers.min()

1.0

In [None]:
animals = pd.Series(["koala", "aardvark", "zebra"])
animals

0       koala
1    aardvark
2       zebra
dtype: object

In [None]:
animals.max()

'zebra'

In [None]:
animals.min()

'aardvark'

In [None]:
numbers.describe()

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64

In [47]:
numbers.sample(3)

Unnamed: 0,0
3,
4,4.0
0,1.0


In [48]:
authors = pd.Series(
    ["Hemingway", "Orwell", "Dostoevsky", "Fitzgerald", "Orwell"]
)

authors.unique()

array(['Hemingway', 'Orwell', 'Dostoevsky', 'Fitzgerald'], dtype=object)

In [49]:
authors.nunique()

4

### 2.5.2 Arithmetic Operations

In [53]:
s1 = pd.Series(
    data = [5, np.nan, 15],
    index = ["A", "B", "C"],
    name='col-name'
)

In [56]:
s1 + 3
s1.add(3)

Unnamed: 0,col-name
A,8.0
B,
C,18.0


In [55]:
# The three lines
# below are
# equivalent
s1 - 5
s1.sub(5)
s1.subtract(5)

Unnamed: 0,col-name
A,0.0
B,
C,10.0


In [None]:
# The three lines
# below are
# equivalent
s1 * 2
s1.mul(2)
s1.multiply(2)

A    10.0
B     NaN
C    30.0
dtype: float64

In [None]:
# The three lines
# below are
# equivalent
s1 / 2
s1.div(2)
s1.divide(2)

A    2.5
B    NaN
C    7.5
dtype: float64

In [None]:
# The two lines
# below are
# equivalent
s1 // 4
s1.floordiv(4)

A    1.0
B    NaN
C    3.0
dtype: float64

In [None]:
# The two lines
# below are
# equivalent
s1 % 3
s1.mod(3)

A    2.0
B    NaN
C    0.0
dtype: float64

### 2.5.3 Broadcasting

In [57]:
s1 = pd.Series(
    data = [1, 2, 3],
    index = ["A", "B", "C"]
)
s2 = pd.Series(
    data=[4, 5, 6],
    index = ["A", "B", "C"]
)

In [58]:
s1 + s2

Unnamed: 0,0
A,5
B,7
C,9


In [59]:
s1 = pd.Series(data = [3, 6, np.nan, 12])
s2 = pd.Series(data = [2, 6, np.nan, 12])

In [60]:
# The two lines below are equivalent
s1 == s2
s1.eq(s2)

Unnamed: 0,0
0,False
1,True
2,False
3,True


In [None]:
# The two lines below are equivalent
s1 != s2
s1.ne(s2)

0     True
1    False
2     True
3    False
dtype: bool

In [64]:
s1 = pd.Series(
    data = [5, 10, 15],
    index = ["A", "B", "C"]
)

s2 = pd.Series(
    data = [4, 8, 12, 14],
    index = ["B", "C", "D", "E"]
)

In [66]:
s1 + s2

Unnamed: 0,0
A,
B,14.0
C,23.0
D,
E,


In [None]:
s1 = pd.Series(
    data = [5, 10, 15],
    index = ["A", "B", "C"]
)

In [68]:
list(s1)

[5, 10, 15]

In [74]:
dict(s1)

{'A': np.int64(5), 'B': np.int64(10), 'C': np.int64(15)}

## 2.6 Passing the Series to Python's Built-In Functions

In [69]:
cities = pd.Series(
    data = ["San Francisco", "Los Angeles", "Las  Vegas", np.nan]
)

In [70]:
len(cities)

4

In [71]:
type(cities)

In [72]:
dir(cities)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__bool__',
 '__class__',
 '__column_consortium_standard__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pandas_priority__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__

In [73]:
list(cities)

['San Francisco', 'Los Angeles', 'Las  Vegas', nan]

In [None]:
dict(cities)

{0: 'San Francisco', 1: 'Los Angeles', 2: 'Las  Vegas', 3: nan}

In [None]:
cities

0    San Francisco
1      Los Angeles
2       Las  Vegas
3              NaN
dtype: object

In [None]:
"Las Vegas" in cities

False

In [None]:
2 in cities

True

In [75]:
100 not in cities

True

In [None]:
"Las Vegas" in cities.values

False

In [None]:
"Paris" not in cities.values

True

## 2.7 Coding Challenge

### 2.7.1 Problems

### 2.7.2 Solutions

In [None]:
superheroes = [
    "Batman",
    "Superman",
    "Spider-Man",
    "Iron Man",
    "Captain America",
    "Wonder Woman"
]

In [None]:
strength_levels = (100, 120, 90, 95, 110, 120)

In [None]:
pd.Series(superheroes)

0             Batman
1           Superman
2         Spider-Man
3           Iron Man
4    Captain America
5       Wonder Woman
dtype: object

In [None]:
pd.Series(data = strength_levels)

0    100
1    120
2     90
3     95
4    110
5    120
dtype: int64

In [None]:
heroes = pd.Series(
    data = strength_levels, index = superheroes
)

heroes

Batman             100
Superman           120
Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [None]:
heroes.head(2)

Batman      100
Superman    120
dtype: int64

In [None]:
heroes.tail(4)

Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [None]:
heroes.nunique()

5

In [None]:
heroes.mean()

105.83333333333333

In [None]:
heroes.max()

120

In [None]:
heroes.min()

90

In [None]:
heroes * 2

Batman             200
Superman           240
Spider-Man         180
Iron Man           190
Captain America    220
Wonder Woman       240
dtype: int64

In [None]:
dict(heroes)

{'Batman': 100,
 'Superman': 120,
 'Spider-Man': 90,
 'Iron Man': 95,
 'Captain America': 110,
 'Wonder Woman': 120}

## 2.7 Summary