# Series

1D Series with associated labels

In [68]:
import pandas as pd

In [69]:
students = ['Gaurav', 'Saurav', 'Sangeeta', 'Sarita']

In [70]:
type(students)

list

In [71]:
pd.Series(students)

0      Gaurav
1      Saurav
2    Sangeeta
3      Sarita
dtype: object

In [72]:
ages = [27, 29, 23, 32]

In [73]:
pd.Series(ages)

0    27
1    29
2    23
3    32
dtype: int64

In [74]:
heights = [170, 172, 150, 120]

In [75]:
pd.Series(heights)

0    170
1    172
2    150
3    120
dtype: int64

In [76]:
mixed = [True, "say", {'my_mood': 100}]

In [77]:
pd.Series(mixed)

0                True
1                 say
2    {'my_mood': 100}
dtype: object

A Series is simply a one-dimensional sequence of values with associated labels.

## Parameters vs Arguments

In [78]:
pd.Series(students)

0      Gaurav
1      Saurav
2    Sangeeta
3      Sarita
dtype: object

In [79]:
pd.Series(data=students)

0      Gaurav
1      Saurav
2    Sangeeta
3      Sarita
dtype: object

> Here the 'data' is parameter while 'students' is argument

In [80]:
# python and general programming terminology

In [81]:
def greeting(parameter):
    """Print whatever value the caller supplies.

    ``parameter`` is the name declared in the signature; the concrete value
    passed at the call site (e.g. the string "argument") is the argument.
    """
    print(parameter)

greeting("argument")

argument


## What's in the data

In [82]:
books_list = ['Fooled by Randomness', 'Eat That Frog', 'Lenin on the Train']
# a list is an ordered data structure in Python, with indexes

In [83]:
list_series = pd.Series(books_list)
list_series

0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
dtype: object

In [84]:
books_dict = {0: 'Fooled by Randomness', 1:'Eat That Frog', 12:'Lenin on the Train'}

In [85]:
dict_series = pd.Series(books_dict)
dict_series

0     Fooled by Randomness
1            Eat That Frog
12      Lenin on the Train
dtype: object

In [86]:
# sets_series = pd.Series({"hello", "world"})
# sets_series

In [87]:
list_series.equals(dict_series)

False

In [88]:
# pandas automatically assigns an integer index when given a sequence such as a list or tuple; for a dict, the keys become the index labels

In [89]:
pd.Series(('apple', 'cat', 'banana'))

0     apple
1       cat
2    banana
dtype: object

In [90]:
pd.Series(108)

0    108
dtype: int64

In [91]:
# wrong 😑 pd.Series(109, 108, 109)

In [92]:
pd.Series('Gaurav')

0    Gaurav
dtype: object

In [93]:
# pd.Series(*["hello", "world"]) wont work
print(*["hello", "world"])

hello world


In [94]:
## If no label provided, it will label the data with integer

In [95]:
# Only the value of the last expression is rendered as the cell's output,
# so the first three Series are constructed and then discarded.
pd.Series(data=['this', 'is', 'gaurav'])
pd.Series(data={0: 'this', 1: 'is', 2: 'gaurav'})
pd.Series(data=0)
pd.Series(data='weather')

0    weather
dtype: object

## The .dtype Attribute

In [247]:
ages

[27, 29, 23, 32]

In [97]:
pd.Series(data=ages)

0    27
1    29
2    23
3    32
dtype: int64

In [98]:
pd.Series(data=ages, dtype='float')

0    27.0
1    29.0
2    23.0
3    32.0
dtype: float64

In [99]:
# You don't have to specify the 'dtype' manually because pandas type inference is good enough

In [249]:
print(*students, students)

Gaurav Saurav Sangeeta Sarita ['Gaurav', 'Saurav', 'Sangeeta', 'Sarita']


In [101]:
name_series = pd.Series(students)
name_series

0      Gaurav
1      Saurav
2    Sangeeta
3      Sarita
dtype: object

In [102]:
name_series.dtype

dtype('O')

In [103]:
type(name_series)

pandas.core.series.Series

## BONUS - What is dtype, Really?

In [104]:
# pandas benefits from numpy

In [105]:
# numpy expects homogeneous ("same type") data:

In [106]:
heights

[170, 172, 150, 120]

In [107]:
pd.Series(heights, dtype='float64')

0    170.0
1    172.0
2    150.0
3    120.0
dtype: float64

In [108]:
height_2 = [170.3, 172.77, 150, '456']

In [109]:
pd.Series(height_2)

0     170.3
1    172.77
2       150
3       456
dtype: object

In [110]:
# Moral: Pandas always relies on numpy

## Index and RangeIndex

In [111]:
books_list

['Fooled by Randomness', 'Eat That Frog', 'Lenin on the Train']

In [112]:
list_series

0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
dtype: object

In [113]:
pd.Series(books_list, index=['funny', 'serious and amusing', 'kinda intresting'])

funny                  Fooled by Randomness
serious and amusing           Eat That Frog
kinda intresting         Lenin on the Train
dtype: object

In [114]:
pd.Series(books_list, index=['funny', 'serious and amusing', 'kinda intresting'], dtype='object')

funny                  Fooled by Randomness
serious and amusing           Eat That Frog
kinda intresting         Lenin on the Train
dtype: object

In [251]:
pd.Series(books_list, ['funny', 'serious and amusing', 'kinda intresting'])

funny                  Fooled by Randomness
serious and amusing           Eat That Frog
kinda intresting         Lenin on the Train
dtype: object

In [252]:
pd.__version__

'2.2.3'

In [255]:
str.__mro__

(str, object)

In [116]:
pd.Series(books_list, ['funny', 'serious and amusing', 'kinda intresting'], dtype='string')

funny                  Fooled by Randomness
serious and amusing           Eat That Frog
kinda intresting         Lenin on the Train
dtype: string

In [246]:
# Positional arguments may not follow a keyword argument: Python rejects the
# call while parsing, before pandas is involved. Compiling the call as text
# and catching the SyntaxError shows the message without aborting "Run All".
bad_call = "pd.Series(dtype='string', books_list, ['funny', 'serious and amusing', 'kinda intresting'])"
try:
    compile(bad_call, "<keyword-before-positional>", "exec")
except SyntaxError as err:
    print(f"SyntaxError: {err.msg}")

SyntaxError: positional argument follows keyword argument (3113056596.py, line 1)

In [119]:
list_series.index

RangeIndex(start=0, stop=3, step=1)

In [120]:
type(list_series.index)

pandas.core.indexes.range.RangeIndex

In [256]:
# RangeIndex constructor on pandas module
pd.RangeIndex(start=4, stop=7, step=1)

RangeIndex(start=4, stop=7, step=1)

In [270]:
# Walk 1, 4, 7, ... (step 3, below 100) and keep only the multiples of 7.
print(list(filter(lambda n: n % 7 == 0, range(1, 100, 3))))

[7, 28, 49, 70, 91]


In [122]:
list(pd.RangeIndex(start=4, stop=7, step=1))

[4, 5, 6]

In [272]:
list(pd.RangeIndex(start=10, stop=-11, step=-1))

[10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10]

In [275]:
## RangeIndex is an immutable object: it can't be changed, which helps performance
## We can provide our own custom index/labels for a Series
## If we don't provide an index, pandas creates its own immutable RangeIndex, for better performance

In [274]:
pd.Series(books_list)

0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
dtype: object

## Series And Index Names

In [125]:
list_series

0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
dtype: object

In [126]:
# intelligible: capable of being understood

In [127]:
list_series.size # attributes

3

In [276]:
list_series.equals(dict_series) # method/function

False

In [277]:
list_series.dtype

dtype('O')

In [278]:
list_series.name

'My Favorite Books'

In [279]:
# Compare against the None singleton by identity (PEP 8), not with ==.
list_series.name is None

False

In [280]:
list_series.name = 'My Favorite Books'

In [132]:
list_series

0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
Name: My Favorite Books, dtype: object

In [283]:
list_series.index.name

'My Books'

In [282]:
# Compare against the None singleton by identity (PEP 8), not with ==.
list_series.index.name is None

False

In [284]:
list_series.index.name = "My Books"

In [136]:
list_series

My Books
0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
Name: My Favorite Books, dtype: object

In [137]:
list_series.values

array(['Fooled by Randomness', 'Eat That Frog', 'Lenin on the Train'],
      dtype=object)

In [138]:
# `keys` is a method, so without parentheses the cell shows the bound method's
# repr (see the output below) rather than the index itself.
# NOTE(review): `list_series.keys()` was probably intended — it returns the index.
list_series.keys

<bound method Series.keys of My Books
0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
Name: My Favorite Books, dtype: object>

## Skill Challenge 😈

Create a Python list of length 4 that contains some of your favorite actors. So this should be a list of strings. Call this list — that is, assign it to a variable called — `actor_names`.

In [139]:
actor_names = ['Gaurav', 'Sourav', 'Miraj', 'Sekhar']

Next, create another python list of the same length that contains your guesses for how old each actor is, feel free to just use integers or float. call this list actor_ages

In [140]:
actor_ages = [26, 29, 32, 56.2]

Create a series that stores actor ages and labels the ages using the actor names. To clarify, use the actor names as the index and the actor ages as the values. Give this series a name of actors.

In [141]:
actors = pd.Series(actor_ages, index=actor_names, name="actors")

In [142]:
actors

Gaurav    26.0
Sourav    29.0
Miraj     32.0
Sekhar    56.2
Name: actors, dtype: float64

In [143]:
actors_dicts = dict(zip(actor_names, actor_ages))

In [144]:
actors_dicts

{'Gaurav': 26, 'Sourav': 29, 'Miraj': 32, 'Sekhar': 56.2}

In [145]:
pd.Series(data=actors_dicts, name='actors')

Gaurav    26.0
Sourav    29.0
Miraj     32.0
Sekhar    56.2
Name: actors, dtype: float64

## Another Solution 😈

In [146]:
{name:age for name, age in zip(actor_names, actor_ages)}

{'Gaurav': 26, 'Sourav': 29, 'Miraj': 32, 'Sekhar': 56.2}

In [147]:
pd.Series({name:age for name, age in zip(actor_names, actor_ages)})

Gaurav    26.0
Sourav    29.0
Miraj     32.0
Sekhar    56.2
dtype: float64

In [148]:
# list, dict comprehension

In [149]:
dict.fromkeys(range(5), True)

{0: True, 1: True, 2: True, 3: True, 4: True}

In [150]:
{n: n**2 for n in range(5)}

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}

## The head(), tail() methods

In [285]:
int_series = pd.Series(range(60))

In [286]:
int_series.size

60

In [153]:
len(int_series)

60

In [154]:
int_series.head()

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [155]:
int_series.tail()

55    55
56    56
57    57
58    58
59    59
dtype: int64

In [156]:
int_series.head(10)

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64

In [157]:
int_series.tail(n=10)

50    50
51    51
52    52
53    53
54    54
55    55
56    56
57    57
58    58
59    59
dtype: int64

In [158]:
pd.Series(range(100000))

0            0
1            1
2            2
3            3
4            4
         ...  
99995    99995
99996    99996
99997    99997
99998    99998
99999    99999
Length: 100000, dtype: int64

In [159]:
pd.options.display.min_rows = 15 # bad practice, for more control

In [160]:
pd.Series(range(100000))

0            0
1            1
2            2
3            3
4            4
5            5
6            6
         ...  
99993    99993
99994    99994
99995    99995
99996    99996
99997    99997
99998    99998
99999    99999
Length: 100000, dtype: int64

## Extracting By Index Position

In [293]:
from string import ascii_lowercase, ascii_uppercase

In [294]:
ascii_lowercase, ascii_uppercase

('abcdefghijklmnopqrstuvwxyz', 'ABCDEFGHIJKLMNOPQRSTUVWXYZ')

In [163]:
alphabet_series = pd.Series(list(ascii_lowercase))

In [164]:
alphabet_series.head(5).head(1)

0    a
dtype: object

In [165]:
alphabet_series.tail(1)

25    z
dtype: object

In [166]:
alphabet_series[0:5]

0    a
1    b
2    c
3    d
4    e
dtype: object

In [167]:
alphabet_series[10] # 11th letter

'k'

In [168]:
# alphabet_series[:3]
alphabet_series[0:3] # first 3 letter

0    a
1    b
2    c
dtype: object

In [169]:
alphabet_series[5:11] # sixth to 11th letters

5     f
6     g
7     h
8     i
9     j
10    k
dtype: object

In [170]:
alphabet_series[-6:] # from end 6th to the end

20    u
21    v
22    w
23    x
24    y
25    z
dtype: object

In [171]:
alphabet_series.iloc[0]

'a'

## Accessing Elements By Label

In [172]:
from string import ascii_uppercase

In [173]:
ascii_uppercase

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [174]:
ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [175]:
# Values are the lowercase letters; each one is labelled with 'label_' plus
# the matching uppercase letter (label_A -> 'a', ..., label_Z -> 'z').
labeled_alphabet = pd.Series(
    data=list(ascii_lowercase),
    index=[f'label_{letter}' for letter in ascii_uppercase],
)

In [176]:
labeled_alphabet.head()

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
dtype: object

In [177]:
labeled_alphabet.tail(3)

label_X    x
label_Y    y
label_Z    z
dtype: object

In [178]:
labeled_alphabet['label_A']

'a'

In [179]:
labeled_alphabet.iloc[0]

'a'

In [180]:
labeled_alphabet.iloc[10]

'k'

In [181]:
labeled_alphabet['label_K']

'k'

In [182]:
labeled_alphabet.iloc[:3]

label_A    a
label_B    b
label_C    c
dtype: object

In [183]:
labeled_alphabet[:'label_C']

label_A    a
label_B    b
label_C    c
dtype: object

In [184]:
labeled_alphabet.iloc

<pandas.core.indexing._iLocIndexer at 0xffff813f3f40>

In [185]:
labeled_alphabet.iloc[5:10]

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [186]:
labeled_alphabet['label_F': 'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [187]:
labeled_alphabet.iloc[-6:]

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [188]:
labeled_alphabet['label_U':]

label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

**Accessing elements by label or by position is possible via the same square brackets**

## The `add_prefix()` and `add_suffix()` methods

In [189]:
alphabet_series.head()

0    a
1    b
2    c
3    d
4    e
dtype: object

In [190]:
alphabet_series.add_prefix('label_').head(3)

label_0    a
label_1    b
label_2    c
dtype: object

In [295]:
alphabet_series.add_suffix('_some_cool_ending').head(3)

0_x_some_cool_ending    a
1_x_some_cool_ending    b
2_x_some_cool_ending    c
dtype: object

In [298]:
alphabet_series.head(4)

0_x    a
1_x    b
2_x    c
3_x    d
dtype: object

In [192]:
## This doesn't modify the original series; it just returns a modified copy

In [193]:
# add_suffix() returns a new Series, so the result has to be assigned to be kept.
# Building from the raw letters (instead of from alphabet_series itself) keeps the
# cell idempotent: re-running it can never stack a second '_x' onto the labels.
alphabet_series = pd.Series(list(ascii_lowercase)).add_suffix('_x')

In [194]:
alphabet_series.head(3)

0_x    a
1_x    b
2_x    c
dtype: object

## Using Dot Notation

In [195]:
labeled_alphabet['label_V']

'v'

In [299]:
labeled_alphabet.label_V # it has some set of it's own limitation

'v'

In [197]:
labeled_alphabet.iloc[-6:-5]

label_U    u
dtype: object

In [198]:
labeled_alphabet['label_v': 'label_x']

Series([], dtype: object)

In [199]:
# labeled_alphabet.label_V:label_X

In [200]:
# labeled_alphabet.label v more label

## Boolean Masks And The `.loc` or `.iloc` Indexer

In [201]:
labeled_alphabet['label_F': 'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [202]:
labeled_alphabet.loc['label_F': 'label_J']

label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
dtype: object

In [203]:
list_series

My Books
0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
Name: My Favorite Books, dtype: object

In [204]:
list_series.loc[[True, True, True]]

My Books
0    Fooled by Randomness
1           Eat That Frog
2      Lenin on the Train
Name: My Favorite Books, dtype: object

In [205]:
list_series.loc[[True, False, True]]

My Books
0    Fooled by Randomness
2      Lenin on the Train
Name: My Favorite Books, dtype: object

In [206]:
# The length of the mask should be the length of series

In [207]:
# list_series.loc[[True, True]] - raises an IndexError (mask length does not match the series)

In [208]:
labeled_alphabet.head(3)

label_A    a
label_B    b
label_C    c
dtype: object

In [209]:
labeled_alphabet.size

26

In [210]:
# labeled_alphabet.loc[[True, False]]

In [211]:
labeled_alphabet.loc[[True for i in range(labeled_alphabet.size)]]

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
label_F    f
label_G    g
label_H    h
label_I    i
label_J    j
label_K    k
label_L    l
label_M    m
label_N    n
label_O    o
label_P    p
label_Q    q
label_R    r
label_S    s
label_T    t
label_U    u
label_V    v
label_W    w
label_X    x
label_Y    y
label_Z    z
dtype: object

In [212]:
# Boolean mask that is True at even zero-based positions (0, 2, 4, ...).
labeled_alphabet.loc[[position % 2 == 0 for position in range(labeled_alphabet.size)]]

label_A    a
label_C    c
label_E    e
label_G    g
label_I    i
label_K    k
label_M    m
label_O    o
label_Q    q
label_S    s
label_U    u
label_W    w
label_Y    y
dtype: object

In [213]:
pd.Series(['a', 'b', 'c'])[[True, False, True]]

0    a
2    c
dtype: object

#### Boolean Masks
- used to index select items at scale
- works with [] and .loc
- need to be same length as series
- `.iloc` is the `position`-based (integer) extractor
- `.loc` is the `label`-based extractor

## Extracting By Position With `.iloc`

In [214]:
# iloc -> integer loc -> indexing by position
# loc -> location -> indexing by label

In [215]:
labeled_alphabet.iloc[0]

'a'

In [216]:
labeled_alphabet.iloc[1]

'b'

In [217]:
labeled_alphabet.iloc[1:3]

label_B    b
label_C    c
dtype: object

In [218]:
# akin: similar to

In [300]:
labeled_alphabet.iloc[[1, 4, 9]] # index position that we want to extract

label_B    b
label_E    e
label_J    j
dtype: object

## Using Callables With `.loc` And `.iloc`

##### Indexing with Callables
- used for highly customizable indexing
- work with [], .loc and .iloc
- a single-argument function that returns indexing output, where output means
    - a list of labels
    - list of booleans
    - a slice, etc

In [220]:
labeled_alphabet.loc['label_V']

'v'

In [301]:
labeled_alphabet.loc[lambda x: 'label_V'] # this is just same as above one

'v'

In [222]:
labeled_alphabet.loc[lambda x: ['label_V', 'label_A']]

label_V    v
label_A    a
dtype: object

In [223]:
labeled_alphabet.loc[lambda x: []]

Series([], dtype: object)

In [224]:
labeled_alphabet.loc[lambda x: [True for i in range(x.size)]].head(5)

label_A    a
label_B    b
label_C    c
label_D    d
label_E    e
dtype: object

In [225]:
def every_fifth(x):
    """Build a boolean mask that is True at every fifth position of ``x``.

    Only ``x.size`` is read. Positions are counted from 1, so the True
    entries land on zero-based positions 4, 9, 14, ...
    """
    return [position % 5 == 0 for position in range(1, x.size + 1)]

In [226]:
labeled_alphabet.iloc[every_fifth]

label_E    e
label_J    j
label_O    o
label_T    t
label_Y    y
dtype: object

## Selection With `.get()`

In [227]:
labeled_alphabet.get('label_I')

'i'

In [228]:
labeled_alphabet.loc['label_I']

'i'

In [229]:
labeled_alphabet['label_I']

'i'

In [230]:
labeled_alphabet.get('label_Inexist')

In [231]:
# .get() returned None for the missing label above; test for it by identity (PEP 8).
labeled_alphabet.get('Index_unknown') is None

True

In [232]:
labeled_alphabet.get('Index_unknown', default='not found')

'not found'

In [233]:
labeled_alphabet.get('Index_unknown', default=99.12)

99.12

In [234]:
# labeled_alphabet.loc['label_unknown'] raises a KeyError

In [235]:
# chimera: greek mythology creature with the head of a lion and body of a goat and python tail

##### Selection By Label

Approach | Example | Comment
---|---|---
[idx'ing] | series['label'] | slices, callables, boolean mask
.loc[] | series.loc['label'] | slices, callables, boolean mask
dot access | series.label | no slices, callables, or boolean masks
.get() | series.get('label') | no slice, provides default, forgiving

##### Selection By Position

Approach | Example | Comment
---|---|---
[idx'ing] | series[0] | slices, callables, boolean mask
.iloc[] | series.iloc[0] | slices, callables, boolean mask
.get() | series.get(0) | no slice, provides default, forgiving

## Skill Challenge 😈

Create a series of length 100 containing the squares of integers from 0 to 99. Assign it to the variable squares

In [236]:
# Squares of 0..99, labelled by the default integer index.
squares = pd.Series([n * n for n in range(100)])

In [237]:
squares.size

100

In [238]:
squares.head()

0     0
1     1
2     4
3     9
4    16
dtype: int64

Extract the last three items from the squares series using square bracket indexing

In [239]:
squares.tail()

95    9025
96    9216
97    9409
98    9604
99    9801
dtype: int64

In [240]:
squares.iloc[-3:]

97    9409
98    9604
99    9801
dtype: int64

Repeat Step 2 but using the .tail() method instead.

In [241]:
squares.tail(3)

97    9409
98    9604
99    9801
dtype: int64

In [242]:
res1 = squares.iloc[-3:]
res2 = squares.tail(3)

In [243]:
res1.equals(res2)

True

In [244]:
res1 == res2

97    True
98    True
99    True
dtype: bool

In [245]:
res1, res2

(97    9409
 98    9604
 99    9801
 dtype: int64,
 97    9409
 98    9604
 99    9801
 dtype: int64)