In [1]:
# This code is used to learn basics of Series 
# data type in pandas module.

import pandas as pd

In [2]:
# list of strings will be considered as 'object' data type in Series
cats = ['blueCat','rainbowCat','snailCat','strayCat','nobodylovesmeCat']
pd.Series(cats)

0             blueCat
1          rainbowCat
2            snailCat
3            strayCat
4    nobodylovesmeCat
dtype: object

In [3]:
# List of integers in Series
nums_list = [1,5,7]
pd.Series(nums_list)

0    1
1    5
2    7
dtype: int64

In [4]:
# List of float in Series
float_list = [1.0, 3.0, 5.4]
pd.Series(float_list)

0    1.0
1    3.0
2    5.4
dtype: float64

In [5]:
# List of combination of integer, float, string and None
item_list = [1.0, 5, None, 'StringInSeries']
pd.Series(item_list)

0                 1
1                 5
2              None
3    StringInSeries
dtype: object

In [6]:
# None in list of numbers (int and float) will be stored as NaN
nums_list = [1.0, 2.0, 3.4, None]
pd.Series(nums_list)

0    1.0
1    2.0
2    3.4
3    NaN
dtype: float64

### Note the difference between NaN and None
None is an object type and NaN is treated like a number. The name gives away the datatype as well, since NaN stands for Not A Number.

### Assign Index/Keys to Series


In [7]:
# Convert dictionary to Series
studentMajor = {'Leo': 'Mechanical Engineer', 'Victoria': 'Marketing', 'Will': 'Chemistry'}
majorSeries = pd.Series(studentMajor)
majorSeries.index

Index(['Leo', 'Victoria', 'Will'], dtype='object')

In [8]:
# Index and Values can be parsed separately into Series
studentName = ['Leo', 'Victoria', 'Finn', 'Fong', 'Will']
majors = ['Engineering', 'Marketing', None, 'CS', 'Chemistry']
student_majors = pd.Series(majors, index = studentName)
student_majors

Leo         Engineering
Victoria      Marketing
Finn               None
Fong                 CS
Will          Chemistry
dtype: object

In [9]:
# When creating series from a dictionary, one can selectively
# import keys and corresponding values to Series
# If the given index is not in the given dictionary, 
# NaN will be assigned to its value in Series.
myfamily = pd.Series(studentMajor, index = studentName[:3])
myfamily

Leo         Mechanical Engineer
Victoria              Marketing
Finn                        NaN
dtype: object

### Querying a Series

In [10]:
print(student_majors.iloc[3]) # iloc for values

print(student_majors.loc['Leo']) # loc for keys


CS
Engineering


In [16]:
# this is the same as iloc but could be ambiguous 
# when the indexes/labels/values have the same name
print(student_majors[3]) 
s = {100: 'China', 101: 'USA', 102: 'Japan'}
# s[0] # This will raise an error
s = {'China': 3, 'USA': 2, 'Japan': 1}
# s[2] # This will also raise an error

CS


### Summary:
    Basically use iloc/loc instead of ambiguous index. 

### Math operations

In [18]:
import numpy as np
s = pd.Series(np.random.randint(0,1000,10000))
s.head() # Use head() method to display the first n elements (default n=5)

0    317
1    232
2    272
3    624
4    365
dtype: int32

In [19]:
len(s) # len() works on Series too

10000

In [20]:
%%timeit -n 100
# Here we use Jupyter Notebook's magic function timeit to see the computation time
s_sum = 0
for item in s:
    s_sum += item
    

919 µs ± 19.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
%%timeit -n 100
s_sum = np.sum(s) # This will be much faster than iterating every item

62 µs ± 3.56 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [24]:
# Broadcast feature of Pandas and NumPy
s += 2 # This will perform on every item in s
s.head()
# A for loop can do this but slower

0    323
1    238
2    278
3    630
4    371
dtype: int32

In [35]:
# One can add items to Series easily using loc() method
s = pd.Series({'Germany': 'BMW', 'France': 'Renault', 'USA': 'Ford', 'Italy': 'Ferrari'})
s.loc['Japan'] = 'Toyota' # This makes changes to the original Series
print(s)

# Using append() can combine Series without changing the original Series
other_cars = pd.Series({'China': 'Hongqi', 'Korea': 'Kia', 'England': 'Aston Martin'})
s_new = s.append(other_cars)
print(s_new)

Germany        BMW
France     Renault
USA           Ford
Italy      Ferrari
Japan       Toyota
dtype: object
Germany             BMW
France          Renault
USA                Ford
Italy           Ferrari
Japan            Toyota
China            Hongqi
Korea               Kia
England    Aston Martin
dtype: object
