In [1]:
import numpy as np
import pandas as pd

In [2]:
# Passing a list of values to pd.Series() creates a series.
# pandas automatically identifies the data type, assigns an index starting at 0,
# and sets the name of the series to None.

# a list of three students
students = [
    'Alice',
    'Jack',
    'Molly',
]

# build the Series from the list; as the last expression of the cell it is displayed
pd.Series(data=students)

0    Alice
1     Jack
2    Molly
dtype: object

In [3]:
# the same construction, this time from a list of whole numbers
numbers = [
    1,
    2,
    3,
]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

How NumPy and pandas handle missing data

In [4]:
# Python uses the None type to indicate a lack of data,
# but what happens when we want a typed container, as the Series object is?

# pandas performs type conversion. For a list of strings in which one element is None,
# pandas inserts it as a None and uses the object type for the underlying array
students = [
    'Alice',
    'Jack',
    None,
]
pd.Series(data=students)

0    Alice
1     Jack
2     None
dtype: object

In [5]:
# For a list of numbers (integers or floats) that contains a None,
# pandas instead converts the None to NaN (Not a Number), a special floating point value
numbers = [
    1,
    2,
    None,
]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

pandas went and converted our integers to floats automatically. So when you're wondering why the list of integers you put into a series has become floats, it's probably because there is some missing data.

In [6]:
# NaN != None: comparing the two for equality evaluates to False
# (the `== None` comparison is deliberate here -- it is the point of the demonstration)
np.nan == None

False

In [7]:
# NaN also never compares equal to itself: an equality test of NaN against NaN is always False
np.nan == np.nan

False

In [8]:
# instead, test for the presence of NaN with a dedicated function such as numpy's isnan()
np.isnan(np.nan)

True

In [10]:
# A series can also be created directly from dictionary data;
# the keys of the dictionary are automatically used as the index
students_scores = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
}
s = pd.Series(data=students_scores)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [11]:
# the index, shown as the first column of the output above, is also made up of strings
# the index object itself can be retrieved through the Series' index attribute
s.index

Index(['Alice', 'Jack', 'Molly'], dtype='object')

In [12]:
# a more complex kind of data: each element of the list is a tuple
students = [
    ("Alice", "Brown"),
    ("Jack", "White"),
    ("Molly", "Green"),
]
pd.Series(data=students)

0    (Alice, Brown)
1     (Jack, White)
2    (Molly, Green)
dtype: object

In [13]:
# the index can be supplied explicitly, as a list, alongside the values
s = pd.Series(
    data=['Physics', 'Chemistry', 'English'],
    index=['Alice', 'Jack', 'Molly'],
)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [16]:
# When the index passed in does not line up with the keys of the dictionary
# used to create the series:
#   - pandas ignores every dictionary key that is not in the index
#   - pandas adds a None or NaN value for every index value that is not a dictionary key
students_scores = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
}

s = pd.Series(
    data=students_scores,
    index=['Alice', 'Molly', 'Sam'],
)
s

Alice    Physics
Molly    English
Sam          NaN
dtype: object

We see that we did not provide 'Jack' in the index, so 'Jack' was ignored, and because 'Sam' is not a key in the dictionary, the value for 'Sam' was set to NaN.