In [28]:
import pandas as pd

## Series

In [29]:
# Per-member song counts, labelled by name.
# NOTE(review): the series name and every index label end with a trailing
# space, so lookups must include it (e.g. songs['Paul ']) - confirm intended.
songs = pd.Series(
    data=[145, 142, 38, 13],
    index=['Paul ', 'John ', 'George ', 'Ringo '],
    name='counts ',
)

In [30]:
# Display the index labels of the songs Series.
songs.index

Index(['Paul ', 'John ', 'George ', 'Ringo '], dtype='object')

## Mask

In [31]:
# Boolean Series: True for every entry strictly greater than the median.
mask = songs.gt(songs.median())
mask

Paul        True
John        True
George     False
Ringo      False
Name: counts , dtype: bool

In [32]:
# Keep only the rows where the boolean mask is True.
songs.loc[mask]

Paul     145
John     142
Name: counts , dtype: int64

# Categorical Data

In [33]:
# Size labels stored as a categorical Series; no ordering is declared here.
s1 = pd.Series(
    ['m', 'l', 'xs', 'xxl', 's', 'xl', 'fb', 'xxxl'],
    dtype='category',
)
s1

0       m
1       l
2      xs
3     xxl
4       s
5      xl
6      fb
7    xxxl
dtype: category
Categories (8, object): ['fb', 'l', 'm', 's', 'xl', 'xs', 'xxl', 'xxxl']

In [34]:
# Inspect the .cat accessor to see whether the categorical data is ordered.
s1.cat.ordered

False

In [35]:
# To obtain an ordered categorical Series, build a CategoricalDtype with the
# desired categories (listed lowest to highest) and ordered=True, then hand
# that dtype to .astype. Here s1 is cast to the ordering s < m < l < xl.
size_type = pd.CategoricalDtype(['s', 'm', 'l', 'xl'], ordered=True)
s2 = s1.astype(size_type)
s2

0      m
1      l
2    NaN
3    NaN
4      s
5     xl
6    NaN
7    NaN
dtype: category
Categories (4, object): ['s' < 'm' < 'l' < 'xl']

In [36]:
# s2 was limited to the categories 's', 'm', 'l' and 'xl'; any value in the
# data that is not one of those categories was replaced by NaN.
# Because the categories are ordered, the Series can be compared against a
# category value. NaN entries compare as False.
s2 > 's'

0     True
1     True
2    False
3    False
4    False
5     True
6    False
7    False
dtype: bool

In [40]:
# Ordering can also be attached to a Series that is already categorical.
# Every existing category must appear in the new list; otherwise pandas
# raises a ValueError.
s3 = s1.cat.reorder_categories(
    new_categories=['xs', 's', 'm', 'l', 'xl', 'xxl', 'xxxl', 'fb'],
    ordered=True,
)

In [41]:
# The comparison follows the category order given to reorder_categories,
# not alphabetical order (so 'fb', listed last, ranks above 'm').
s3 > 'm'

0    False
1     True
2    False
3     True
4    False
5     True
6     True
7     True
dtype: bool

In [43]:
# String and datetime Series expose .str and .dt accessors for operations
# specific to those types. The accessors remain usable after the data has been
# converted to a categorical type, as with the categorical s3 here:
s3.str.upper()

0       M
1       L
2      XS
3     XXL
4       S
5      XL
6      FB
7    XXXL
dtype: object

In [46]:
# Exercise 1: maximum temperatures for the next 7 days, held in a Series
# with the default integer index.
values = [
    15, 16, 12, 13, 11, 12, 13,
]
temps = pd.Series(data=values)
temps

0    15
1    16
2    12
3    13
4    11
5    12
6    13
dtype: int64

In [47]:
# Mean of the seven maximum temperatures.
temps.mean()

13.142857142857142

In [49]:
# Select the days whose temperature is strictly above the mean.
# Note: this rebinds `mask`, which an earlier cell built from `songs`.
mask = temps.gt(temps.mean())
temps.loc[mask]

0    15
1    16
dtype: int64

In [52]:
# Exercise 2: a Series of favorite colors stored with a categorical dtype.
my_fav_colors = pd.Series(
    ['Gold', 'Platinum', 'Aquamarine', 'Teal', 'Indigo'],
    dtype='category',
)
my_fav_colors

0          Gold
1      Platinum
2    Aquamarine
3          Teal
4        Indigo
dtype: category
Categories (5, object): ['Aquamarine', 'Gold', 'Indigo', 'Platinum', 'Teal']