# Series Introduction

In [39]:
import pandas as pd
import numpy as np

"""

# Create a Series from data (sequence, dictionary or scalar)

pd.Series(  data=None, 
            index=None,
            dtype=None,
            name=None, 
            copy=False )


# Access index of series
s.index 

# Cast series to dtype. To ignore errors (and return original object) use errors='ignore'
s.astype(dtype, errors="raise")

# Return values from s where boolean_array is True
s[boolean_array]

# Determine if a categorical series is ordered
s.cat.ordered

# Reorder the categories (optionally marking them as ordered). new_categories must contain exactly the existing categories
s.cat.reorder_categories(new_categories, ordered=False)

"""

'\n\n# Create a Series from data (sequence, dictionary or scalar)\n\npd.Series(  data=None, \n            index=None,\n            dtype=None,\n            name=None, \n            copy=False )\n\n\n# Access index of series\ns.index \n\n# Cast series to dtype. To ignore errors (and return original object) use errors=\'ignore\'\ns.astype(dtype, errors="raise")\n\n# Return values from s where boolean_array is True\ns[boolean_array]\n\n# Determine if a categorical series is ordered\ns.cat.ordered\n\n# Reorder the categories (optionally marking them as ordered). new_categories must contain exactly the existing categories\ns.cat.reorder_categories(new_categories, ordered=False)\n\n'

In [2]:
# A plain-dict stand-in for a Series: 'index' and 'data' are parallel lists
# (position i of 'index' labels position i of 'data'), and 'name' is the label
# of the whole collection.
songs = dict(
    index=['Paul', 'John', 'George', 'Ringo'],
    data=[145, 142, 38, 13],
    name='counts',
)

def get(series, idx):
    """Return the value stored under label ``idx`` in a dict-based series.

    Parameters
    ----------
    series : dict
        Mapping with an ``'index'`` entry (sequence of labels) and a
        ``'data'`` entry (sequence of values) aligned by position.
    idx : object
        Label to look up in ``series['index']``.

    Returns
    -------
    The element of ``series['data']`` at the position where ``idx`` first
    occurs in ``series['index']``.

    Raises
    ------
    ValueError
        Propagated from ``.index`` when ``idx`` is not among the labels
        (for list-like label containers).
    """
    # Translate the label into a position first, then read the value there.
    position = series['index'].index(idx)
    values = series['data']
    return values[position]

# Look up two of the labels through the dict-based helper, one result per line.
for member in ('Paul', 'George'):
    print(get(songs, member))

# --- Creating a Series ---
# No index is passed, so the rows get the default integer labels 0..3.
songs_2 = pd.Series(data=[145, 142, 38, 13], name='counts')
print(songs_2)

# --- Index information ---
# Bare expression: it is evaluated, but nothing is displayed because it is
# not the last statement of the cell.
songs_2.index

# --- Non-integer index ---
# Same values, now labelled with strings instead of positions.
songs_3 = pd.Series(
    data=[145, 142, 38, 13],
    index=['Paul', 'John', 'George', 'Ringo'],
    name='counts',
)
print(songs_3)

145
38
0    145
1    142
2     38
3     13
Name: counts, dtype: int64
Paul      145
John      142
George     38
Ringo      13
Name: counts, dtype: int64


### NaNs / Nulls / `<NA>`

In [7]:
# Optional integer support for missing values: with the nullable 'Int64'
# extension dtype the None entry is stored as <NA> and the dtype stays integer.
nan_series = pd.Series(
    data=[2, None, 8],
    index=list('ABC'),
    dtype=pd.Int64Dtype(),
)

print(nan_series)

A       2
B    <NA>
C       8
dtype: Int64


### Making a Mask

In [10]:
# Boolean array: True where a count is strictly above the median.
mask = songs_3.gt(songs_3.median())
print(mask)
print(songs_3.loc[mask])

# Same filter with the mean as the threshold.
mask_2 = songs_3.gt(songs_3.mean())
print(songs_3.loc[mask_2])

# ... and with a fixed threshold.
mask_3 = songs_3.gt(30)
print(songs_3.loc[mask_3])

Paul       True
John       True
George    False
Ringo     False
Name: counts, dtype: bool
Paul    145
John    142
Name: counts, dtype: int64
Paul    145
John    142
Name: counts, dtype: int64
Paul      145
John      142
George     38
Name: counts, dtype: int64


### Categories

In [37]:
# With dtype='category' the categories are inferred from the data and are
# left unordered.
size = pd.Series(data=['xs', 's', 'm', 'l', 'xl'], dtype='category')
print(size)

print(size.cat.ordered)

# To convert a non-categorical series to an ordered category, build a
# CategoricalDtype that lists the categories in rank order and cast to it.
s2 = pd.Series(['m', 'xs', 'xl', 'l', 's'])
size_type = pd.CategoricalDtype(
    categories=['xs', 's', 'm', 'l', 'xl'],
    ordered=True,
)
s3 = s2.astype(size_type)


print("\n", list(s3))

# With the ordered dtype, comparisons follow the category order
# ('xs' < 's' < 'm' < ...) rather than alphabetical order.
mask = s3 < 'm'
print(mask)

print(s3.loc[mask])

# The .str accessor still works on a categorical of strings; the result is a
# plain object Series.
print(s3.str.upper())

print(s3.cat.ordered)

0    xs
1     s
2     m
3     l
4    xl
dtype: category
Categories (5, object): ['l', 'm', 's', 'xl', 'xs']
False

 ['m', 'xs', 'xl', 'l', 's']
0    False
1     True
2    False
3    False
4     True
dtype: bool
1    xs
4     s
dtype: category
Categories (5, object): ['xs' < 's' < 'm' < 'l' < 'xl']
0     M
1    XS
2    XL
3     L
4     S
dtype: object
True


# Exercises

1) Using Jupyter, create a series with the temperature values for the last seven days. Filter out the values below the mean

2) Using Jupyter, create a series with your favourite machine learning models. Use the categorical type.

In [44]:
# TASK 1

# Raise percentage per employee. Ints and floats are mixed here; the
# resulting Series is float64.
emp_percs = dict(
    Jerome=2.6,
    Mary=10.0,
    Phill=15,
    Jane=2.7,
    Bill=3.6,
    Darcy=5,
    Megan=7.9,
    Jon=2.7,
    Willis=8,
    Watson=6,
    Tessa=3.5,
)

s = pd.Series(emp_percs)

# Compute the mean once and reuse it for both the mask and the report.
mean_raise = np.mean(s)
below_average_raise = s.lt(mean_raise)
print("Average Raise =", mean_raise)
print(s.loc[below_average_raise])

Average Raise = 6.090909090909091
Jerome    2.6
Jane      2.7
Bill      3.6
Darcy     5.0
Jon       2.7
Watson    6.0
Tessa     3.5
dtype: float64


In [63]:
# TASK 2

ml_models = [
    "BERT", "LSTM", "GRU", "RNN",
    "GNN", "U-Net", "Vision Transformer", "CNN",
]

# dtype='category' derives the categories from the values themselves.
s = pd.Series(ml_models, dtype='category')
print(s)

# reorder_categories returns a new Series rather than modifying `s`; as the
# last expression of the cell, that result is what gets displayed. The
# requested order is simply ml_models reversed.
s.cat.reorder_categories(ml_models[::-1], ordered=False)

0                  BERT
1                  LSTM
2                   GRU
3                   RNN
4                   GNN
5                 U-Net
6    Vision Transformer
7                   CNN
dtype: category
Categories (8, object): ['BERT', 'CNN', 'GNN', 'GRU', 'LSTM', 'RNN', 'U-Net', 'Vision Transformer']


0                  BERT
1                  LSTM
2                   GRU
3                   RNN
4                   GNN
5                 U-Net
6    Vision Transformer
7                   CNN
dtype: category
Categories (8, object): ['CNN', 'Vision Transformer', 'U-Net', 'GNN', 'RNN', 'GRU', 'LSTM', 'BERT']