# Series

## Pure Python Series

In [1]:
# A "series" modelled with a plain dict: 'index' holds the labels, 'data'
# holds the value at the same position, and 'name' labels the whole series.
ser = {
    'index':[0, 1, 2, 3],
    'data':[145, 142, 38, 33],
    'name':'songs'
}

In [2]:
def get(ser, idx):
    """Return the value stored under label ``idx`` in a dict-based series.

    ``ser`` is a mapping with an ``'index'`` sequence of labels and a
    ``'data'`` sequence of values; the position of ``idx`` within
    ``ser['index']`` selects which element of ``ser['data']`` is returned.

    Raises ``ValueError`` (from ``list.index``) when ``idx`` is not a label.
    If a label occurs more than once, only its first position is used.
    """
    labels = ser['index']
    position = labels.index(idx)
    return ser['data'][position]

In [3]:
get(ser, 1)

142

In [4]:
# Same dict layout as before, but the labels in 'index' are strings
# instead of integer positions.
songs = {
    'index':['Paul', 'John', 'George', 'Ringo'],
    'data':[145, 142, 38, 13],
    'name':'counts'
}

In [5]:
get(songs, 'John')

142

## Pandas Series

In [6]:
import pandas as pd

# No index is passed here, so pandas supplies the default one.
songs2 = pd.Series(
    data=[145, 142, 38, 13],
    name='counts',
)

In [7]:
songs2

0    145
1    142
2     38
3     13
Name: counts, dtype: int64

In [8]:
songs2.index

RangeIndex(start=0, stop=4, step=1)

In [9]:
# Same values as songs2, now labelled with explicit string index entries.
songs3 = pd.Series(
    data=[145, 142, 38, 13],
    index=['Paul', 'John', 'George', 'Ringo'],
    name='counts',
)

songs3

Paul      145
John      142
George     38
Ringo      13
Name: counts, dtype: int64

In [10]:
songs3.index

Index(['Paul', 'John', 'George', 'Ringo'], dtype='object')

In [11]:
class Foo:
    """Empty placeholder class; an instance of it is stored in the Series below."""
    pass

# A Series can hold mixed Python objects (strings, an int, a Foo instance);
# the displayed dtype is then `object`.
# NOTE(review): 'Righcard' looks like a typo for 'Richard' -- left unchanged
# here so the recorded cell output still matches; confirm and re-run to fix.
ringo = pd.Series(
    ['Righcard', 'Starkey', 13, Foo()],
    name='ringo')

ringo

0                                   Righcard
1                                    Starkey
2                                         13
3    <__main__.Foo object at 0x7f4d73e61290>
Name: ringo, dtype: object

## The NaN value

In [12]:
# None is stored as NaN. NaN is a float, so the integer 2 is shown as 2.0
# and the Series dtype becomes float64 rather than int64.
nan_ser = pd.Series([2, None],
                   index=['Ono', 'Clapton'])

nan_ser   # NaN is a float value, hence dtype float64

Ono        2.0
Clapton    NaN
dtype: float64

In [13]:
nan_ser.count()

1

## Similar to NumPy

In [14]:
import numpy as np
numpy_ser = np.array([145, 142, 38, 13])
# songs3 is labelled with strings, so a bare integer key (songs3[1]) relies on
# the deprecated positional fallback of Series.__getitem__. Use .iloc to
# select by position explicitly, mirroring numpy_ser[1].
songs3.iloc[1]

142

In [15]:
numpy_ser[1]

142

In [16]:
songs3.mean()

84.5

In [17]:
numpy_ser.mean()

84.5

In [18]:
# Comparing a Series with a scalar is element-wise; the result keeps the
# index and name of songs3.
mask = songs3 > songs3.median()  # boolean Series (one True/False per label)

mask

Paul       True
John       True
George    False
Ringo     False
Name: counts, dtype: bool

In [19]:
songs3[mask]

Paul    145
John    142
Name: counts, dtype: int64

In [20]:
numpy_ser[numpy_ser > np.median(numpy_ser)]

array([145, 142])

# Series CRUD

## Creation

In [22]:
# pandas allows index labels to be strings, and they need not be unique!
george_dupe = pd.Series(
    data=[10, 7, 1, 22],
    index=['1968', '1969', '1970', '1970'],
    name='George Songs',
)

george_dupe

1968    10
1969     7
1970     1
1970    22
Name: George Songs, dtype: int64

In [23]:
# With both a dict and an explicit index, values are looked up by label:
# index labels missing from the dict become NaN, and dict keys that are not
# in the index do not appear in the result.
# NOTE(review): the key '1979' is not in the index, which is why both '1970'
# rows are NaN -- possibly a typo for '1970'; confirm the intended example.
g2 = pd.Series({'1969':7, '1979': [1, 22]},
              index=['1969', '1970', '1970'])

g2

1969      7
1970    NaN
1970    NaN
dtype: object

## Reading

In [24]:
george_dupe['1968']

10

In [25]:
# may not be a scalar!
george_dupe['1970']

1970     1
1970    22
Name: George Songs, dtype: int64

In [26]:
for item in george_dupe:
    print(item)

10
7
1
22


In [27]:
22 in george_dupe # membership checking is against the index items, not the values

False

In [28]:
22 in set(george_dupe)

True

In [29]:
22 in george_dupe.values # now, you are checking the values of it

True

In [30]:
'1970' in george_dupe

True

In [31]:
# To iterate over (index label, value) tuples, use the .items() method.
# (.iteritems() was deprecated in pandas 1.5 and removed in pandas 2.0.)
for item in george_dupe.items():
    print(item)

('1968', 10)
('1969', 7)
('1970', 1)
('1970', 22)


## Updating

In [32]:
# Assigning through an existing label replaces the value stored under it.
george_dupe['1969'] = 6
george_dupe['1969']

6

In [33]:
# Assigning to a label that is not in the index yet appends a new entry.
george_dupe['1973'] = 11
george_dupe

1968    10
1969     6
1970     1
1970    22
1973    11
Name: George Songs, dtype: int64

In [34]:
# Careful: '1970' occurs twice in the index, so this assignment overwrites
# every entry carrying that label, not just one of them.
george_dupe['1970'] = 2
george_dupe

1968    10
1969     6
1970     2
1970     2
1973    11
Name: George Songs, dtype: int64