In [32]:
import pandas as pd
import numpy as np

In [3]:
# Build a Series from a plain list; no index is given, so pandas supplies
# the default 0..n-1 labels.
x = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
print(x)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [4]:
x.values  # the data part: the stored values, returned as a NumPy array

array([0.25, 0.5 , 0.75, 1.  ])

In [5]:
x.index  # the index part: the labels paired with the values (a default RangeIndex here)

RangeIndex(start=0, stop=4, step=1)

## Creating a Series
From a list

In [9]:
# Passing only the data: the labels default to 0, 1, 2, 3.
x = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
print(x)

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64


In [8]:
# Same data, but with explicit string labels supplied through `index`.
x = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=list('abcd'))
print(x)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64


Or from a dictionary (keys become the index labels)

In [11]:
# A dict maps directly onto a Series: keys become labels, values become data.
d = dict(a=0.25, b=0.5, c=0.75, d=1.0)
x = pd.Series(d)
print(x)

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64


From a scalar (length is determined by index)

In [12]:
# A scalar is repeated once per label, so the index decides the length.
d = pd.Series(data=0.25, index=list('abcd'))
print(d)

a    0.25
b    0.25
c    0.25
d    0.25
dtype: float64


Remember: an `Index` is immutable — its individual labels cannot be changed in place!

## Microquiz

In [13]:
# Duplicate labels are allowed: 'a' appears twice in this index.
x = pd.Series(data=[1, 2, 3], index=list('aba'))
x['a']  # returns every entry labelled 'a' (a two-element Series, not a scalar)

a    1
a    3
dtype: int64

## Indexing into a series 
Similar to numpy!

In [14]:
x = pd.Series([0.25, 0.5, 0.75, 1.0], index=['a', 'b', 'c', 'd'])
# Simple positional indexing. Use .iloc rather than x[2]: with string labels,
# x[2] relies on a fallback from labels to positions that recent pandas
# versions deprecate.
x.iloc[2]

0.75

In [15]:
x[:2] # slicing by position: the first two entries (the stop position is excluded)

a    0.25
b    0.50
dtype: float64

In [16]:
x[(x > 0.3) & (x < 0.9)] # boolean mask: keep only entries strictly between 0.3 and 0.9

b    0.50
c    0.75
dtype: float64

In [17]:
# Fancy indexing by position. .iloc makes the positional intent explicit;
# x[[0, 2]] on a string-labelled Series relies on a deprecated fallback
# from labels to positions.
x.iloc[[0, 2]]

a    0.25
c    0.75
dtype: float64

## Implicit vs explicit indexes

In [18]:
# Integer labels that deliberately differ from the positions 0..3, so that
# label-based and position-based access give different answers.
x = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=[3, 6, 0, 1])
print(x)

3    0.25
6    0.50
0    0.75
1    1.00
dtype: float64


In [19]:
x[0] # is using the label: with integer labels this finds label 0 (third entry), not position 0

0.75

In [20]:
x[1:] # is implicit: the slice counts positions, so only the first entry is dropped

6    0.50
0    0.75
1    1.00
dtype: float64

Rule (for plain `x[...]` on a Series with integer labels): 
- Simple Indexing: explicit index (labels)
- Slicing: implicit index (positions)

## Loc and Iloc
- `loc` always uses the explicit Series index (the labels)
- `iloc` always uses the *implicit* Python-style index (positions 0, 1, 2, ...)

In [24]:
# Show x again for reference before comparing .loc and .iloc on it.
print(x)

3    0.25
6    0.50
0    0.75
1    1.00
dtype: float64


In [22]:
x.loc[0]  # explicit index: the entry whose label is 0

0.75

In [23]:
x.iloc[0]  # implicit index: the entry at position 0 (the first one)

0.25

### Additional Notes:

In [26]:
x.loc[:0] # label-based slice: everything up to label 0, and the stop label is included

3    0.25
6    0.50
0    0.75
dtype: float64

In [27]:
x.iloc[:3] # position-based slice: positions 0, 1, 2; the stop position (3) is excluded

3    0.25
6    0.50
0    0.75
dtype: float64

## Ufuncs with Series

In [33]:
# NumPy ufuncs apply element-wise to a Series and return a Series that keeps
# the original index.
x = pd.Series(data=[2, 5])
np.sqrt(x)

0    1.414214
1    2.236068
dtype: float64

Index Alignment

In [34]:
# Two homework score sheets keyed by student id; each is missing one student.
hw1 = pd.Series(data={'001': 89, '002': 74, '004': 94})
hw2 = pd.Series(data={'001': 92, '003': 78, '004': 97})
# pandas aligns the two Series on their labels before performing the
# calculation; a label present on only one side yields NaN.
(hw1 + hw2) / 2

001    90.5
002     NaN
003     NaN
004    95.5
dtype: float64

If we do not want `NaN` for labels that are missing from one side, we can supply a fill value like this:

In [36]:
# Caveat: a student missing from one sheet is treated as having scored 0
# there, so their "average" is half of their single real score.
hw1.add(hw2, fill_value=0)/2 # adds hw2 to hw1 and fills in missing values with 0

001    90.5
002    37.0
003    39.0
004    95.5
dtype: float64

Be careful with vectorized logic

In [37]:
# Two boolean Series and one integer Series used by the logic examples below.
x = pd.Series(data=[True, False, True])
y = pd.Series(data=[True, True, False])
z = pd.Series(data=[3, 6, 2])

In [38]:
# `and` needs a single truth value for each operand, which a whole Series
# cannot provide, so this raises. The error is caught and printed so the
# notebook still runs top to bottom.
try:
    x and y  # This will cause an error
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [39]:
x & y # We should use bitwise logic: `&` combines the two Series element by element

0     True
1    False
2    False
dtype: bool

In [40]:
# Precedence pitfall: `&` binds tighter than the comparisons, so this parses
# as `z > (1 & z) < 5`. That chained comparison needs the truth value of a
# Series and raises. The error is caught and printed so the notebook still
# runs top to bottom.
try:
    z > 1 & z < 5
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [41]:
# Parenthesise each comparison so that `&` combines two boolean Series.
(z > 1) & (z < 5)

0     True
1    False
2     True
dtype: bool

In [42]:
~(x | y) # ~ is element-wise NOT, | is element-wise OR: True only where both x and y are False

0    False
1    False
2    False
dtype: bool

In [43]:
# A string-labelled Series behaves much like a dict: look a value up by label.
x = pd.Series(data=[0.25, 0.5, 0.75, 1.0], index=list('abcd'))
x['b']

0.5

In [44]:
x.loc['b']  # the same label lookup, written explicitly with .loc

0.5

In [45]:
# Assigning through a label overwrites that entry in place; later cells
# therefore see 0.55 under 'b'.
x['b'] = 0.55
x.loc['b']

0.55

In [47]:
x.keys() # shows the index labels, like dict.keys()

Index(['a', 'b', 'c', 'd'], dtype='object')

In [48]:
list(x.items()) # items() yields (label, value) pairs, collected into a list here

[('a', 0.25), ('b', 0.55), ('c', 0.75), ('d', 1.0)]

In [49]:
ind1 = pd.Index([1, 2, 4, 6])
ind2 = pd.Index([1, 3, 4, 5])
# Returns the labels that exist in both (set intersection). Use the named
# method: from pandas 2.0, `ind1 & ind2` is an element-wise logical operation
# rather than a set intersection.
ind1.intersection(ind2)

Int64Index([1, 4], dtype='int64')

In [50]:
# Returns every label found in either index (set union). Use the named
# method: from pandas 2.0, `ind1 | ind2` is an element-wise logical operation
# rather than a set union.
ind1.union(ind2)

Int64Index([1, 2, 3, 4, 5, 6], dtype='int64')