# pandas Series

## Setup

In [1]:
from datetime import date

import numpy as np
import pandas as pd

# Keep printed output compact: show at most 10 columns and 10 rows.
pd.options.display.max_columns = 10
pd.options.display.max_rows = 10

### Create Series from ndarray

In [2]:
# Values 0..4 from an ndarray; with no index given, pandas labels them 0..4.
s1 = pd.Series(np.arange(5))
s1

0    0
1    1
2    2
3    3
4    4
dtype: int32

### Create Series with index

In [3]:
# Values 0..4 labelled 'a'..'e' instead of the default integer positions.
s2 = pd.Series(np.arange(5), index=list('abcde'))
s2

a    0
b    1
c    2
d    3
e    4
dtype: int32

### Assign index to Existing Series

In [4]:
# Replace the labels of the existing Series in place; the values are untouched.
s2.index = list('ABCDE')
s2

A    0
B    1
C    2
D    3
E    4
dtype: int32

### The index and values Properties

In [5]:
# The index property holds the labels of the Series.
s2.index

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [6]:
# The values property exposes the underlying data without the labels.
s2.values

array([0, 1, 2, 3, 4])

In [7]:
# Check what kind of object .values hands back.
type(s2.values)

numpy.ndarray

### Create One-Item Series from Scalar

In [8]:
# A lone scalar becomes a one-element Series with the default index label 0.
s3 = pd.Series(data=5)
s3

0    5
dtype: int64

### Create Series from list

In [9]:
# Any Python list works as the data source; here the integers 1 through 5.
s4 = pd.Series(list(range(1, 6)))
s4

0    1
1    2
2    3
3    4
4    5
dtype: int64

### Create Series from dict

In [10]:
# Building a Series from a dict: each key becomes an index label and each
# value the matching entry. `date` comes from the imports in the setup cell,
# so this cell no longer hides a dependency in the middle of the notebook.
bdays = {
    'John': date(1940, 10, 9),
    'Paul': date(1942, 6, 18),
    'George': date(1943, 2, 25),
    'Ringo': date(1940, 7, 7),
}
s5 = pd.Series(bdays)
s5

John      1940-10-09
Paul      1942-06-18
George    1943-02-25
Ringo     1940-07-07
dtype: object

In [11]:
# Sample data with two missing entries marked as NaN (a floating-point value,
# so the whole array is float).
ar = np.array([1, 2, 3, np.nan, 5, 6, 7, np.nan, 9, 10], dtype=float)
ar

array([ 1.,  2.,  3., nan,  5.,  6.,  7., nan,  9., 10.])

In [12]:
# NumPy's mean propagates NaN: one missing value makes the whole result nan.
ar.mean()

nan

In [13]:
# Wrap the same NaN-containing array in a Series to compare how pandas treats it.
s6 = pd.Series(ar)
s6

0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     7.0
7     NaN
8     9.0
9    10.0
dtype: float64

In [14]:
# Series.mean skips NaN by default, so it equals the hand-computed mean of the
# eight non-missing values on the right.
s6.mean(), sum([1,2,3,5,6,7,9,10])/8

(5.375, 5.375)

In [15]:
# With skipna=False the NaN entries take part in the calculation and the mean is nan.
s6.mean(skipna=False)

nan

### loc[] and iloc[]

In [16]:
# Five random floats from [0, 1) labelled 'a'..'e'.
# NOTE(review): no seed is set before this call, so the values change on every run.
s7 = pd.Series(np.random.sample(5), index=list('abcde'))
s7

a    0.511459
b    0.179318
c    0.018874
d    0.633074
e    0.850213
dtype: float64

In [17]:
# .loc looks up by label, .iloc by integer position; both reach the first element here.
s7.loc['a'], s7.iloc[0]

(0.5114588432103118, 0.5114588432103118)

In [18]:
# Label slices with .loc include both endpoints: 'b' through 'd'.
s7.loc['b':'d']

b    0.179318
c    0.018874
d    0.633074
dtype: float64

In [19]:
# Positional slices with .iloc exclude the stop position: positions 1, 2 and 3.
s7.iloc[1:4]

b    0.179318
c    0.018874
d    0.633074
dtype: float64

In [20]:
# A list of labels selects exactly those entries, in the order given.
s7.loc[['a','c','d']]

a    0.511459
c    0.018874
d    0.633074
dtype: float64

In [21]:
# A list of integer positions selects the entries at those positions.
s7.iloc[[0,2,4]]

a    0.511459
c    0.018874
e    0.850213
dtype: float64

### Alignment

In [None]:
# Two Series over the same five letter labels, deliberately listed in different orders.
grades1 = pd.Series({'A': 17, 'B': 44, 'C': 28, 'D': 8, 'F': 3})
grades2 = pd.Series({'D': 76, 'C': 122, 'B': 151, 'A': 21, 'F': 0})

In [None]:
# Display the first Series (labels in A, B, C, D, F order).
grades1

In [None]:
# Display the second Series (same labels, different order).
grades2

In [None]:
# Addition pairs entries by index label, not by position, so the differing
# label order of the two operands does not matter.
grades_all = grades1.add(grades2)
grades_all

In [None]:
# Rebuild the inputs; this time grades2 has no 'F' entry.
grades1 = pd.Series({'A': 17, 'B': 44, 'C': 28, 'D': 8, 'F': 3})
grades2 = pd.Series({'D': 76, 'C': 122, 'B': 151, 'A': 21})
# A label present on only one side has nothing to pair with, so its sum is NaN.
grades_all = grades1.add(grades2)
grades_all

In [None]:
# fill_value=0 stands in for a label that is missing from one operand, so that
# label keeps the other operand's value instead of turning into NaN.
grades_all = grades1.add(grades2, fill_value=0)
grades_all

### Comparing Series

In [None]:
# Two seasons of stats over the same five categories.
mantle1968 = pd.Series({'2B': 14, '3B': 1, 'HR': 18, 'RBI': 54, 'SB': 6})
mantle1951 = pd.Series({'2B': 11, '3B': 5, 'HR': 13, 'RBI': 65, 'SB': 8})
# Element-wise comparison: True where the 1968 figure exceeds the 1951 one.
mantle1968.gt(mantle1951)

In [None]:
# Use the boolean comparison as a mask to keep only the 1968 stats that beat 1951.
mantle1968.loc[mantle1968.gt(mantle1951)]

### Element-wise Operations

In [None]:
# Seed the generator so the same 100 scores are produced on every run.
np.random.seed(1)
# 100 integer scores from 60 to 100 (the upper bound 101 is exclusive).
exam_grades = pd.Series(np.random.randint(low=60, high=101, size=100))
exam_grades

In [None]:
# Apply a 5% curve to every score at once; arithmetic on a Series is element-wise.
curved_grades = exam_grades * 1.05
curved_grades

In [None]:
def convert_to_letter(grade):
    """Map a numeric grade to its letter grade.

    Cutoffs are checked from highest to lowest: 90 and up is 'A', 80 and up
    is 'B', 70 and up is 'C', 65 and up is 'D'. A grade that meets no cutoff
    is an 'F'.
    """
    cutoffs = ((90, 'A'), (80, 'B'), (70, 'C'), (65, 'D'))
    for minimum, letter in cutoffs:
        if grade >= minimum:
            return letter
    return 'F'

In [None]:
# Passing the whole Series to the scalar function does not work: `grade >= 90`
# produces a boolean Series, and `if` cannot reduce that to a single
# True/False, so pandas raises ValueError. The error is caught and shown here
# so the demonstration does not stop a Restart & Run All.
try:
    convert_to_letter(curved_grades)
except ValueError as err:
    print(f'ValueError: {err}')

In [None]:
# apply calls convert_to_letter once per element and collects the results
# into a new Series of letter grades.
letter_grades = curved_grades.apply(convert_to_letter)
letter_grades