# pandas Series

## Setup

In [1]:
import numpy as np
import pandas as pd

## Creation

### Create Series from ndarray

In [2]:
s1 = pd.Series(np.arange(0,5))
s1

0    0
1    1
2    2
3    3
4    4
dtype: int64

### Create Series with index

In [3]:
s2 = pd.Series(np.arange(0,5), index=['a','b','c','d','e'])
s2

a    0
b    1
c    2
d    3
e    4
dtype: int64

### Assign index to Existing Series

In [4]:
s2.index = ['A','B','C','D','E']
s2 

A    0
B    1
C    2
D    3
E    4
dtype: int64

### Create One-Item Series from Scalar

In [5]:
s3 = pd.Series(5)
s3

0    5
dtype: int64

### Create Series from list

In [6]:
s4 = pd.Series([1,2,3,4,5])
s4

0    1
1    2
2    3
3    4
4    5
dtype: int64

### Create Series from dict

In [35]:
from datetime import date
bdays = {
    'Aaron': date(2001, 10, 10),
    'Brian': date(2002, 6, 6),
    'Christine': date(2003, 2, 2),
    'Di': date(2004, 9, 9),
}
s5 = pd.Series(bdays)
s5

Aaron        2001-10-10
Brian        2002-06-06
Christine    2003-02-02
Di           2004-09-09
dtype: object

In [36]:
ar = np.array([1,2,3,np.nan,5,6,7,np.nan,9,10])
ar

array([ 1.,  2.,  3., nan,  5.,  6.,  7., nan,  9., 10.])

In [9]:
ar.mean()

nan

In [10]:
s6 = pd.Series(ar)
s6

0     1.0
1     2.0
2     3.0
3     NaN
4     5.0
5     6.0
6     7.0
7     NaN
8     9.0
9    10.0
dtype: float64

In [11]:
s6.mean(), sum([1,2,3,5,6,7,9,10])/8

(5.375, 5.375)

In [12]:
s6.mean(skipna=False)

nan

### The index and values Properties

In [37]:
s5.index

Index(['Aaron', 'Brian', 'Christine', 'Di'], dtype='object')

In [38]:
s5.values

array([datetime.date(2001, 10, 10), datetime.date(2002, 6, 6),
       datetime.date(2003, 2, 2), datetime.date(2004, 9, 9)], dtype=object)

In [39]:
type(s5.values)

numpy.ndarray

## Access

### loc[] and iloc[]

In [16]:
s7 = pd.Series(np.random.sample(5), index=['a','b','c','d','e'])
s7

a    0.299203
b    0.664901
c    0.047260
d    0.395740
e    0.820036
dtype: float64

In [17]:
s7.loc['a'], s7.iloc[0]

(0.299202993169179, 0.299202993169179)

In [18]:
s7.loc['b':'d']

b    0.664901
c    0.047260
d    0.395740
dtype: float64

In [19]:
s7.iloc[1:4]

b    0.664901
c    0.047260
d    0.395740
dtype: float64

In [20]:
s7.loc[['a','c','d']]

a    0.299203
c    0.047260
d    0.395740
dtype: float64

In [21]:
s7.iloc[[0,2,4]]

a    0.299203
c    0.047260
e    0.820036
dtype: float64

## Manipulation

### Alignment

In [22]:
grades1 = pd.Series([17, 44, 28, 8, 3], index=['A','B','C','D','F'])
grades2 = pd.Series([76, 122, 151, 21, 0], index=['D','C','B','A','F'])

In [23]:
grades1

A    17
B    44
C    28
D     8
F     3
dtype: int64

In [24]:
grades2

D     76
C    122
B    151
A     21
F      0
dtype: int64

In [25]:
grades_all = grades1 + grades2
grades_all

A     38
B    195
C    150
D     84
F      3
dtype: int64

In [26]:
grades1 = pd.Series([17, 44, 28, 8, 3], index=['A','B','C','D','F'])
grades2 = pd.Series([76, 122, 151, 21], index=['D','C','B','A'])
grades_all = grades1 + grades2
grades_all

A     38.0
B    195.0
C    150.0
D     84.0
F      NaN
dtype: float64

In [27]:
grades_all = grades1.add(grades2, fill_value=0)
grades_all

A     38.0
B    195.0
C    150.0
D     84.0
F      3.0
dtype: float64

### Comparing Series

In [49]:
dies1 = pd.Series(np.random.randint(1, 7, (100,)))
dies2 = pd.Series(np.random.randint(1, 7, (100,)))
dies1 == dies2

0      True
1     False
2      True
3     False
4     False
      ...  
95    False
96    False
97    False
98    False
99    False
Length: 100, dtype: bool

In [50]:
dies1[dies1 == dies2]

0     4
2     5
17    6
20    5
21    4
34    3
59    4
63    1
66    1
70    5
72    3
78    6
82    3
91    1
94    5
dtype: int64

### Element-wise Operations

In [30]:
np.random.seed(1)
exam_grades = pd.Series(np.random.randint(60,101,100))
exam_grades

0     97
1     72
2     68
3     69
4     71
      ..
95    87
96    81
97    71
98    67
99    73
Length: 100, dtype: int64

In [31]:
curved_grades = exam_grades.multiply(1.05)
curved_grades

0     101.85
1      75.60
2      71.40
3      72.45
4      74.55
       ...  
95     91.35
96     85.05
97     74.55
98     70.35
99     76.65
Length: 100, dtype: float64

In [32]:
def convert_to_letter(grade):
    if grade >= 90:
        return 'A'
    elif grade >= 80:
        return 'B'
    elif grade >= 70:
        return 'C'
    elif grade >= 65:
        return 'D'
    else:
        return 'F'

In [58]:
letter_grades = [(i, convert_to_letter(g)) for i in range(0, len(curved_grades)) for g in curved_grades]
letter_grades

[(0, 'A'),
 (0, 'C'),
 (0, 'C'),
 (0, 'C'),
 (0, 'C'),
 (0, 'D'),
 (0, 'C'),
 (0, 'F'),
 (0, 'C'),
 (0, 'F'),
 (0, 'C'),
 (0, 'C'),
 (0, 'D'),
 (0, 'B'),
 (0, 'B'),
 (0, 'A'),
 (0, 'B'),
 (0, 'B'),
 (0, 'C'),
 (0, 'A'),
 (0, 'A'),
 (0, 'C'),
 (0, 'D'),
 (0, 'B'),
 (0, 'B'),
 (0, 'A'),
 (0, 'A'),
 (0, 'B'),
 (0, 'C'),
 (0, 'C'),
 (0, 'C'),
 (0, 'B'),
 (0, 'F'),
 (0, 'F'),
 (0, 'B'),
 (0, 'C'),
 (0, 'B'),
 (0, 'C'),
 (0, 'C'),
 (0, 'A'),
 (0, 'C'),
 (0, 'D'),
 (0, 'D'),
 (0, 'B'),
 (0, 'D'),
 (0, 'D'),
 (0, 'B'),
 (0, 'C'),
 (0, 'A'),
 (0, 'C'),
 (0, 'B'),
 (0, 'C'),
 (0, 'F'),
 (0, 'D'),
 (0, 'B'),
 (0, 'A'),
 (0, 'B'),
 (0, 'C'),
 (0, 'A'),
 (0, 'B'),
 (0, 'A'),
 (0, 'B'),
 (0, 'C'),
 (0, 'D'),
 (0, 'A'),
 (0, 'B'),
 (0, 'A'),
 (0, 'A'),
 (0, 'A'),
 (0, 'B'),
 (0, 'A'),
 (0, 'C'),
 (0, 'A'),
 (0, 'A'),
 (0, 'C'),
 (0, 'B'),
 (0, 'C'),
 (0, 'B'),
 (0, 'B'),
 (0, 'C'),
 (0, 'A'),
 (0, 'C'),
 (0, 'A'),
 (0, 'B'),
 (0, 'B'),
 (0, 'F'),
 (0, 'C'),
 (0, 'D'),
 (0, 'F'),
 (0, 'C'),
 (0, 'D'),

In [51]:
letter_grades = curved_grades.apply(convert_to_letter)
letter_grades

0     A
1     C
2     C
3     C
4     C
     ..
95    A
96    B
97    C
98    C
99    C
Length: 100, dtype: object

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=60e83195-1c6d-4dea-988d-720f1e421976' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>