## Representing Univariate Data with the Series

In [19]:
import pandas as pd
import numpy as np
import datetime
from datetime import datetime, date

# configure how pandas renders its output for the rest of the notebook
for _option, _setting in (
    ('display.notebook_repr_html', False),
    ('display.max_columns', 8),
    ('display.max_rows', 10),
    ('display.width', 80),
):
    pd.set_option(_option, _setting)

# bring in matplotlib for graphics
import matplotlib.pyplot as plt
%matplotlib inline

### Creating a Series

#### Creating a Series using Python lists and dictionaries

In [20]:
# build a Series from a plain Python list; pandas supplies a 0-based index
s = pd.Series(data=[10, 11, 12, 13, 14])
s

0    10
1    11
2    12
3    13
4    14
dtype: int64

In [21]:
# The first column of the printed Series is the index label, the second the value.
# s has the default integer labels 0..4, so this lookup is by label value:
# label 3 holds 13.
s[3]

13

In [22]:
# create a Series of strings (shown with dtype object in the output below)
pd.Series(['Sarah','Aya','Eman','Raghad'])

0     Sarah
1       Aya
2      Eman
3    Raghad
dtype: object

In [23]:
# create a Series of identical values: [2]*7 is a list holding seven 2s
pd.Series([2]*7)

0    2
1    2
2    2
3    2
4    2
5    2
6    2
dtype: int64

In [24]:
# list('abcdefg') splits the string into single characters,
# each of which becomes one value of the Series
pd.Series(list('abcdefg'))

0    a
1    b
2    c
3    d
4    e
5    f
6    g
dtype: object

In [25]:
# create a Series from a dictionary: the keys become the index labels
# and the dictionary values become the Series values
pd.Series({'Sarah': 'sister',
          'Aya':'sister',
          'Eman':'cousin',
          'karima':'friend'})

Sarah     sister
Aya       sister
Eman      cousin
karima    friend
dtype: object

#### Creation using Numpy functions

In [27]:
# np.arange(4, 9) yields the integers 4 through 8 (the stop value is excluded)
pd.Series(np.arange(4, 9))

0    4
1    5
2    6
3    7
4    8
dtype: int32

In [28]:
# np.linspace(0, 9, 5) yields 5 evenly spaced values from 0 to 9, both ends included
pd.Series(np.linspace(0, 9, 5))

0    0.00
1    2.25
2    4.50
3    6.75
4    9.00
dtype: float64

In [29]:
# five normally distributed random numbers;
# the seed is fixed first so the values are reproducible
np.random.seed(12345)
pd.Series(np.random.normal(size=5))

0   -0.204708
1    0.478943
2   -0.519439
3   -0.555730
4    1.965781
dtype: float64

In [31]:
# create a five-item Series (0..4), then multiply it by a scalar:
# the multiplication is applied to every value
s= pd.Series(np.arange(0, 5))
s * 2

0    0
1    2
2    4
3    6
4    8
dtype: int32

### The .index and .values properties

In [32]:
# get the values in the Series
s = pd.Series([1, 2, 3])
s.values

array([1, 2, 3], dtype=int64)

In [33]:
# show that this is a numpy array
type(s.values)

numpy.ndarray

In [34]:
# get the index of the Series
s.index

RangeIndex(start=0, stop=3, step=1)

In [36]:
# a four-item example Series; len() reports how many items it holds
s = pd.Series(list(range(4)))
len(s)

4

In [37]:
# .size is the number of items, the same figure len(s) reported
s.size

4

In [38]:
# .shape is a one-element tuple holding the number of items
s.shape

(4,)

### Specifying an index at creation

In [39]:
# explicitly create an index
# note: despite its name, `labels` holds the Series VALUES (the names),
# while `role` supplies the index labels used to look them up
labels = ['sarah','aya','ahmed','mohamed']
role = ['Dad','mom','son','brother']
s = pd.Series(labels, index=role)
s

Dad          sarah
mom            aya
son          ahmed
brother    mohamed
dtype: object

In [40]:
s.index

Index(['Dad', 'mom', 'son', 'brother'], dtype='object')

In [41]:
# look up a single value by its index label
s['Dad']

'sarah'

In [43]:
# nine values (1..9) labelled with the letters 'a'..'i'
s = pd.Series(np.arange(1, 10), index=[*'abcdefghi'])

In [44]:
# the first five items (five is what head() returns when no count is given)
s.head()

a    1
b    2
c    3
d    4
e    5
dtype: int32

In [45]:
# only the first three items
s.head(n=3)

a    1
b    2
c    3
dtype: int32

In [46]:
s.tail

<bound method NDFrame.tail of a    1
b    2
c    3
d    4
e    5
f    6
g    7
h    8
i    9
dtype: int32>

In [47]:
# only the last three items
s.tail(n = 3)

g    7
h    8
i    9
dtype: int32

In [48]:
# pick the items at positions 1, 5 and 8 (labels 'b', 'f' and 'i')
s.take([1, 5, 8])

b    2
f    6
i    9
dtype: int32

#### Lookups

In [50]:
# values 10..14 labelled 'a'..'e'; used by the lookup examples that follow
s1 = pd.Series(np.arange(10, 15), index=[*'abcde'])
s1

a    10
b    11
c    12
d    13
e    14
dtype: int32

In [51]:
# a single label returns the scalar stored under it
s1['a']

10

In [52]:
# a list of labels returns a Series, in the order the labels were requested
s1[['d', 'b']]

d    13
b    11
dtype: int32

In [53]:
# select by position (items 3 and 1). Use .iloc to say so explicitly:
# integer keys passed to plain [] on a non-integer index rely on a positional
# fallback that newer pandas versions deprecate
s1.iloc[[3, 1]]

d    13
b    11
dtype: int32

In [54]:
# a Series whose index labels are integers that do not start at 0
s2 = pd.Series(data=[1, 2, 3, 4], index=[10, 11, 12, 13])

In [55]:
# s2 has an integer index, so the integers given to [] are matched
# against the labels (13 and 10), not against positions
s2[[13, 10]]

13    4
10    1
dtype: int64

In [56]:
# .iloc selects by position: the items at positions 0 and 2
s1.iloc[[0, 2]]

a    10
c    12
dtype: int32

In [57]:
# positions 3 and 2 of s2, even though its labels are 10..13
s2.iloc[[3, 2]]

13    4
12    3
dtype: int64

In [58]:
# .loc selects by index label
s1.loc[['a','b']]

a    10
b    11
dtype: int32

In [59]:
# with an integer index .loc still goes by label: labels 11 and 12
s2.loc[[11,12]]

11    2
12    3
dtype: int64

In [61]:
s1.loc[['a', 'f']]

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


a    10.0
f     NaN
dtype: float64

### Slicing a Series into subsets

In [62]:
# a Series to use for slicing; its index labels deliberately start
# at 10 rather than 0 to demonstrate position-based slicing
s = pd.Series(data=np.arange(100, 110), index=np.arange(10, 20))
s

10    100
11    101
12    102
13    103
14    104
15    105
16    106
17    107
18    108
19    109
dtype: int32

In [63]:
# an integer slice selects by position, not by label:
# the items at positions 1 through 5 (the end position is excluded)
s[1:6]

11    101
12    102
13    103
14    104
15    105
dtype: int32

In [64]:
# the same five items as s[1:6], selected explicitly by position
s.iloc[[1, 2, 3, 4, 5]]

11    101
12    102
13    103
14    104
15    105
dtype: int32

In [65]:
#items at position 1, 3, 5
s[1:6:2]

11    101
13    103
15    105
dtype: int32

In [66]:
# first five by slicing, same as .head(5)
s[:5]

10    100
11    101
12    102
13    103
14    104
dtype: int32

In [67]:
# from position 4 through the end
s[4:]

14    104
15    105
16    106
17    107
18    108
19    109
dtype: int32

In [68]:
# every other item among the first five
s[:5:2]

10    100
12    102
14    104
dtype: int32

In [69]:
# every other item, starting at position 4
s[4::2]

14    104
16    106
18    108
dtype: int32

In [70]:
# a step of -1 walks the Series backwards, i.e. reverses it
s[::-1]

19    109
18    108
17    107
16    106
15    105
14    104
13    103
12    102
11    101
10    100
dtype: int32

In [71]:
# start at position 4 and step backwards two at a time
s[4::-2]

14    104
12    102
10    100
dtype: int32

In [72]:
# the last four items
s[-4:]

16    106
17    107
18    108
19    109
dtype: int32

In [73]:
# everything except the last four items
s[:-4]

10    100
11    101
12    102
13    103
14    104
15    105
dtype: int32

In [74]:
# everything except the last four items (this repeats the slice of the cell above)
s[:-4]

10    100
11    101
12    102
13    103
14    104
15    105
dtype: int32

In [75]:
# the last four items, minus the very last one
s[-4:-1]

16    106
17    107
18    108
dtype: int32

In [77]:
# values 0..4 with a non-integer (letter) index
s = pd.Series(np.arange(5), index=list('abcde'))

In [78]:
s

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [79]:
# slicing by position: the end position (3) is excluded
s[1:3]

b    1
c    2
dtype: int32

In [80]:
# slicing by label: unlike a position slice, the end label ('d') is included
s['b':'d']

b    1
c    2
d    3
dtype: int32

### Alignment via index labels

In [81]:
s1 = pd.Series([1, 2], index = ['a', 'b'])
s1

a    1
b    2
dtype: int64

In [82]:
s1 = pd.Series([4, 3], index = ['b', 'a'])
s1

b    4
a    3
dtype: int64

In [83]:
s1+ s2

10   NaN
11   NaN
12   NaN
13   NaN
a    NaN
b    NaN
dtype: float64

In [84]:
s1 * 2

b    8
a    6
dtype: int64

In [85]:
t = pd.Series(2, s1.index)
t

b    2
a    2
dtype: int64

In [86]:
s1 * t

b    8
a    6
dtype: int64

In [88]:
#we will add this to s1
s3 = pd.Series([5, 6], index = ['b', 'c'])
s3

b    5
c    6
dtype: int64

In [89]:
s1 + s3

a    NaN
b    9.0
c    NaN
dtype: float64

In [90]:
# a Series whose index contains a duplicated label ('a')
s1 = pd.Series([1.0, 2.0, 3.0], index=list('aab'))
s1

a    1.0
a    2.0
b    3.0
dtype: float64

In [91]:
# a second Series that also repeats the label 'a'
s2 = pd.Series([4.0, 5.0, 6.0, 7.0], index=list('abca'))
s2

a    4.0
b    5.0
c    6.0
a    7.0
dtype: float64

In [92]:
# with duplicate labels every 'a' of s1 is paired with every 'a' of s2
# (2 x 2 = 4 rows); 'c' exists only in s2, so its result is NaN
s1 + s2

a    5.0
a    8.0
a    6.0
a    9.0
b    8.0
c    NaN
dtype: float64

### Performing Boolean selection

In [94]:
# comparing a Series with a scalar produces a boolean Series, one flag per item
s = pd.Series(np.arange(5), index=[*'abcde'])
logical_results = s.ge(3)
logical_results

a    False
b    False
c    False
d     True
e     True
dtype: bool

In [95]:
# indexing with the boolean Series keeps only the items flagged True
s[logical_results]

d    3
e    4
dtype: int32

In [96]:
# no value is greater than 5, so the selection is an empty Series
s[s > 5]

Series([], dtype: int32)

In [97]:
# combine two conditions with & ; each comparison is wrapped in its own parentheses
s[(s >=2) & (s < 5)]

c    2
d    3
e    4
dtype: int32

In [98]:
# True only when every value satisfies the condition
(s >= 0).all()

True

In [99]:
# is any value below 2?  Reduce the boolean mask itself: s[s < 2].any() would
# instead test the truthiness of the SELECTED values, so a Series whose only
# value below 2 is 0 would wrongly report False
(s < 2).any()

True

In [100]:
# True counts as 1 when summed, so this is the number of values below 2
(s < 2).sum()

2

### Re-indexing a Series

In [109]:
# four reproducible random values (fixed seed) labelled 'a'..'d'
np.random.seed(123456)
s1 = pd.Series(np.random.randn(4), index=list('abcd'))
s1

a    0.469112
b   -0.282863
c   -1.509059
d   -1.135632
dtype: float64

In [110]:
# reindex returns a new Series laid out by the requested labels;
# 'g' does not exist in s1, so its value is NaN
s2 = s1.reindex(['a', 'c', 'g'])
s2

a    0.469112
c   -1.509059
g         NaN
dtype: float64

In [112]:
# the labels look alike but differ in type (integers vs. strings),
# so nothing lines up and every sum is NaN
s1 = pd.Series(data=[0, 1, 2], index=[0, 1, 2])
s2 = pd.Series(data=[3, 4, 5], index=list('012'))
s1 + s2

0   NaN
1   NaN
2   NaN
0   NaN
1   NaN
2   NaN
dtype: float64

In [114]:
# convert s2's string labels to integers so they match the labels of s1;
# the addition then lines up label by label
s2.index = s2.index.values.astype(int)
s1 + s2

0    3
1    5
2    7
dtype: int64

In [115]:
# fill_value is used instead of NaN for labels missing from the original ('f' here)
# note: `s` is the Series 0..4 labelled 'a'..'e' from the Boolean selection
# section, so 'a' shows 0 because that is its stored value, not because it was filled
s2 = s.copy()
s2.reindex(['a', 'f'], fill_value = 0)

a    0
f    0
dtype: int32

In [116]:
# reindex with fill_value: the missing label 'f' gets 0 instead of NaN
# (this cell repeats the one above)
s2 = s.copy()
s2.reindex(['a', 'f'], fill_value =0 )

a    0
f    0
dtype: int32

In [117]:
# a Series with gaps between its integer labels (0, 3, 5)
s3 = pd.Series({0: 'red', 3: 'green', 5: 'blue'})
s3

0      red
3    green
5     blue
dtype: object

In [119]:
# forward fill: a label missing from s3 takes the value of the closest earlier label
s3.reindex(np.arange(0,7), method='ffill')

0      red
1      red
2      red
3    green
4    green
5     blue
6     blue
dtype: object

In [120]:
# backward fill: a missing label takes the value of the closest later label;
# nothing follows label 6, so it stays NaN
s3.reindex(np.arange(0,7), method='bfill')

0      red
1    green
2    green
3    green
4     blue
5     blue
6      NaN
dtype: object

### Modifying a Series in-place

In [121]:
# three reproducible random values (fixed seed) to modify in the cells below
np.random.seed(123456)
s = pd.Series(np.random.randn(3), index=list('abc'))
s

a    0.469112
b   -0.282863
c   -1.509059
dtype: float64

In [122]:
# assigning to a label that does not exist yet adds a new item to the Series
s['d'] = 100

In [124]:
# assigning to a label that already exists overwrites its value in place
s['d']= -100

In [125]:
# remove the item labelled 'a' from the Series in place, then display what is left
del s['a']
s

b     -0.282863
c     -1.509059
d   -100.000000
dtype: float64

In [126]:
# take a copy of s, then slice the first two items out of that copy
# NOTE(review): the name `slice` shadows Python's builtin `slice`, and `copy`
# is also the name of a standard-library module - consider renaming both
copy = s.copy()
slice = copy[:2]
slice

b   -0.282863
c   -1.509059
dtype: float64

In [127]:
# write through the slice, then display `copy`: in the recorded output the change
# shows up in `copy` as well, i.e. the slice shared its data with its source
# NOTE(review): this write-through depends on the pandas version
# (copy-on-write changes it) - confirm on the version in use
slice['b'] = 0
copy

b      0.000000
c     -1.509059
d   -100.000000
dtype: float64