In [1]:
import numpy as np
import pandas as pd

In [4]:
# Sample Series with repeated values and one trailing NaN (11 entries in total)
s = pd.Series([0,1,2,1,4,4,5,7,6,0,np.nan])

In [7]:
print(len(s))  # total number of entries, NaN included
print(s.size)  # same total, exposed as an attribute
print(s.shape)  # one-element tuple: (number of entries,)
print(s.count())  # number of entries that are not NaN
print(s.value_counts())  # frequency of each distinct non-NaN value
print(s.unique())  # distinct values in order of first appearance, NaN included

11
11
(11,)
10
4.0    2
1.0    2
0.0    2
6.0    1
7.0    1
5.0    1
2.0    1
dtype: int64
[ 0.  1.  2.  4.  5.  7.  6. nan]


# Peeking at data with head, tail, and take

In [8]:
# head() with no argument returns the first five entries (the default)
s.head()

0    0.0
1    1.0
2    2.0
3    1.0
4    4.0
dtype: float64

In [9]:
# only the first four entries
s.head(4)

0    0.0
1    1.0
2    2.0
3    1.0
dtype: float64

In [10]:
# tail() with no argument returns the last five entries (the default)
s.tail()

6     5.0
7     7.0
8     6.0
9     0.0
10    NaN
dtype: float64

In [11]:
# only the last four entries
s.tail(4)

7     7.0
8     6.0
9     0.0
10    NaN
dtype: float64

In [13]:
# take() returns the entries at the given integer positions
s.take([4,6,8])

4    4.0
6    5.0
8    6.0
dtype: float64

# Looking up values in Series

In [14]:
# single item lookup: start from a Series indexed by the string labels a, b, c
s3 = pd.Series([1, 2, 3], index=list('abc'))
print(s3)

a    1
b    2
c    3
dtype: int64


In [15]:
# look up a single value by its index label
s3['a']

1

In [16]:
# The index of s3 holds string labels, so a zero-based position lookup is
# spelled out with .iloc. Plain s3[1] relies on the integer-position fallback
# of [], which pandas has deprecated for label-indexed Series.

s3.iloc[1]

2

In [18]:
# multi-item lookup by label: passing a list of labels returns a Series
# with the values in the requested order
s3[['c','a']]

c    3
a    1
dtype: int64

In [20]:
# A Series whose integer index labels start at 2 rather than 0
s5 = pd.Series(data=[1, 2, 3], index=[2, 3, 4])
s5

2    1
3    2
4    3
dtype: int64

# Label-based lookup versus position-based lookup

In [21]:
s5[2] # 2 is treated as a label here because the index itself contains the integer 2

1

In [22]:
# With an integer index, [] looks the key up as a label, not as a position.
# 0 is not one of the labels (2, 3, 4), so the lookup raises KeyError.
# The error is caught and printed so that Run All does not stop at this cell.
try:
    s5[0]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 0

In [23]:
s5.loc[2] # .loc always performs a label-based lookup

1

In [24]:
s5.iloc[0] # .iloc always performs a position-based lookup, even when the index holds integer labels

1

In [27]:
# multiple items by label (loc); the values come back in the requested order
s5.loc[[4, 3]]

4    3
3    2
dtype: int64

In [28]:
# [] with a list on an integer-indexed Series is label-based as well: 0 is not
# one of the labels (2, 3, 4), so the lookup raises KeyError.
# The error is caught and printed so that Run All does not stop at this cell.
try:
    s5[[0, 2]]
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 'Passing list-likes to .loc or [] with any missing labels is no longer supported, see https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike'

In [29]:
# multiple items by position (iloc): positions 0 and 2
s5.iloc[[0, 2]]

2    1
4    3
dtype: int64

In [30]:
# s5 has only three entries (positions 0-2), so position 3 is out of bounds
# and .iloc raises IndexError.
# The error is caught and printed so that Run All does not stop at this cell.
try:
    s5.iloc[[0, 2, 3]]
except IndexError as err:
    print(f"IndexError: {err}")

IndexError: positional indexers are out-of-bounds

# Alignment via index labels

A fundamental difference between a NumPy ndarray and a pandas Series is that a Series automatically aligns its data with another Series, based on index label values, before performing an operation.

In [33]:
# Four values labelled a..d in ascending order
s6 = pd.Series([1, 2, 3, 4], index=list('abcd'))
s6

a    1
b    2
c    3
d    4
dtype: int64

In [34]:
# Same label/value pairs as s6, but stored in reverse order
s7 = pd.Series([4, 3, 2, 1], index=list('dcba'))
s7

d    4
c    3
b    2
a    1
dtype: int64

In [35]:
# add them

s6 + s7 # the values are first aligned by index label, then the operation is applied

a    2
b    4
c    6
d    8
dtype: int64

NaN + number = NaN
(NaN added to a number results in NaN)


number + NaN = NaN
(a number added to NaN results in NaN)

In [39]:
# Labels a..d
s8 = pd.Series([1, 2, 3, 5], index=list('abcd'))
s8

a    1
b    2
c    3
d    5
dtype: int64

In [40]:
# Labels b..e: overlaps with s8 on b, c and d only
s9 = pd.Series([6, 7, 9, 10], index=list('bcde'))
s9

b     6
c     7
d     9
e    10
dtype: int64

In [41]:
s8 + s9 # 'a' and 'e' each exist in only one of the two Series, so their result is NaN
        # demonstrates alignment

a     NaN
b     8.0
c    10.0
d    14.0
e     NaN
dtype: float64

In [45]:
# Index labels need not be unique: 'a' appears twice here
s10 = pd.Series([1.0, 2.0, 3.0], index=list('aba'))
s10

a    1.0
b    2.0
a    3.0
dtype: float64

In [49]:
# A second Series that also carries the label 'a' twice
s11 = pd.Series([4.0, 5.0, 6.0], index=list('aac'))
s11

a    4.0
a    5.0
c    6.0
dtype: float64

In [50]:
# both operands contain the label 'a' twice
s11 + s10

a    5.0
a    7.0
a    6.0
a    8.0
b    NaN
c    NaN
dtype: float64

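When the two Series objects are added (or combined with any other binary operation), the resulting Series has four 'a' index labels: each of the two 'a' entries in s11 is paired with each of the two 'a' entries in s10 (4+1=5, 4+3=7, 5+1=6, 5+3=8). Labels that exist in only one of the Series ('b' and 'c') produce NaN.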

# The special case of Not-A-Number (NaN)


In [56]:
# Mean of a NumPy array that has no missing values
nda = np.arange(1, 6)
nda.mean()

3.0

In [57]:
# Mean of a NumPy array that contains a NaN: the NaN propagates into the result.
# np.nan is used (as elsewhere in this notebook); the np.NaN alias no longer
# exists in NumPy 2.x.
ndn = np.array([1, 2, 3, 4, np.nan])
ndn.mean()

nan

In [58]:
# ignores NaN values
s = pd.Series(nda)
s.mean()

3.0

In [60]:
# Handle NaN Values like Numpy 
s.mean(skipna = False)

3.0

# Boolean Selection

In [61]:
# Which rows hold a value greater than 5?
# The comparison is applied element-wise and yields a boolean Series.
s = pd.Series(np.arange(10))
s > 5

0    False
1    False
2    False
3    False
4    False
5    False
6     True
7     True
8     True
9     True
dtype: bool

In [63]:
# select rows where values are > 5:
# indexing with a boolean Series keeps only the rows where the mask is True
logicalResults = s > 5
s[logicalResults]

6    6
7    7
8    8
9    9
dtype: int32

In [64]:
# Left commented out because it raises an exception: Python's `and` needs a
# single True/False from each operand, which a whole Series cannot provide.
# s[s > 5 and s < 8]

# Correct form: combine the element-wise masks with & and parenthesise each
# comparison, because & binds more tightly than > and <.
s[(s > 5) & (s < 8)]

6    6
7    7
dtype: int32

In [65]:
# all() is True only when every element is True;
# any() is True as soon as at least one element is True
flags = pd.Series([True, False, False, True, True])
flags.all(), flags.any()

(False, True)

In [66]:
# summing a boolean array counts its True entries (True is treated as 1)
np.array([True, False, True, True]).sum()

3

In [67]:
# are all items >= 0?  all() reduces the boolean mask to a single True/False
(s >= 0).all()

True

In [68]:
# element-wise comparison: boolean mask marking the items that are < 2
s < 2

0     True
1     True
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

In [70]:
# any item < 2?
# Call any() on the boolean mask itself. s[s < 2].any() would instead test the
# truthiness of the selected values, and would wrongly report False if the
# only matching value were 0.
(s < 2).any()

True

In [71]:
# how many values < 2?
# Summing the boolean mask counts its True entries. count() would return the
# number of non-null entries in the mask (all 10 of them) instead.
(s < 2).sum()

2