### Convention of importing pandas and numpy

In [1]:
import numpy as np
import pandas as pd

### Pandas series created from a list

In [2]:
# Even numbers 2..20 as a Series; with no index given, pandas labels them 0..9.
s1 = pd.Series(list(range(2, 21, 2)))
print(s1)

0     2
1     4
2     6
3     8
4    10
5    12
6    14
7    16
8    18
9    20
dtype: int64


### Assigning own labels to Series

In [3]:
# Attach custom string labels to the values instead of the default integer index.
a = [1, 2, 3]
my_var = pd.Series(data=a, index=list("xyz"))
print(my_var)

x    1
y    2
z    3
dtype: int64


### The Series elements can be accessed through their assigned labels

In [4]:
# Index the Series with one of its custom labels to get the matching value.
print(my_var['x'])

1


### Key value objects as Series

In [5]:
# A dict becomes a Series: the keys turn into the index, the values into the data.
mo_yr_num = pd.Series(
    {
        'jan': 202301,
        'feb': 202302,
        'mar': 202303,
        'apr': 202304,
    }
)
print(mo_yr_num)

jan    202301
feb    202302
mar    202303
apr    202304
dtype: int64


In [6]:
# Re-wrapping an existing Series with an explicit index keeps only the listed labels.
my_value = pd.Series(data=mo_yr_num, index=['jan', 'feb'])
print(my_value)

jan    202301
feb    202302
dtype: int64


### Formatting the output

In [7]:
# Render pandas objects as plain text rather than HTML tables in the notebook.
pd.options.display.notebook_repr_html = False
# Show at most 10 columns before the output is truncated.
pd.options.display.max_columns = 10
# Show at most 10 rows before the output is truncated.
pd.options.display.max_rows = 10

### Series can also be constructed using a scalar value

In [8]:
# A lone scalar yields a one-element Series labelled 0 (this rebinds s1).
s1 = pd.Series(data=2)

### The values inside a series can be accessed using the labels

In [9]:
# s1 has the default integer index, so 0 is the label of its only element.
s1[0]

2

### Creation of a pandas Series using a list

In [10]:
# The integers 1..5 as a Series with the default 0-based index.
s2 = pd.Series(list(range(1, 6)))
s2

0    1
1    2
2    3
3    4
4    5
dtype: int64

### The values of series can be accessed via the values property

In [11]:
# The data of the Series as a NumPy array, without the index.
s2.values

array([1, 2, 3, 4, 5], dtype=int64)

### Use of index property of series

In [12]:
# s2 was built without an explicit index, so pandas supplied a RangeIndex.
s2.index

RangeIndex(start=0, stop=5, step=1)

### Using list to create series

In [13]:
# Values 1..5 labelled 'a'..'e'.
s3 = pd.Series(data=list(range(1, 6)), index=list("abcde"))

### .values property of series

In [14]:
# Custom labels do not affect the values: still a plain NumPy array of the data.
s3.values

array([1, 2, 3, 4, 5], dtype=int64)

### .index property of series

In [15]:
# The labels passed at construction are stored as an Index object.
s3.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

### Series Behaviour 

In [16]:
# The scalar is added to every element; the labels are kept.
s3+2

a    3
b    4
c    5
d    6
e    7
dtype: int64

In [17]:
# The addition in the previous cell returned a new Series; s3 itself is unchanged.
s3

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [18]:
# Broadcast the scalar 3 across the labels borrowed from s3.
s4 = pd.Series(data=3, index=s3.index)

In [19]:
# The scalar 3 was repeated once for every label taken from s3.index.
s4

a    3
b    3
c    3
d    3
e    3
dtype: int64

### Create Series from an existing index; the scalar value will be copied to each index label

In [20]:
# Reuse s2's integer index; the scalar 2 fills every position.
s5 = pd.Series(data=2, index=s2.index)
s5

0    2
1    2
2    2
3    2
4    2
dtype: int64

### Creates a Series from five normally distributed values

In [21]:
# Seed NumPy's global generator so the five draws below are reproducible.
# np.random.seed() returns None, so binding its result to a name is useless;
# d4 now holds the generated Series instead.
np.random.seed(123456)
d4 = pd.Series(np.random.randn(5))
d4

0    0.469112
1   -0.282863
2   -1.509059
3   -1.135632
4    1.212112
dtype: float64

### Creating series from numpy linspace

In [22]:
# Ten evenly spaced floats from 0 to 9 (both endpoints included).
d5 = pd.Series(np.linspace(start=0, stop=9, num=10))
d5

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
7    7.0
8    8.0
9    9.0
dtype: float64

### Creating Series from numpy arange

In [23]:
# Integers 0..8: arange stops before the upper bound.
d6 = pd.Series(np.arange(9))
d6

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
dtype: int32

### Creating a series from dictionary 

In [24]:
# Dict keys become the index labels, dict values the data.
d7 = pd.Series(dict(a=1, b=2, c=3))
d7

a    1
b    2
c    3
dtype: int64

In [25]:
# Keep only the first two entries of d7.
d7.head(n = 2)

a    1
b    2
dtype: int64

### Example Series to determine Size, shape, uniqueness, and counts of values

In [26]:
# Sample data with one duplicate (1) and one missing value (NaN);
# the NaN forces the whole Series to float64.
s = pd.Series([0, 1, 1, *range(2, 8), np.nan])
s

0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
5    4.0
6    5.0
7    6.0
8    7.0
9    NaN
dtype: float64

### Length of a Series

In [27]:
# len() counts every entry, including the NaN.
len(s)

10

#### .size property to get number of elements in series

In [28]:
# Same count as len(s): the NaN entry is included.
s.size

10

#### .shape property returns a tuple with first element = number of elements in series

In [29]:
# A Series is one-dimensional, so the shape tuple has a single entry.
s.shape

(10,)

#### Number of values that are not NaN

In [30]:
# Unlike len() and .size, count() leaves out the NaN entry.
s.count()

9

#### All the unique values in series

In [31]:
# Returns a NumPy array; the duplicate 1.0 is collapsed and NaN is kept as a value.
s.unique()

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7., nan])

#### Count of each unique value in series (NaN is excluded by default)

In [32]:
# Tally of each distinct value, most frequent first; NaN is left out by default.
s.value_counts()

1.0    2
0.0    1
2.0    1
3.0    1
4.0    1
5.0    1
6.0    1
7.0    1
dtype: int64

### Peeking at data with head, tail, and take

##### First 5 items of series

In [33]:
# With no argument, head() returns the first five entries.
s.head()

0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
dtype: float64

##### s.head(n = 3) or s.head(3) -- first 3 items of series

In [34]:
# Pass a count to change how many leading entries are returned.
s.head(3)

0    0.0
1    1.0
2    1.0
dtype: float64

##### Last 5 items of series

In [35]:
# With no argument, tail() returns the last five entries (the NaN included).
s.tail()

5    4.0
6    5.0
7    6.0
8    7.0
9    NaN
dtype: float64

##### s.tail(n=3) or s.tail(3) -- Last 3 items of series

In [36]:
# Pass a count to change how many trailing entries are returned.
s.tail(3)

7    6.0
8    7.0
9    NaN
dtype: float64

##### Return rows in the series specified by 0-based index

In [37]:
# take() selects by 0-based integer position; the selected rows keep their labels.
s.take([1,3,5])

1    1.0
3    2.0
5    4.0
dtype: float64

### Lookup Values in series

In [38]:
# Values 0..4 labelled 'a'..'e' (this rebinds s for the lookup examples).
s = pd.Series(np.arange(5), index=list("abcde"))

#### Lookup by label

In [39]:
# Label lookup: returns the value stored under 'a'.
s["a"]

0

In [40]:
# Position-based lookup: fetch the third element regardless of its label.
# Plain s[2] on a string-labelled index relies on the deprecated
# integer-as-position fallback of Series.__getitem__; .iloc states the intent.
s.iloc[2]

2

In [41]:
# A list of labels returns a Series containing just those entries.
s[["a","b"]]

a    0
b    1
dtype: int32

In [42]:
# Values 1..5 under integer labels 10..50, so labels and positions differ.
s1 = pd.Series(data=list(range(1, 6)), index=list(range(10, 60, 10)))

In [43]:
# The index holds integers, so [] treats 20 as a label, not a position.
s1[20]

2

#### Label based lookup

In [44]:
# .loc looks up by label: 20 is the label of the second element.
s1.loc[20]

2

In [45]:
# .loc with a list of labels returns the matching sub-Series.
s1.loc[[20,30]]

20    2
30    3
dtype: int64

#### Position based lookup

In [46]:
# .iloc looks up by 0-based position: position 3 holds label 40.
s1.iloc[3]

4

In [47]:
# Positions 2 and 4 correspond to labels 30 and 50.
s1.iloc[[2,4]]

30    3
50    5
dtype: int64

In [55]:
# Position 60 does not exist in the five-element s1, so .iloc raises IndexError.
# Catch and print the error so the demonstration does not halt "Run All".
try:
    s1.iloc[60]
except IndexError as exc:
    print(f"{type(exc).__name__}: {exc}")

IndexError: single positional indexer is out-of-bounds

### Alignment via the index labels

In [56]:
# Two Series sharing the same labels in different orders; addition matches
# values by label, not by position.
a1 = pd.Series({"a": 1, "b": 2, "c": 3, "e": 4, "d": 5})
a2 = pd.Series({"e": 5, "d": 8, "c": 3, "b": 9, "a": 1})
a1 + a2

a     2
b    11
c     6
d    13
e     9
dtype: int64

### Arithmetic operations on series

#### Vectorized multiplication

In [57]:
# Every element is multiplied by the scalar; a1's own label order (a, b, c, e, d) is kept.
a1*2

a     2
b     4
c     6
e     8
d    10
dtype: int64

#### Multiplication after creating a new series is considered less efficient

In [60]:
# Build a Series of 2s on a2's labels, then multiply label-by-label.
# Note: this rebinds s1, which earlier held the Series labelled 10..50.
s1 = pd.Series(data=2, index=a2.index)
a2 * s1

e    10
d    16
c     6
b    18
a     2
dtype: int64

### NaN results for a and f demonstrate alignment

In [61]:
# The label sets overlap only on b..e; 'a' and 'f' each exist in one Series,
# so their sums have no partner and come out as NaN.
b1 = pd.Series(np.arange(1, 6), index=list("abcde"))
b2 = pd.Series(range(5, 10), index=list("bcdef"))
b1 + b2

a     NaN
b     7.0
c     9.0
d    11.0
e    13.0
f     NaN
dtype: float64

### Two Series objects with duplicate index labels

In [65]:
# Floats 1.0..5.0 where the label 'a' appears twice.
c1 = pd.Series(np.linspace(start=1, stop=5, num=5), index=list("aacde"))
c1

a    1.0
a    2.0
c    3.0
d    4.0
e    5.0
dtype: float64

In [66]:
# Integers 5..9, again with the label 'a' duplicated.
c2 = pd.Series(range(5, 10), index=list("aadef"))
c2

a    5
a    6
d    7
e    8
f    9
dtype: int64

### Adding them: rows with a duplicated label are combined as a cartesian product of the matching rows

In [69]:
# Each 'a' row of c1 is paired with each 'a' row of c2 (2 x 2 = 4 rows);
# 'c' and 'f' exist in only one of the two Series and therefore become NaN.
c1+c2

a     6.0
a     7.0
a     7.0
a     8.0
c     NaN
d    11.0
e    13.0
f     NaN
dtype: float64

### NaN the special case

### Numpy response to NaN

In [74]:
# Baseline without missing data: the mean of 1, 2, 3, 4.
e1 = np.array([1, 2, 3, 4])
np.mean(e1)

2.5

In [73]:
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
d1 = np.array([1, 2, 3, 4, np.nan])
# NumPy propagates NaN through the mean, so the result is nan.
d1.mean()

nan

#### Pandas skips missing values (NaN) by default when aggregating

In [77]:
# Wrap the NaN-containing NumPy array in a Series.
bjp = pd.Series(d1)
# Series.mean() skips NaN by default, so only 1, 2, 3, 4 are averaged.
bjp.mean()

2.5

##### Forcing pandas to behave like numpy by passing the skipna=False argument

In [76]:
# skipna=False makes the NaN propagate, matching the NumPy result.
bjp.mean(skipna = False)

nan

## Attributes and methods of pandas series

In [None]:
# Transpose of a Series; per the pandas docs this is by definition the Series itself.
s1.T

In [None]:
# Display s1 itself, for comparison with the s1.T cell.
s1

In [None]:
# .array returns the pandas array object backing the Series' data.
s1.array

In [None]:
# A Series of single-character strings (this rebinds s2).
s2 = pd.Series(list("abcde"))

In [None]:
# Same accessor on string data: the array object backing s2.
s2.array