# Vectorization

## Using a for loop (list comprehension)

In [81]:
# Baseline without NumPy: double every element of a plain Python list
# by visiting the elements one at a time.
v = [1, 2, 3, 4, 5]
v2 = [value * 2 for value in v]
v2

[2, 4, 6, 8, 10]

In [82]:
# On a plain list, `*` repeats the list; it does not scale the elements.
2 * v

[1, 2, 3, 4, 5, 1, 2, 3, 4, 5]

## Vectorization with Numpy

In [83]:
import numpy as np
# Convert the Python list `v` into a NumPy array and display it.
a = np.array(v)
a

array([1, 2, 3, 4, 5])

In [84]:
# Confirm that `a` is a numpy.ndarray rather than a list.
type(a)

numpy.ndarray

In [85]:
# On an ndarray the same expression multiplies every element by 2.
2 * a

array([ 2,  4,  6,  8, 10])

In [86]:
# Build a 2-D example: the integers 0..11 laid out as 4 rows by 3 columns.
b = np.arange(12).reshape(4, 3)

In [87]:
# Display `b`: 4 rows by 3 columns, filled row by row with 0..11.
b

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [88]:
# Scalar multiplication is applied element-wise to the 2-D array as well.
b * 2

array([[ 0,  2,  4],
       [ 6,  8, 10],
       [12, 14, 16],
       [18, 20, 22]])

In [89]:
# Exponentiation is element-wise too: every entry is squared.
b ** 2

array([[  0,   1,   4],
       [  9,  16,  25],
       [ 36,  49,  64],
       [ 81, 100, 121]], dtype=int32)

In [90]:
# With no axis argument, ndarray.mean() averages over every element of the
# array and returns a single number.
b.mean()

5.5

In [91]:
# Function form: np.mean(b) gives the same overall mean as b.mean().
np.mean(b)

5.5

In [92]:
# axis=0 averages down the rows, producing one mean per column (3 values).
b.mean(axis=0)

array([4.5, 5.5, 6.5])

In [93]:
# axis=1 averages across the columns, producing one mean per row (4 values).
b.mean(axis=1)

array([ 1.,  4.,  7., 10.])

In [94]:
# Function form of the per-row mean; np.mean takes the same axis argument.
np.mean(b, axis=1)

array([ 1.,  4.,  7., 10.])

## Vectorization with Pandas

In [95]:
# Rebind `a` to a 7x3 grid holding 0..20; it supplies the DataFrame values below.
a = np.reshape(np.arange(21), (7, 3))
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14],
       [15, 16, 17],
       [18, 19, 20]])

In [96]:
import pandas as pd
# Index the seven rows by consecutive business days (freq='B'). The start date
# falls on a weekend, so the index begins on 2023-07-03 and skips 07-08/07-09.
index = pd.date_range(start='2023-7-1', periods=7, freq='B')
# One label per column of `a`: 'a', 'b', 'c'.
columns = list('abc')
df = pd.DataFrame(data=a, index=index, columns=columns)
df

Unnamed: 0,a,b,c
2023-07-03,0,1,2
2023-07-04,3,4,5
2023-07-05,6,7,8
2023-07-06,9,10,11
2023-07-07,12,13,14
2023-07-10,15,16,17
2023-07-11,18,19,20


In [97]:
# Scalar multiplication is element-wise on a DataFrame; the date index and
# column labels are preserved.
2 * df

Unnamed: 0,a,b,c
2023-07-03,0,2,4
2023-07-04,6,8,10
2023-07-05,12,14,16
2023-07-06,18,20,22
2023-07-07,24,26,28
2023-07-10,30,32,34
2023-07-11,36,38,40


In [98]:
# With no arguments, sum() adds up each column: one total per column label.
df.sum()

a    63
b    70
c    77
dtype: int64

In [99]:
# Passing axis=0 explicitly gives the same per-column totals as the default.
df.sum(axis=0)

a    63
b    70
c    77
dtype: int64

In [100]:
# axis=1 sums across the columns: one total per row, labelled by date.
df.sum(axis=1)

2023-07-03     3
2023-07-04    12
2023-07-05    21
2023-07-06    30
2023-07-07    39
2023-07-10    48
2023-07-11    57
Freq: B, dtype: int64

In [101]:
# mean() also works column by column by default; the result is float64.
df.mean()

a     9.0
b    10.0
c    11.0
dtype: float64

In [102]:
# Explicit axis=0: the same per-column means as the default call.
df.mean(axis=0)

a     9.0
b    10.0
c    11.0
dtype: float64

In [103]:
# Select the single column 'a'; the result keeps the date index and the name 'a'.
df['a']

2023-07-03     0
2023-07-04     3
2023-07-05     6
2023-07-06     9
2023-07-07    12
2023-07-10    15
2023-07-11    18
Freq: B, Name: a, dtype: int32

In [104]:
# Adding two columns is element-wise: each row's 'a' and 'c' values are summed.
df['a'] + df['c']

2023-07-03     2
2023-07-04     8
2023-07-05    14
2023-07-06    20
2023-07-07    26
2023-07-10    32
2023-07-11    38
Freq: B, dtype: int32

In [105]:
# Columns can also be read as attributes (df.a); the whole arithmetic
# expression is evaluated row by row.
df.a * 2 + df.c

2023-07-03     2
2023-07-04    11
2023-07-05    20
2023-07-06    29
2023-07-07    38
2023-07-10    47
2023-07-11    56
Freq: B, dtype: int32

In [106]:
# A comparison is evaluated per element and yields one boolean per row.
df.a > 5

2023-07-03    False
2023-07-04    False
2023-07-05     True
2023-07-06     True
2023-07-07     True
2023-07-10     True
2023-07-11     True
Freq: B, Name: a, dtype: bool

In [107]:
# Boolean indexing: keep only the rows where column 'a' is greater than 5.
df[df['a'] > 5]

Unnamed: 0,a,b,c
2023-07-05,6,7,8
2023-07-06,9,10,11
2023-07-07,12,13,14
2023-07-10,15,16,17
2023-07-11,18,19,20


In [108]:
# Compare two columns row by row; here 'c' is larger than 'b' in every row.
df['c'] > df['b']

2023-07-03    True
2023-07-04    True
2023-07-05    True
2023-07-06    True
2023-07-07    True
2023-07-10    True
2023-07-11    True
Freq: B, dtype: bool