### Numpy array vs Python Lists

In [2]:
 # speed
 # list

a = [i for i in range(10000000)]
b = [i for i in range(10000000,20000000)]
c = []
import time
start = time.time()
for i in range(len(a)):
  c.append(a[i]+b[i])
print(time.time()-start)


5.201359748840332


In [3]:
# Numpy :- same code with numpy
import numpy as np
a = np.arange(10000000)
b = np.arange(10000000,20000000)

start = time.time()
c = a+b
print(time.time()-start)

0.6253163814544678


In [4]:
# memory

In [5]:
a = [i for i in range(10000000)]
import sys

sys.getsizeof(a)


89095160

In [6]:
a = np.arange(10000000, dtype=np.int32)
sys.getsizeof(a)

40000112

In [7]:
# numpy is better than built in data ypes of python
# time efficient
# memory efficient
# Convinient


### Advanced Indexing

In [8]:
# Normal Indexing and Slicing

a = np.arange(24).reshape(6,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [9]:
a[1:3,1:3]

array([[ 5,  6],
       [ 9, 10]])

In [10]:
# Fancy Indexing
a[[0,2,3,5]] # row
a[:,[0,2,3]] # column

array([[ 0,  2,  3],
       [ 4,  6,  7],
       [ 8, 10, 11],
       [12, 14, 15],
       [16, 18, 19],
       [20, 22, 23]])

In [11]:
# Boolean Indexing
# Super Important for both Numpy and Pandas
import numpy as np
a = np.random.randint(1,100,24).reshape(6,4)
a

array([[67, 61, 25, 36],
       [47, 24, 13, 87],
       [10, 78,  7, 88],
       [ 6,  5, 72,  6],
       [92, 12, 65, 16],
       [84, 27, 42, 16]])

In [12]:
# Find all numbers greater than 50
a > 50 # generate a boolean array according to condition
a[a>50] # Boolean Mask

array([67, 61, 87, 78, 88, 72, 92, 65, 84])

In [13]:
# find out even numbers
a % 2 ==0
a[a%2==0]

array([36, 24, 10, 78, 88,  6, 72,  6, 92, 12, 16, 84, 42, 16])

In [14]:
# Find all numbers greater than 50 and are even
# bitwise & = is used to work with boolean values
# Logical and is used for logical values
a[(a>50) & (a % 2==0)]

array([78, 88, 72, 92, 84])

In [18]:
# find all numbers not divisible by 7
print(a[a%7==0])
# not
a[~(a % 7 == 0)]

[ 7 84 42]


array([67, 61, 25, 36, 47, 24, 13, 87, 10, 78, 88,  6,  5, 72,  6, 92, 12,
       65, 16, 27, 16])

### Broadcasting

The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations.

The smaller array is “broadcast” across the larger array so that they have compatible shapes.

In [24]:
# same shape
a = np.arange(6).reshape(2,3)
b = np.arange(6,12).reshape(2,3)

print(a)
print(b)
print('Itemwise addition:\n',a+b)

a = np.arange(6).reshape(2,3)
b = np.arange(3).reshape(1,3)

print(a)
print(b)
print(a+b)

[[0 1 2]
 [3 4 5]]
[[ 6  7  8]
 [ 9 10 11]]
Itemwise addition:
 [[ 6  8 10]
 [12 14 16]]
[[0 1 2]
 [3 4 5]]
[[0 1 2]]
[[0 2 4]
 [3 5 7]]


#### Broadcasting Rules

**1. Make the two arrays have the same number of dimensions.**<br>
- If the numbers of dimensions of the two arrays are different, add new dimensions with size 1 to the head of the array with the smaller dimension.<br>

**2. Make each dimension of the two arrays the same size.**<br>
- If the sizes of each dimension of the two arrays do not match, dimensions with size 1 are stretched to the size of the other array.
- If there is a dimension whose size is not 1 in either of the two arrays, it cannot be broadcasted, and an error is raised.

<img src = "https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png">

In [27]:
# More examples

a = np.arange(12).reshape(4,3)
b = np.arange(3)

print(a)
print(b)
print(a+b)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[0 1 2]
[[ 0  2  4]
 [ 3  5  7]
 [ 6  8 10]
 [ 9 11 13]]


In [28]:
a = np.arange(12).reshape(3,4)
b = np.arange(3)

print(a)
print(b)

print(a+b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[0 1 2]


ValueError: operands could not be broadcast together with shapes (3,4) (3,) 

In [30]:
a = np.arange(3).reshape(1,3)
b = np.arange(3).reshape(3,1)

print(a)
print(b)

print(a+b)

[[0 1 2]]
[[0]
 [1]
 [2]]
[[0 1 2]
 [1 2 3]
 [2 3 4]]


In [32]:
a = np.arange(3).reshape(1,3)
b = np.arange(4).reshape(4,1)

print(a)
print(b)

print(a+b)

[[0 1 2]]
[[0]
 [1]
 [2]
 [3]]
[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]]


In [33]:
a = np.array([1])
# shape -> (1,1)
b = np.arange(4).reshape(2,2)
# shape -> (2,2)

print(a)
print(b)

print(a+b)

[1]
[[0 1]
 [2 3]]
[[1 2]
 [3 4]]


In [34]:
a = np.arange(12).reshape(3,4)
b = np.arange(12).reshape(4,3)

print(a)
print(b)

print(a+b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


ValueError: operands could not be broadcast together with shapes (3,4) (4,3) 

In [35]:
a = np.arange(16).reshape(4,4)
b = np.arange(4).reshape(2,2)

print(a)
print(b)

print(a+b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[0 1]
 [2 3]]


ValueError: operands could not be broadcast together with shapes (4,4) (2,2) 