### Numpy array vs Python lists

In [1]:
# speed

# list
a = [i for i in range(10000000)]
b = [i for i in range(10000000,20000000)]

c = []
import time 

start = time.time()
for i in range(len(a)):
  c.append(a[i] + b[i])
print(time.time()-start)

2.2120413780212402


In [2]:
# numpy
import numpy as np
a = np.arange(10000000)
b = np.arange(10000000,20000000)

start = time.time()
c = a + b
print(time.time()-start)

0.16399574279785156


In [3]:
# memory
a = [i for i in range(10000000)]
import sys

sys.getsizeof(a)

89095160

In [4]:
a = np.arange(10000000,dtype=np.int8)
sys.getsizeof(a)

10000112

### Advanced Indexing

In [8]:
# Normal Indexing and slicing

a = np.arange(12).reshape(4,3)
a

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [9]:
a[1,2]

5

In [12]:
a[1:3,1:3]

array([[4, 5],
       [7, 8]])

In [13]:
# Fancy Indexing
a[[0,2,3]]

array([[ 0,  1,  2],
       [ 6,  7,  8],
       [ 9, 10, 11]])

In [14]:
a1 = np.arange(24).reshape(6,4)
a1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [15]:
a1[[0,2,3,5]]

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [20, 21, 22, 23]])

In [17]:
a1[:,[0,2,3]]       # finding 1 2 and 3rd column

array([[ 0,  2,  3],
       [ 4,  6,  7],
       [ 8, 10, 11],
       [12, 14, 15],
       [16, 18, 19],
       [20, 22, 23]])

In [20]:
# Boolean Indexing
a = np.random.randint(1,100,24).reshape(6,4)
a

array([[48, 75, 32, 53],
       [21, 65, 47, 57],
       [57, 45, 68, 71],
       [60, 38,  8, 19],
       [99, 90, 53, 38],
       [ 2, 71,  1, 65]])

In [21]:
# find all numbers greater than 50
a[a > 50]

array([75, 53, 65, 57, 57, 68, 71, 60, 99, 90, 53, 71, 65])

In [22]:
# find out even numbers
a[a % 2 == 0]

array([48, 32, 68, 60, 38,  8, 90, 38,  2])

In [23]:
# find all numbers greater than 50 and are even

a[(a > 50) & (a % 2 == 0)]

array([68, 60, 90])

In [24]:
# find all numbers not divisible by 7
a[~(a % 7 == 0)]

array([48, 75, 32, 53, 65, 47, 57, 57, 45, 68, 71, 60, 38,  8, 19, 99, 90,
       53, 38,  2, 71,  1, 65])

### Broadcasting
The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations.

The smaller array is “broadcast” across the larger array so that they have compatible shapes.

In [25]:
# same shape
a = np.arange(6).reshape(2,3)
b = np.arange(6,12).reshape(2,3)

print(a)
print(b)

print(a+b)

[[0 1 2]
 [3 4 5]]
[[ 6  7  8]
 [ 9 10 11]]
[[ 6  8 10]
 [12 14 16]]


In [26]:
# diff shape
a = np.arange(6).reshape(2,3)
b = np.arange(3).reshape(1,3)

print(a)
print(b)

print(a+b)

[[0 1 2]
 [3 4 5]]
[[0 1 2]]
[[0 2 4]
 [3 5 7]]


### Broadcasting Rules

1. Make the two arrays have the same number of dimensions.
* If the numbers of dimensions of the two arrays are different, add new dimensions with size 1 to the head of the array with the smaller dimension.

2. Make each dimension of the two arrays the same size.

* If the sizes of each dimension of the two arrays do not match, dimensions with size 1 are stretched to the size of the other array.
* If there is a dimension whose size is not 1 in either of the two arrays, it cannot be broadcasted, and an error is raised.

<img src = "https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png">

In [27]:
# More examples

a = np.arange(12).reshape(4,3)
b = np.arange(3)

print(a)
print(b)

print(a+b)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]
[0 1 2]
[[ 0  2  4]
 [ 3  5  7]
 [ 6  8 10]
 [ 9 11 13]]


In [28]:
a = np.arange(12).reshape(3,4)
b = np.arange(3)

print(a)
print(b)

print(a+b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[0 1 2]


ValueError: operands could not be broadcast together with shapes (3,4) (3,) 

In [29]:
a = np.arange(3).reshape(1,3)
b = np.arange(3).reshape(3,1)

print(a)
print(b)

print(a+b)

[[0 1 2]]
[[0]
 [1]
 [2]]
[[0 1 2]
 [1 2 3]
 [2 3 4]]


In [30]:
a = np.arange(3).reshape(1,3)
b = np.arange(4).reshape(4,1)

print(a)
print(b)

print(a + b)

[[0 1 2]]
[[0]
 [1]
 [2]
 [3]]
[[0 1 2]
 [1 2 3]
 [2 3 4]
 [3 4 5]]


In [31]:
a = np.array([1])
# shape -> (1,1)
b = np.arange(4).reshape(2,2)
# shape -> (2,2)

print(a)
print(b)

print(a+b)

[1]
[[0 1]
 [2 3]]
[[1 2]
 [3 4]]


In [32]:
a = np.arange(12).reshape(3,4)
b = np.arange(12).reshape(4,3)

print(a)
print(b)

print(a+b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


ValueError: operands could not be broadcast together with shapes (3,4) (4,3) 

In [33]:
a = np.arange(16).reshape(4,4)
b = np.arange(4).reshape(2,2)

print(a)
print(b)

print(a+b)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[0 1]
 [2 3]]


ValueError: operands could not be broadcast together with shapes (4,4) (2,2) 

### Working with mathematical formulas

In [35]:
a = np.arange(10)
print(a)
np.sin(a)

[0 1 2 3 4 5 6 7 8 9]


array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849])

In [37]:
# sigmoid
def sigmoid(array):
  return 1/(1 + np.exp(-(array)))


a = np.arange(100)

sigmoid(a)

array([5.00000000e-01, 2.68941421e-01, 1.19202922e-01, 4.74258732e-02,
       1.79862100e-02, 6.69285092e-03, 2.47262316e-03, 9.11051194e-04,
       3.35350130e-04, 1.23394576e-04, 4.53978687e-05, 1.67014218e-05,
       6.14417460e-06, 2.26032430e-06, 8.31528028e-07, 3.05902227e-07,
       1.12535162e-07, 4.13993755e-08, 1.52299795e-08, 5.60279641e-09,
       2.06115362e-09, 7.58256042e-10, 2.78946809e-10, 1.02618796e-10,
       3.77513454e-11, 1.38879439e-11, 5.10908903e-12, 1.87952882e-12,
       6.91440011e-13, 2.54366565e-13, 9.35762297e-14, 3.44247711e-14,
       1.26641655e-14, 4.65888615e-15, 1.71390843e-15, 6.30511676e-16,
       2.31952283e-16, 8.53304763e-17, 3.13913279e-17, 1.15482242e-17,
       4.24835426e-18, 1.56288219e-18, 5.74952226e-19, 2.11513104e-19,
       7.78113224e-20, 2.86251858e-20, 1.05306174e-20, 3.87399763e-21,
       1.42516408e-21, 5.24288566e-22, 1.92874985e-22, 7.09547416e-23,
       2.61027907e-23, 9.60268005e-24, 3.53262857e-24, 1.29958143e-24,
      

In [38]:
# mean squared error

actual = np.random.randint(1,50,25)
predicted = np.random.randint(1,50,25)
print(actual)
print(predicted)

[43 30 30 18 23 40 34 38  2 11 46 20 33 10 21 42 49 23 19 33 23  3  7 15
 37]
[ 1 31 17 48 37 32 38  1 32 26 16 29 27 46  4 17 41 41 25 34 20 28  7 11
 47]


In [39]:
def mse(actual,predicted):
  return np.mean((actual - predicted)**2)

mse(actual,predicted)

400.24

### Working with missing values

In [40]:
# Working with missing values -> np.nan
a = np.array([1,2,3,4,np.nan,6])
a

array([ 1.,  2.,  3.,  4., nan,  6.])

In [41]:
a[~np.isnan(a)]

array([1., 2., 3., 4., 6.])