### Numpy array vs Python lists

In [1]:
# speed
# List
a = [i for i in range(10000000)]
b = [i for i in range(10000000, 20000000)]

c = []

import time

start = time.time()
for i in range(len(a)):
    c.append(a[i] + b[i])

time_taken_list = time.time() - start
print("Time taken: ", time_taken_list, ' sec')

Time taken:  1.421417474746704  sec


In [2]:
# Now the same in numpy
import numpy as np

a = np.arange(10000000)
b = np.arange(10000000, 20000000)

start = time.time()
c = a + b

time_taken_numpy = time.time() - start
print("Time taken: ", time_taken_numpy, ' sec')

Time taken:  0.11529088020324707  sec


In [3]:
print(time_taken_list / time_taken_numpy)

12.328967150227994


#### This is because numpy uses c type array, i.e. static array, and it is not a referential array
#### While list is a dynamic array. And it is a referential array

In [4]:
# memory
a = [i for i in range(10000000)]

import sys

list_size = sys.getsizeof(a)
print(list_size, ' bytes')

89095160  bytes


In [5]:
a = np.arange(10000000)

numpy_size = sys.getsizeof(a)
print(numpy_size, ' bytes')

print('\nDividing sizes list/numpy: ', list_size / numpy_size)

40000112  bytes

Dividing sizes list/numpy:  2.2273727633562626


In [6]:
a = np.arange(10000000, dtype=np.int8)

numpy_size_int8 = sys.getsizeof(a)
print(numpy_size_int8, ' bytes')

print('\nDividing sizes list/numpy: ', list_size / numpy_size_int8)

10000112  bytes

Dividing sizes list/numpy:  8.909416214538398


In [7]:
# If someone asks why numpy is better than list
# less memory
# less time to execute
# convenient

### Advanced Indexing

In [8]:
# Normal indexing and slicing 
a = np.arange(12).reshape(4, 3)
print(a)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [9]:
print(a[1:3, ::2])

[[3 5]
 [6 8]]


In [10]:
# Fancy indexing
print(a)

[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


In [11]:
print(a[::2])

[[0 1 2]
 [6 7 8]]


In [12]:
# what if we want 1, 3, 4th rows
print(a[[0, 2, 3]])

[[ 0  1  2]
 [ 6  7  8]
 [ 9 10 11]]


In [13]:
a = np.arange(24).reshape(6, 4)
print(a)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]]


In [14]:
# 1, 3, 4, 6 rows
print(a[[0, 2, 3, 5]])

[[ 0  1  2  3]
 [ 8  9 10 11]
 [12 13 14 15]
 [20 21 22 23]]


In [15]:
# 1, 3, 4 columns
print(a[:, [0, 2, 3]])

[[ 0  2  3]
 [ 4  6  7]
 [ 8 10 11]
 [12 14 15]
 [16 18 19]
 [20 22 23]]


In [16]:
# boolean indexing
a = np.random.randint(1, 100, 24).reshape(6, 4)
print(a)

[[50 47 51 74]
 [ 1 21 60 37]
 [35 61 26 25]
 [87 32 70 76]
 [25 33 58 25]
 [99 75 62 43]]


In [17]:
# find all numbers greater than 50
print(a > 50)

[[False False  True  True]
 [False False  True False]
 [False  True False False]
 [ True False  True  True]
 [False False  True False]
 [ True  True  True False]]


In [18]:
print(a[a > 50])

[51 74 60 61 87 70 76 58 99 75 62]


In [19]:
# find out even numbers
print(a & 2 == 0)

[[False False False False]
 [ True  True  True  True]
 [False  True False  True]
 [False  True False  True]
 [ True  True False  True]
 [False False False False]]


In [20]:
print(a[a % 2 == 0])

[50 74 60 26 32 70 76 58 62]


In [21]:
type(a[a % 2 == 0])

numpy.ndarray

In [22]:
# find all numbers greater than 50 and are even
# we use &, which is bitwise operator, because we are dealing with boolean values True and False 
# Rather than 'and', which is a logical AND 

print((a > 50) & (a % 2 == 0))

[[False False False  True]
 [False False  True False]
 [False False False False]
 [False False  True  True]
 [False False  True False]
 [False False  True False]]


In [23]:
print(a[(a > 50) & (a % 2 == 0)])

[74 60 70 76 58 62]


In [24]:
print(a[(a > 50) and (a % 2 == 0)])
# NOT WORKING

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
# find all numbers not divisible by 7
print(a[~(a % 7 == 0)])
print(a[a % 7 != 0])
# Both are same

### Broadcasting

The term broadcasting describes how NumPy treats arrays with different shapes during arithmetic operations.

The smaller array is “broadcast” across the larger array so that they have compatible shapes.

In [None]:
# same shape
a = np.arange(6).reshape(2, 3)
b = np.arange(6, 12).reshape(2, 3)
print(a)
print()
print(b)
print()
print(a + b)
# Item wise addition

In [None]:
a = np.arange(6).reshape(2, 3)
b = np.arange(3).reshape(1, 3)

print(a)
print()
print(b)
print()
print(a + b)
# This issue of improper shape is solved by broadcasting

#### Broadcasting Rules

**1. Make the two arrays have the same number of dimensions.**<br>
- If the numbers of dimensions of the two arrays are different, add new dimensions with size 1 to the head of the array with the smaller dimension.<br>

**2. Make each dimension of the two arrays the same size.**<br>
- If the sizes of each dimension of the two arrays do not match, dimensions with size 1 are stretched to the size of the other array.
- If there is a dimension whose size is not 1 in either of the two arrays, it cannot be broadcasted, and an error is raised.

![Broadcasting Example](https://jakevdp.github.io/PythonDataScienceHandbook/figures/02.05-broadcasting.png)

In [None]:
# More examples
a = np.arange(12).reshape(4, 3)
b = np.arange(3)

print(a)
print()
print(b)
print()
print(a + b)

In [None]:
a = np.arange(12).reshape(3, 4)
b = np.arange(3)

print(a)
print()
print(b)
print()
print(a + b)

In [None]:
a = np.arange(3).reshape(1, 3)
b = np.arange(3).reshape(3, 1)

print(a)
print()
print(b)
print()
print(a + b)

In [None]:
a = np.arange(3).reshape(1, 3)
b = np.arange(4).reshape(4, 1)

print(a)
print()
print(b)
print()
print(a + b)

In [None]:
a = np.array([1])
# shape --> (1, 1)
b = np.arange(4).reshape(2, 2)
# shape --> (2, 2)

print(a)
print()
print(b)
print()
print(a + b)

In [None]:
a = np.arange(12).reshape(3, 4)
b = np.arange(12).reshape(4, 3)

print(a)
print()
print(b)
print()
print(a + b)

In [None]:
a = np.arange(16).reshape(4, 4)
b = np.arange(4).reshape(2, 2)

print(a)
print()
print(b)
print()
print(a + b)

### Working with mathematical formulas

In [None]:
a = np.arange(10)
print(a, '\n')
np.sum(a)

In [None]:
print(np.sin(a))
# These are the most common ones, that is why they are available as built in

In [None]:
# sigmoid from logistic regression
def sigmoid(array):
    return 1 / (1 + np.exp(-array))


a = np.arange(10)
sigmoid(a)

In [None]:
# mean squared error
actual = np.random.randint(1, 50, 25)
predicted = np.random.randint(1, 50, 25)

print(actual)
print(predicted)

In [None]:
actual - predicted

In [None]:
def mse(actual, predicted):
    return np.mean((actual - predicted) ** 2)


mse(actual, predicted)

##### binary cross entropy - logistic regression


![Binary cross entropy](https://cdn.analyticsvidhya.com/wp-content/uploads/2021/03/Screenshot-from-2021-03-03-11-33-29.png)

In [None]:

def bin_cross_entro(actual, predicted):
    predicted = np.clip(predicted, 1e-15, 1 - 1e-15)

    return np.mean(-(actual * np.log(predicted) + (1 - actual) * np.log(1 - predicted)))


bin_cross_entro(actual, predicted)

### Working with missing values

In [None]:
# working with missing values --> np.nan
a = np.array([1, 2, 3, 4, np.nan, 6])
print(a)

In [None]:
## boolean indexing
np.isnan(a)

In [None]:
a[~(np.isnan(a))]

### Plotting Graphs

In [None]:
# plotting a 2D plot
# x = y
# visit desmos.com

x = np.linspace(-10, 10, 100)
print(x)
y = x

In [None]:
print('x shape: ', x.shape, '\ny shape: ', y.shape)

In [None]:
import matplotlib.pyplot as plt

plt.plot(x, y)

In [None]:
# y = x^2 - parabola
x = np.linspace(-10, 10, 100)
y = x ** 2

plt.plot(x, y)

In [None]:
# y = sin(x)

x = np.linspace(-10, 10, 200)
y = np.sin(x)

plt.plot(x, y)


In [None]:
# y = xlog(x)
x = np.linspace(-10, 10, 100)
y = x * np.log(x)

plt.plot(x, y)

In [None]:
x = np.linspace(-10, 10, 100)
y = sigmoid(x)
# or
# y = 1/(1 + np.exp(-x))

plt.plot(x, y)

In [None]:
# plotting a scatter plot

### Meshgrids

In [None]:
# Meshgrids