In [None]:
# Difference between list and np-array

# 1. Datatype consistency - Python lists can hold elements of different datatypes, while a numpy array enforces a single datatype for all of its elements
# 2. Efficiency - Numpy array operations are more efficient (faster) because of this datatype consistency
# 3. Performance - Python lists do not support vectorized operations, so numerical operations require explicit loops, which makes them even slower.
#  Numpy arrays are designed for numerical operations and support vectorized operations. These operations are implemented in C
# 4. Memory efficiency - A Python list stores a reference (memory address) to each of its elements; every element is a separate Python object that carries its own type information.
#  Numpy arrays, however, store their values as one contiguous block of data, making them more memory efficient.
# 5. Built-in functions - Numpy arrays have many optimised built-in functions

In [None]:
import time
import sys
import numpy as np

SPEED COMPARISON

In [None]:
a = list(range(3000000))
b = list(range(1000000, 4000000))

# Time only the element-wise addition. perf_counter() is a monotonic,
# high-resolution clock meant for measuring short durations.
# The loop is deliberately explicit: it is what the comparison is about.
c = []
start = time.perf_counter()
for x, y in zip(a, b):
    c.append(x + y)
end = time.perf_counter()

# Print after the clock has stopped so console I/O does not skew the timing.
print(c[:10])
print(end - start)

In [None]:
a = np.arange(3000000)
b = np.arange(1000000, 4000000)

# Vectorized addition: no Python-level loop, the element-wise work is done
# by numpy itself. Timed the same way as the list version for comparison.
start = time.perf_counter()
c = a + b
end = time.perf_counter()

# Print after the clock has stopped so console I/O does not skew the timing.
print(c[:10])
print(end - start)


MEMORY COMPARISON

In [None]:
a = [i for i in range(5000000)]

# sys.getsizeof() is shallow: for a list it counts only the list object and
# its array of references, not the int objects those references point to.
list_container_bytes = sys.getsizeof(a)
print(list_container_bytes)

# Add the size of every element to get a figure that is comparable with a
# numpy array, whose buffer holds the values themselves.
list_total_bytes = list_container_bytes + sum(map(sys.getsizeof, a))
print(list_total_bytes)

In [None]:
# Report the size of the same 5,000,000-element range for two dtypes:
# None lets numpy pick its default integer type, np.int32 forces 4-byte integers.
for int_dtype in (None, np.int32):
    a = np.arange(5000000, dtype=int_dtype)
    print(sys.getsizeof(a))

Advanced Indexing and Broadcasting

In [None]:
# Rows shared by both tables: three rows of four prices each.
_common_rows = [
    [100.1, 110.3, 103.2, 105.2],
    [200.1, 210.3, 203.2, 205.2],
    [300.1, 310.3, 303.2, 305.2],
]

# prices1 is the 3x4 table; prices2 is the same table plus one extra row (4x4).
prices1 = np.array(_common_rows)
prices2 = np.array(_common_rows + [[400.1, 410.0, 403.2, 405.2]])

In [None]:
# Basic slicing (start:stop:step per axis): rows 0-1, every second column
print(prices1[0:2, 0::2])

# Fancy (integer-array) indexing: same rows, columns picked by an explicit list
print(prices1[0:2, [0, 2]])

# Basic slicing: every third row, columns 1-2
print(prices2[0::3, 1:3])

# Mixing both: a slice on one axis and an explicit index list on the other
print(prices2[0::3, [1, 2]])
print(prices2[[0, 3], 1:3])

In [None]:
# Boolean (mask) indexing: a True/False array of the same shape picks elements
above_103 = prices2 > 103
is_even = prices2 % 2 == 0
above_200 = prices2 > 200

print(prices2[above_103])

print(prices2[is_even])

# Masks are combined element-wise with & (each comparison built separately)
print(prices2[is_even & above_200])

# BROADCASTING

In [None]:
# This deals with how numpy treats arrays of different shapes during arithmetic operations
# The smaller array is broadcast across the larger array to ensure shape compatibility

In [None]:
# The four distinct rows used to build every example array below.
row_100 = [100.1, 110.3, 103.2, 105.2]
row_200 = [200.1, 210.3, 203.2, 205.2]
row_300 = [300.1, 310.3, 303.2, 305.2]
row_400 = [400.1, 410.0, 403.2, 405.2]

# (3, 4) tables; prices3 is a separate array holding the same values as prices1
prices1 = np.array([row_100, row_200, row_300])
prices3 = np.array([row_100, row_200, row_300])

# (4, 4) table: the three rows above plus one more
prices2 = np.array([row_100, row_200, row_300, row_400])

# (1, 4): a single row kept two-dimensional
prices4 = np.array([row_100])

# (4,): the same row as a one-dimensional vector
prices6 = np.array(row_100)

# (4, 1): the first price of each row, stacked as a column
prices5 = np.array([[row[0]] for row in (row_100, row_200, row_300, row_400)])

In [None]:
# Same shape: both operands are (3, 4), so no broadcasting is needed
print(prices1.shape)
print(prices3.shape)

# Element-wise sum of the two equally shaped arrays
elementwise_sum = prices1 + prices3
print(elementwise_sum)

In [None]:
# Different shapes: (3, 4) versus (4, 4)
print(np.shape(prices1))
print(np.shape(prices2))

# Element-wise addition is not possible here: the first dimensions (3 and 4)
# differ and neither of them is 1, so numpy cannot broadcast the operands.
# The ValueError is caught and printed so the failure is actually shown
# without stopping a "Run All".
try:
    print(prices1 + prices2)
except ValueError as err:
    print("ValueError:", err)

In [None]:
# Rules

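# 1. If the arrays have a different number of dimensions, the shape of the one with fewer dimensions is padded with 1s on the left until both have the same number of dimensions
# 2. Along each dimension, if the sizes differ and one of them is 1, that array is stretched (repeated) to match the size of the other
# 3. If, along any dimension, the sizes differ and neither of them is 1, the operation cannot be carried out (ValueError)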
# Operand pairs whose shapes differ but are broadcast-compatible.
broadcast_pairs = (
    (prices2, prices4),  # (4, 4) + (1, 4): the single row is repeated down the rows
    (prices2, prices5),  # (4, 4) + (4, 1): the single column is repeated across the columns
    (prices5, prices4),  # (4, 1) + (1, 4): both operands are stretched to (4, 4)
    (prices2, prices6),  # (4, 4) + (4,):   the vector is treated as (1, 4), then repeated
)
for left, right in broadcast_pairs:
    print(left + right)

In [None]:
# Sigmoid, applied element-wise: 1 / (1 + e^(-x))
neg_exponential = np.exp(-prices1)
print(1 / (1 + neg_exponential))

# Mean Square Error
def mse(actual, corrected):
    """Return the mean squared error between two sets of values.

    Parameters
    ----------
    actual : array_like
        Reference values.
    corrected : array_like
        Values compared against ``actual``; the two inputs must have
        shapes that numpy can broadcast together.

    Returns
    -------
    numpy floating scalar
        Mean of the squared element-wise differences.
    """
    # np.asarray lets plain lists/tuples be passed as well as ndarrays
    # (``list - list`` is not defined); existing ndarrays are used as they are.
    difference = np.asarray(actual) - np.asarray(corrected)
    return np.mean(difference ** 2)

# Both arguments are the same array
print(mse(actual=prices2, corrected=prices2))

In [None]:
# Working with Missing Values

prin