In [1]:
import numpy as np


### Array vs List


In [None]:
# Nested Python list -> 3x3 ndarray of 64-bit floats.
x = [
    [1, 2, 3],
    [4, 5, 6],
    [1, 10, 100],
]
a = np.asarray(x, dtype=np.float64)

print(a)
# Other handy attributes besides shape: dtype, ndim, size, itemsize.
print(a.shape)
# Element at row 1, column 2 (zero-based).
print(a[1, 2])

In [None]:
# Multiplying a list by an int repeats the list;
# multiplying an ndarray by an int scales every element.
y = x * 2
b = 2 * a
print(f'list: {y}')
print(f'np array: \n{b}')

In [None]:
# Element-wise product of two ndarrays.
# The list analogue (z = x*y) is left commented out: a list cannot be multiplied by a list.
c = np.multiply(a, b)
print(c)

### Dot Product

In [None]:
# Two 1x3 row vectors, filled with 5s and 3s respectively.
x = np.full((1, 3), 5)
y = np.full((1, 3), 3)

# 1) element-wise multiply, then sum everything -> scalar
dot_res = (x * y).sum()
print(f'first way: {dot_res}')

# 2) matrix product of x with y transposed -> 1x1 result, flattened to a single element
dot_res = x.dot(y.T).flatten()
print(f'second way: {dot_res}')

# 3) the @ operator (matmul); the 1x1 result is left unflattened
print(f'new way: {x @ y.T}')

### Matrix product

In [None]:
# (2x4) times (4x2) gives a 2x2 result; each entry sums four products 8*2, i.e. 64.
x = np.full((2, 4), 8)
y = np.full((4, 2), 2)
print(x @ y)

In [None]:
# Common linear-algebra helpers applied to a 3x3 float matrix.
x = [
    [1, 2, 3],
    [4, 5, 6],
    [1, 10, 100],
]
a = np.asarray(x, dtype=np.float64)

# determinant (a scalar)
c = np.linalg.det(a)
print(c)

# inverse matrix
c = np.linalg.inv(a)
print(c)

# np.diag on a 2-D array extracts its main diagonal as a 1-D array
c = np.diag(a)
print(c)

# np.diag on a 1-D sequence does the opposite: it builds a square matrix
# with that sequence on the diagonal (same function, different behaviour)
c = np.diag(np.array([1, 4, 8]))
print(c)

### Speed Test array vs list

In [None]:
from timeit import default_timer as timer

# The same random data held twice: as ndarrays (a, b) and as plain Python lists (A, B).
a = np.random.randn(1000)
b = np.random.randn(1000)

A = list(a)
B = list(b)

T = 1000  # number of repetitions per timing run


def dot1():
    """Return the dot product of the lists A and B using a pure-Python loop."""
    dot = 0
    for left, right in zip(A, B):
        dot += left*right
    return dot


def dot2():
    """Return the dot product of the arrays a and b using NumPy."""
    return np.dot(a, b)


def _time_calls(func, repeats):
    """Return the wall-clock seconds taken to call ``func()`` ``repeats`` times."""
    start = timer()
    for _ in range(repeats):
        func()
    return timer() - start


# Elapsed seconds: first the pure-Python loop, then the NumPy version.
print(_time_calls(dot1, T))
print(_time_calls(dot2, T))

###  Indexing/Slicing/Boolean Indexing

In [8]:
# Product of all elements except self:
# output[i] ends up as the product of every nums[j] with j != i.
nums = [1, 2, 3, 4]
output = np.ones(len(nums), dtype='int32')
for idx, n in enumerate(nums):
    # Scale every position except idx. When idx is the first or last index,
    # one of the slices is empty and the update is a no-op, so the ends need
    # no special-casing.
    output[:idx] *= n
    output[idx+1:] *= n
list(output)

[24, 12, 8, 6]

In [None]:
# Build the 3x3 demo matrix locally: the speed-test cell rebinds `a` to a 1-D
# random vector, on which the 2-D indexing below would raise an IndexError.
a = np.array([[1, 2, 3], [4, 5, 6], [1, 10, 100]], dtype='float64')
print(a)
# indexing / slicing: rows from 1 onward, every column but the last
print(a[1:, :-1])

# boolean indexing: mask of the rows whose column-1 value is below 6 ...
print(a[:, 1] < 6)
# ... and the indices of those rows
print(np.argwhere(a[:, 1] < 6).flatten())


# fancy indexing: picks the elements at (1, 0) and (2, 1)
print(a[[1, 2], [0, 1]])

### Reshaping

In [None]:
# reshape: start from the integers 1..64 as a flat vector
a = np.arange(1, 65)
print('original\n', a)

# -1 lets NumPy infer that dimension; an incompatible shape raises an error
b = a.reshape(8, -1)
print(f'8 x {b.shape[1]}\n', b)

# 2 rows, 32 columns
c = a.reshape(2, 32)
print('2 x 32\n', c)

# np.newaxis inserts a new length-1 axis into the data;
# useful when a model requires its input in a particular shape
print(a.shape)
d = a[np.newaxis, :]   # new leading axis -> shape (1, 64)
print(d)
print(d.shape)

e = a[:, np.newaxis]   # new trailing axis -> shape (64, 1)
print(e)
print(e.shape)

In [80]:
# Select row 1 of a 2-D array. The array is defined here so the cell runs on a
# fresh kernel: the `a` left by the reshape cell is 1-D and cannot take two indices.
a = np.array([[1, 2], [3, 4]])
a[1, :]

array([3, 4])

### Concatenation

In [None]:
# concatenation of a 2x2 array with a 1x2 array
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])

# axis=None flattens both inputs before joining them;
# axis=0 or axis=1 would instead join along an existing axis
c = np.concatenate([a, b], axis=None)
print(c)

# hstack: stack arrays in sequence horizontally; the argument is a sequence of arrays
c = np.hstack([a, b.T])
print(c)

# vstack: stack arrays in sequence vertically; the argument is a sequence of arrays
c = np.vstack([b, b])
print(c)


### Broadcasting

In [None]:
# Broadcasting is the mechanism that lets NumPy perform arithmetic on arrays
# of different shapes.
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
v = np.array([1, 0, -1])

print(x + v)    # v, shape (3,), is added to each row of x, shape (4, 3)
print(x * 0.1)  # the scalar is applied to every element of x

### Statistical functions

In [None]:
# Two noisy feature columns (constant 25 and constant 2016, plus standard-normal
# noise), then standardized so that each column has zero mean and unit standard
# deviation.
features = np.column_stack((np.full(50, 25.0), np.full(50, 2016.0)))
features += np.random.randn(50, 2)
# axis=0 reduces over the rows, so every column is centred and scaled by its
# own mean and standard deviation in a single vectorized expression.
features = (features - features.mean(axis=0)) / features.std(axis=0)
features


In [None]:
from matplotlib import pyplot as plt
import numpy as np

# linspace: 10 evenly spaced values from 0 to 9 inclusive
x = np.linspace(0, 9, 10)
print(x)

# 100 normally distributed samples with mean 2 and standard deviation 2.
# The values are unbounded; they are not confined to an interval.
y = 2 * np.random.randn(100) + 2
print(y.sum())  # other reductions: min max sum mean std var

# 10 distinct indices drawn from range(100) without replacement
rand_id = np.random.choice(100, size=10, replace=False)
print(rand_id)
plt.scatter(x, y[rand_id])

### Linear Algebra

In [23]:
# X is a 3x2 array.
X = np.array([[1,2], [2.3,4.1], [1.9, 3.8]])
# np.cov treats each ROW as a variable by default (rowvar=True), so the
# covariance matrix here is 3x3, matching the 3x3 eigenvector output below.
# NOTE(review): if the two columns are meant to be the features,
# np.cov(X, rowvar=False) (a 2x2 matrix) is probably what was intended — confirm.
cov = np.cov(X)
# Eigen-decomposition of the covariance matrix; eigenvectors are the columns of vects.
vals, vects = np.linalg.eig(cov)
vects


array([[-0.93413675,  0.35691531, -0.17672691],
       [ 0.24546659,  0.64244755, -0.66640352],
       [ 0.25910362,  0.67813908,  0.72434381]])

In [None]:
# eigen-decomposition of a small 2x2 matrix
a = np.array([[1, 2], [3, 4]])
eigenvalues, eigenvectors = np.linalg.eig(a)

# Tip: for a symmetric matrix, np.linalg.eigh is the faster choice
print(eigenvalues)
print(eigenvectors)        # the eigenvectors are the columns
print(eigenvectors[:, 0])  # column 0 pairs with eigenvalues[0]

# check the defining identity  A v = lambda v  on the first eigenpair
d = eigenvalues[0] * eigenvectors[:, 0]
e = a @ eigenvectors[:, 0]
print(d, e)
print(d == e)  # exact equality is unreliable under floating-point round-off

# the right way to compare floating-point arrays: with a tolerance
print(np.allclose(d, e))

### Load data from file

In [None]:
# Loading comma-separated integer data from a text file.

# 1) np.loadtxt() -- pass skiprows=1 to skip a header line
data = np.loadtxt('data.txt', dtype=np.int32, delimiter=",")
print(data.shape, data.dtype)

# 2) np.genfromtxt() -- also accepts e.g. missing_values="---", filling_values=0.0
#    Here the first line of the file is skipped and the fields are given names.
data = np.genfromtxt(
    'data.txt',
    dtype=np.int32,
    delimiter=",",
    skip_header=1,
    names=['first', 'second'],
)
print(data)


### NaN is a floating-point value: it equals neither None nor any other NaN

In [5]:
# Only the last expression of a cell is displayed, so this cell shows just the isnan result.
np.nan == np.nan        # False: NaN never compares equal, not even to itself
np.nan == None          # False: NaN is a float value, not None
np.isnan(np.nan)        # True: np.isnan is the way to test for NaN

True