In [1]:
import numpy as np


### Array vs List


In [49]:
x = list(range(10))
xarray = np.array(x, dtype='int64')
print(xarray.ndim)

1


In [46]:
mat = np.array([[1, 2, 3],[10 ,20 ,30],[.1, .2, .3]])
vect = np.full((1, 5), 5)
res = mat*mat
print(res.astype('int64'))

[[  1   4   9]
 [100 400 900]
 [  0   0   0]]


In [58]:
dotp = np.sum(vect*vect)
print(dotp)
print(vect/5)

125
[[1. 1. 1. 1. 1.]]


### Dot Product

In [None]:
x, y = np.full((1, 3), 5), np.full((1, 3), 3)
dot_res = np.sum(x*y)
print(f'first way: {np.sum(x*y)}')
dot_res = x.dot(y.T).flatten()
print(f'second way: {x.dot(y.T).flatten()}')
print(f'new way: {x @ y.T}')

### Vector product

In [None]:
x, y = np.full((2, 4), 8), np.full((4, 3), 2)
print(x, '\n', y, '\n')
print(np.matmul(x, y))

In [61]:
x = [[1, 2, 3], [4, 5, 6], [1, 10, 100]]
a = np.array(x, dtype='float64')
print(a)
# determinant
c = np.linalg.det(a)
print(c)

# inverse
c = np.linalg.inv(a)
print(c)

# diag
c = np.diag(a)
print(c)

# diag on a vector returns diagonal matrix (overloaded function)
c = np.diag([1, 4, 8])
print(c)

[[  1.   2.   3.]
 [  4.   5.   6.]
 [  1.  10. 100.]]
-243.00000000000017
[[-1.81069959  0.69958848  0.01234568]
 [ 1.62139918 -0.39917695 -0.02469136]
 [-0.14403292  0.03292181  0.01234568]]
[  1.   5. 100.]
[[1 0 0]
 [0 4 0]
 [0 0 8]]


### Speed Test array vs list

In [None]:
from timeit import default_timer as timer

a = np.random.randn(1000)
b = np.random.randn(1000)

A = list(a)
B = list(b)

T = 1000

def dot1():
    dot = 0
    for i in range(len(A)):
        dot += A[i]*B[i]
    return dot

def dot2():
    return np.dot(a,b)

start = timer()
for t in range(T):
    dot1()
end = timer()
print(end-start)

start = timer()
for t in range(T):
    dot2()
end = timer()
print(end-start)

###  Indexing/Slicing/Boolean Indexing

In [113]:
# print(a)
# # indexing
# print(a[1:, :-1])

# boolean indexing
# print(a[a[: , 1] < 6])
X = np.random.choice(10000, size=2000, replace=False)
# print(X)
print(np.argwhere(X < 5000).flatten())


# # fancy indexing
# print(a[[1, 2], [0, 1]])

[   1    3    4 ... 1993 1997 1999]


### Reshaping

In [125]:
# reshape
a = np.arange(1, 65)
print('original\n', a)

# b = a.reshape((8, 8)) # error if shape cannot be used
# print('8 x 8\n', b)

# c = a.reshape((2, 32)) # 3 rows, 2 columns
# print('2 x 32\n', c)

# newaxis is used to create a new axis in the data
# needed when model require the data to be shaped in a certain manner
print(a.shape)
d = a[:, np.newaxis]
print(d)
print(d.shape)

e = a[:, np.newaxis]
print(e)
print(e.shape)

original
 [ 1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64]
(64,)
[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]
 [25]
 [26]
 [27]
 [28]
 [29]
 [30]
 [31]
 [32]
 [33]
 [34]
 [35]
 [36]
 [37]
 [38]
 [39]
 [40]
 [41]
 [42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [55]
 [56]
 [57]
 [58]
 [59]
 [60]
 [61]
 [62]
 [63]
 [64]]
(64, 1)
[[ 1]
 [ 2]
 [ 3]
 [ 4]
 [ 5]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]
 [25]
 [26]
 [27]
 [28]
 [29]
 [30]
 [31]
 [32]
 [33]
 [34]
 [35]
 [36]
 [37]
 [38]
 [39]
 [40]
 [41]
 [42]
 [43]
 [44]
 [45]
 [46]
 [47]
 [48]
 [49]
 [50]
 [51]
 [52]
 [53]
 [54]
 [55]
 [56]
 [57]
 [58]
 [59]
 [60]
 [61]
 [62]
 [63]
 [64]]
(64, 1)


In [80]:
a[1, :]

array([3, 4])

### Concatenation

In [120]:
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
print('a \n', a)
print('b \n', b)

a 
 [[1 2]
 [3 4]]
b 
 [[5 6]]


In [128]:
# concatenation
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])
c = np.concatenate((a, b.T), axis=1) # axis=None 0 1
print(c)

# # hstack: Stack arrays in sequence horizontally. shape is tuple
# c = np.hstack((a,b.T))
# print(c)

# vstack: Stack arrays in sequence vertically. shape is tuple
# c = np.vstack((b,b))
# print(c)


[[1 2 5]
 [3 4 6]]


### Broadcasting

In [None]:
# broadcasting
# Broadcasting is a powerful mechanism that allows numpy to work with arrays of 
# different shapes when performing arithmetic operations.
x = np.array([[1,2,3], [4,5,6], [7,8,9], [10, 11, 12]])
v = np.array([1, 0, -1])

print(x + v)  # Add v to each row of x using broadcasting
print(x * .1) 

### Statistical functions

In [None]:
features = np.array([25*np.ones((50,1)), 2016*np.ones((50,1))])
features = features.flatten()
features = features.reshape((2, -1)).T
features += np.random.randn(50,2)
features = np.array([(features[:,0]-np.mean(features[:,0]))/features[:,0].std(), 
                     (features[:,1]-np.mean(features[:,1]))/features[:,1].std()]).T
features.astype(dtype='float64')


In [None]:
from matplotlib import pyplot as plt
import numpy as np
# linspace
x = np.linspace(0, 9, 10)
print(x)
# Normally distributed random ndarray with values in [1, 3]
y = 2*(np.array([np.random.randn() for _ in range(100)]))+2
print(y.sum()) # min max sum mean std var 
rand_id = c = np.random.choice(100, size=10, replace=False)
print(rand_id)
plt.scatter(x, y[rand_id])

### Load data from file

In [None]:
# loading from csv
# 1) load with np.loadtxt()
# skiprows=1
data = np.loadtxt('data.txt', delimiter=",", dtype=np.int32)
print(data.shape, data.dtype)

# 2) load with np.genfromtxt()
# skip_header=0, missing_values="---", filling_values=0.0
data = np.genfromtxt('data.txt', delimiter=",", dtype=np.int32)
print(data)

### scikit-learn

In [None]:
from sklearn import datasets

iris = datasets.load_iris()
X, y = iris.data, iris.target
X.shape
