In [1]:
import numpy as np

In [2]:
my_arr = np.arange(1000000)

my_list = list(range(1000000))

In [3]:
# Multiply each sequence by 2

In [4]:
%time for _ in range(10): my_arr2 = my_arr * 2

CPU times: user 31.4 ms, sys: 7.67 ms, total: 39 ms
Wall time: 41.2 ms


In [5]:

%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

CPU times: user 772 ms, sys: 198 ms, total: 970 ms
Wall time: 976 ms


In [6]:
# Convert a list to an array

data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)

arr1

array([6. , 7.5, 8. , 0. , 1. ])

In [10]:
# create array with zeroes, ones, empty

np.zeros(10)
np.zeros((3, 6))
np.ones(10)
np.ones((3, 6))

np.empty((2, 3, 2))

eye = np.eye(3)

iden = np.identity(3)


array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [16]:
# create array with range
arr = np.arange(15)

# create array with shape
arr = np.arange(15).reshape((3, 5))

# create array w dtype
arr = np.array([1, 2, 3], dtype=np.float64)

# convert dtype to another
arr = np.array([1, 2, 3], dtype=np.float64)
arr.dtype = np.int64

arr1 = arr.astype(np.float64)


array([4.60718242e+18, 4.61168602e+18, 4.61393782e+18])

In [None]:
# arithmetical operations w arrays

arr = np.array([[1., 2., 3.], [4., 5., 6.]])
arr * arr # elementwise multiplication
arr - arr # elementwise subtraction
1 / arr # elementwise division
arr ** 0.5 # elementwise square root

In [None]:
# Indexing and slicing
arr = np.arange(10)
arr[5] # show 5 index
arr[5:8] # show 5 to 8 index
arr[5:8] = 12 # assign 12 to 5 to 8 index

# working w 2-dimension array
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
arr2d[2] # show 2nd row
arr2d[0][2] # show 0th row 2nd column

# working w 3-dimension array
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr3d[0] # show 0th row
arr3d[0][1] # show 0th row 1st column
arr3d[0][1][2] # show 0th row 1st column 2nd element

# assign value to 2-dimension array
arr2d[0][2] = 5


In [None]:
# Bool indexing
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
data = np.random.randn(7, 4)

data[names == 'Bob']
data[names == 'Bob', 2:]
data[names == 'Bob', 3]

mask = (names == 'Bob') | (names == 'Will')
data[mask]

# opposite of mask
data[~mask]


In [None]:
# Fancy indexing

arr = np.empty((8, 4))

for i in range(8):
    arr[i] = i

arr[[4, 3, 0, 6]]


In [None]:
# Transposing arrays and swapping axes

arr = np.arange(15).reshape((3, 5))

arr.T

arr = np.random.randn(6, 3)

np.dot(arr.T, arr)

arr = np.arange(16).reshape((2, 2, 4))
arr.transpose((1, 0, 2))

arr.swapaxes(1, 2)


In [None]:
# Universal functions: fast element-wise array functions

arr = np.arange(10)

np.sqrt(arr) # square root
np.exp(arr) # exponential

x = np.random.randn(8)
y = np.random.randn(8)
result = np.maximum(x, y) # element-wise maximum

arr = np.random.randn(7) * 5
remainder, whole_part = np.modf(arr) # return fractional and integral parts

np.abs(arr) # absolute value
np.fabs(arr) # absolute value (faster)
np.square(arr) # square
np.log(arr) # natural logarithm
np.log10(arr) # base 10 logarithm
np.sign(arr) # sign
np.ceil(arr) # ceiling
np.floor(arr) # floor
np.rint(arr) # round to nearest integer
np.isnan(arr) # check if NaN
np.isfinite(arr) # check if finite
np.modf(arr) # return fractional and integral parts
np.cos, np.cosh, np.sin, np.sinh, np.tan, np.tanh # trigonometric functions
np.arccos, np.arccosh, np.arcsin, np.arcsinh, np.arctan, np.arctanh # inverse trigonometric functions

# Binary universal functions

np.add(x, y) # addition
np.subtract(x, y) # subtraction
np.multiply(x, y) # multiplication
np.divide(x, y) # division
np.floor_divide(x, y) # floor division
np.power(x, y) # power
np.maximum(x, y) # element-wise maximum
np.minimum(x, y) # element-wise minimum
np.mod(x, y) # modulo
np.copysign(x, y) # copy sign of y to x
np.greater, np.greater_equal, np.less, np.less_equal, np.equal, np.not_equal # comparison functions
np.logical_and, np.logical_or, np.logical_xor # logical functions
np.logical_not(x) # logical not


In [None]:
# Data processing using arrays
import matplotlib.pyplot as plt
import numpy as np

points = np.arange(-5, 5, 0.01) # 1000 equally spaced points
xs, ys = np.meshgrid(points, points) # create a grid
z = np.sqrt(xs ** 2 + ys ** 2) # calculate the function

plt.imshow(z, cmap=plt.cm.gray); plt.colorbar()
plt.title("Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
plt.show()

In [None]:
# Expressing conditional logic as array operations

xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5]) # values
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5]) # values
cond = np.array([True, False, True, True, False]) # condition   

result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)] # using list comprehension
result = np.where(cond, xarr, yarr) # using np.where

arr = np.random.randn(4, 4)
np.where(arr > 0, 2, -2) # set positive values to 2 and negative values to -2


In [None]:
# Mathematical and statistical methods

arr = np.random.randn(5, 4) # normally-distributed data
arr.mean() # mean
np.mean(arr) # mean
arr.sum() # sum
arr.mean(axis=1) # mean by row
arr.sum(axis=0) # sum by column
arr.cumsum(axis=0) # cumulative sum by column
arr.cumprod(axis=1) # cumulative product by row
np.std(arr) # standard deviation
np.var(arr) # variance
np.min(arr) # minimum
np.max(arr) # maximum
np.argmin(arr) # index of minimum
np.argmax(arr) # index of maximum
np.median(arr) # median
np.percentile(arr, 75) # 75th percentile
np.any(arr > 0) # check if any value is true
np.all(arr > 0) # check if all values are true


In [None]:
# Sorting

arr = np.random.randn(6) # normally-distributed data 
arr.sort() # sort in-place

arr = np.random.randn(5, 3) # normally-distributed data
arr.sort(1) # sort by row

large_arr = np.random.randn(1000) # normally-distributed data
large_arr.sort()
large_arr[int(0.05 * len(large_arr))] # 5% quantile


In [None]:
# Unique and other set logic

names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe']) # values
np.unique(names) # unique values

ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4]) # values
np.unique(ints) # unique values

values = np.array([6, 0, 0, 3, 2, 5, 6]) # values
np.in1d(values, [2, 3, 6]) # check if values are in another array

np.unique(values) # unique values
np.intersect1d(values, [2, 3, 6]) # intersection
np.union1d(values, [2, 3, 6]) # union
np.setdiff1d(values, [2, 3, 6]) # set difference
np.setxor1d(values, [2, 3, 6]) # set symmetric difference

In [None]:
# Linear algebra

x = np.array([[1., 2., 3.], [4., 5., 6.]]) # 2x3 matrix
y = np.array([[6., 23.], [-1, 7], [8, 9]]) # 3x2 matrix
x.dot(y) # matrix multiplication
np.dot(x, y) # matrix multiplication

np.dot(x, np.ones(3)) # matrix-vector multiplication
np.ones(3) @ x # matrix-vector multiplication

from numpy.linalg import inv, qr

X = np.random.randn(5, 5) # 5x5 matrix
mat = X.T.dot(X) # matrix multiplication
inv(mat) # inverse
mat.dot(inv(mat)) # identity matrix

q, r = qr(mat) # QR decomposition
r

np.diag(mat) # diagonal
np.trace(mat) # trace
np.linalg.det(mat) # determinant
np.linalg.eig(mat) # eigenvalues and eigenvectors
np.linalg.svd(mat) # singular value decomposition
np.linalg.solve(mat, np.eye(5)) # Ax = b for x
np.linalg.lstsq(mat, np.eye(5)) # least squares solution to Ax = b

np.dot(mat, np.linalg.inv(mat)) # identity matrix
np.trace(np.dot(mat, np.linalg.inv(mat))) # trace of identity matrix

In [None]:
# Pseudorandom number generation

samples = np.random.normal(size=(4, 4)) # 4x4 matrix of samples
samples

from random import normalvariate

N = 1000000
%timeit samples = [normalvariate(0, 1) for _ in range(N)] # using Python's random module
%timeit np.random.normal(size=N) # using NumPy's random module
from datetime import datetime

np.random.seed(datetime.now().timestamp()) # set seed
np.random.rand(10) # 10 random numbers

np.seed(1234) # set seed
np.random.rand(10) # 10 random numbers
np.permutation(10) # random permutation of 10 numbers
np.shuffle(np.arange(10)) # shuffle 10 numbers
np.rand(10) # 10 random numbers
np.randint(0, 10, 10) # 10 random integers between 0 and 10
np.randn(10) # 10 random normal numbers
np.binomial(10, 0.5, 10) # 10 random binomial numbers
np.normal(0, 1, 10) # 10 random normal numbers
np.beta(1, 1, 10) # 10 random beta numbers
np.chisquare(1, 10) # 10 random chi-square numbers
np.gamma(1, 10) # 10 random gamma numbers
np.uniform(0, 1, 10) # 10 random uniform numbers

In [None]:
# Example: Random Walks

import random
import matplotlib.pyplot as plt

position = 0
walk = [position]
steps = 1000

for i in range(steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
    
plt.plot(walk[:100])
plt.show()

nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()

plt.plot(walk[:100])
plt.show()

# find first value that exceeds 10 or -10
(np.abs(walk) >= 10).argmax()

In [13]:
# Simulating many random walks at once

nwalks = 5000
nsteps = 1000
draws = np.random.randint(0, 2, size=(nwalks, nsteps)) # create 5000x1000 matrix
steps = np.where(draws > 0, 1, -1) # convert 0 to -1

walks = steps.cumsum(1) # cumulative sum by row
walks.max() # maximum
walks.min() # minimum

hits30 = (np.abs(walks) >= 30).any(1) # check if any walk hits 30
hits30.sum() # number of walks that hit 30

crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1) # crossing times
crossing_times.mean() # average crossing time

steps = np.random.normal(loc=0, scale=0.25, size=(nwalks, nsteps)) # create 5000x1000 matrix
walks = steps.cumsum(1) # cumulative sum by row
walks.max() # maximum
walks.min() # minimum


-33.11799970046636