In [65]:
import numpy as np
from numpy.random import randn
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1, dtype = np.float64)

data2 = [[1,2,3,4],[5,6,7,8]]
arr2 = np.array(data2)

data3_string = ['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe']
arr3 = np.array(data3_string)

arr4 = np.arange(32).reshape((8,4))
arr5 = randn(5)

zeros_arr = np.zeros((3,6))
ones_arr = np.ones((3,4))
empty_arr = np.empty((2,3,2))
range_arr = np.arange(15)
eye_arr = np.eye(3)
identity_arr = np.identity(3)

arr2.shape
arr2.ndim
arr1.dtype
print identity_arr

[[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]]


In [18]:
# convert or cast an array from one dtype to another using ndarray’s astype method
float_arr1 = arr2.astype(np.float64)
numeric_strings = np.array(['1.25', '9.6', '42'], dtype = np.string_)
float_arr2 = numeric_strings.astype(float_arr1.dtype)
print float_arr2
print float_arr2.dtype

[  1.25   9.6   42.  ]
float64


In [26]:
# Array vectorization Operations
print arr2 * arr2
print arr2 - arr2
print 1/float_arr1
print arr2 ** .5

[[ 1  4  9 16]
 [25 36 49 64]]
[[0 0 0 0]
 [0 0 0 0]]
[[ 1.          0.5         0.33333333  0.25      ]
 [ 0.2         0.16666667  0.14285714  0.125     ]]
[[ 1.          1.41421356  1.73205081  2.        ]
 [ 2.23606798  2.44948974  2.64575131  2.82842712]]


In [129]:
# Indexing and Slicing
# Array slices are views on the original array - any modifications to the view will be reflected in the source array
arr1_slice = arr1[2:4]
arr1_slice[:] = 123
print arr1
arr1_slice_copy = arr1[2:4].copy()    # a copy of a slice of an ndarray instead of a view
print arr2[0][2], arr2[0,2]
print arr2[:,:1]

data = randn(7,4)
print data
print data[(arr3 == 'Bob') | (arr3 == 'Will'), 2:]  # Selecting data from an array by boolean indexing always creates a copy of the data
data[data<-.5] = -.5
print data
# Facy indexing, always copies the data into a new array
print arr4[[1,5,7,2],[0,3,1,2]]
print arr4[[1,5,7,2]][:,[0,3,1,2]]

[   6.     7.5  123.   123.     1. ]
3 3
[[1]
 [5]]
[[-1.54027491  0.48956219 -0.04647866 -1.30865374]
 [ 0.97092838  0.16502575 -0.43966493  0.70601127]
 [ 0.12541253 -0.37191402 -1.29595168 -0.64166917]
 [-0.71507451 -1.54248968  0.40696114 -2.54185893]
 [ 1.12743354 -1.25926114 -0.49821217  1.29459096]
 [ 0.28415975  1.36648254 -0.4857136   0.07344752]
 [ 1.07384758 -0.71178372  0.61012353  0.78029749]]
[[-0.04647866 -1.30865374]
 [-1.29595168 -0.64166917]
 [ 0.40696114 -2.54185893]
 [-0.49821217  1.29459096]]
[[-0.5         0.48956219 -0.04647866 -0.5       ]
 [ 0.97092838  0.16502575 -0.43966493  0.70601127]
 [ 0.12541253 -0.37191402 -0.5        -0.5       ]
 [-0.5        -0.5         0.40696114 -0.5       ]
 [ 1.12743354 -0.5        -0.49821217  1.29459096]
 [ 0.28415975  1.36648254 -0.4857136   0.07344752]
 [ 1.07384758 -0.5         0.61012353  0.78029749]]
[ 4 23 29 10]
[[ 4  7  5  6]
 [20 23 21 22]
 [28 31 29 30]
 [ 8 11  9 10]]


In [62]:
# Transpose, return a view on the data without making a copy
print np.dot(arr2.T,arr)        #computing the inner matrix product XTX using np.dot
print arr2.transpose()
print arr2.swapaxes(0,1)

[[ 21.  27.  33.]
 [ 26.  34.  42.]
 [ 31.  41.  51.]
 [ 36.  48.  60.]]
[[1 5]
 [2 6]
 [3 7]
 [4 8]]
[[1 5]
 [2 6]
 [3 7]
 [4 8]]


In [68]:
# Universal Functions: Fast Element-wise Array Functions
print np.sqrt(arr1)
print np.exp(arr1)
print np.maximum(arr1, arr5)
print np.modf(arr1)    # returns the fractional and integral parts of a floating point array:


[ 2.44948974  2.73861279  2.82842712  0.          1.        ]
[  4.03428793e+02   1.80804241e+03   2.98095799e+03   1.00000000e+00
   2.71828183e+00]
[ 6.          7.5         8.          0.40254393  1.        ]
(array([ 0. ,  0.5,  0. ,  0. ,  0. ]), array([ 6.,  7.,  8.,  0.,  1.]))


In [82]:
points = np.arange(4)
xs, ys = np.meshgrid(points, points)  # The np.meshgrid function takes two 1D arrays and produces two 2D matrices corresponding to all pairs of (x, y) in the two arrays
print np.sqrt(xs**2+ys**2)

[[ 0.          1.          2.          3.        ]
 [ 1.          1.41421356  2.23606798  3.16227766]
 [ 2.          2.23606798  2.82842712  3.60555128]
 [ 3.          3.16227766  3.60555128  4.24264069]]


In [130]:
# Expressing Conditional Logic as Array Operations
result1 = np.where(arr3[:5]=='Bob', arr1, arr5)  # x if condition else y
result2 = np.where(data > 0, 1, -1)
result3 = np.where(data > 0, data, 0)
cond1 = (arr3 == 'Bob') | (arr3 == 'Joe')
cond2 = (arr3 == 'Bob') | (arr3 == 'Will')
result4 = np.where(cond1&cond2, 0, np.where(cond1, 1, np.where(cond2, 2, 3)))

In [148]:
# Mathematical and Statistical Methods
print arr2
print arr2.mean(), arr2.sum()
print np.mean(arr2), np.sum(arr2)
print arr2.mean(1)
print arr2.sum(0)
print arr2.cumsum(0)
print arr2.cumprod(1)
print arr2.argmax(1)

print cond1.any()    # any: tests whether one or more values in an array is True
print cond1.all()    # all: checks if every value is True

print data.sort(0)
print np.sort(data)
print np.unique(arr3)  # returns the sorted unique values in an array
print np.in1d(arr3, ['Bob', 'Will'])  # Compute a boolean array indicating whether each element ofxis contained iny

[[1 2 3 4]
 [5 6 7 8]]
4.5 36
4.5 36
[ 2.5  6.5]
[ 6  8 10 12]
[[ 1  2  3  4]
 [ 6  8 10 12]]
[[   1    2    6   24]
 [   5   30  210 1680]]
[3 3]
True
False
None
[[-0.5        -0.5        -0.5         0.12541253]
 [-0.5        -0.5        -0.37191402  0.40696114]
 [-0.5        -0.5        -0.04647866  0.48956219]
 [-0.5        -0.49821217  0.28415975  0.97092838]
 [-0.5         0.07344752  0.70601127  1.07384758]
 [-0.4857136   0.16502575  0.78029749  1.29459096]
 [-0.43966493  0.61012353  1.12743354  1.36648254]]
['Bob' 'Joe' 'Will']
[ True False  True  True  True False False]


In [154]:
# File Input and Output with Arrays
# Storing Arrays on Disk in Binary Format
np.save('some_array', arr1)     # Arrays are saved by default in an uncompressed raw binary format with file extension .npy
np.load('some_array.npy')
np.savez('array_archive.npz', a = arr1, b = arr2)   # save multiple arrays in a zip archive using np.savez and passing the arrays as key- word arguments
arch = np.load('array_archive.npz')   # When loading an .npz file, you get back a dict-like object which loads the individualarrays
arch['b']

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [161]:
# Linear Algebra, other functions: p122
print arr2.dot(arr2.T)
print np.dot(arr2, arr2.T)
print
from numpy.linalg import inv, qr
mat = data.T.dot(data)
print mat
print inv(mat)    # inverse
print mat.dot(inv(mat))
q, r = qr(mat)    # QR decomposition
print q
print r

[[ 30  70]
 [ 70 174]]
[[ 30  70]
 [ 70 174]]

[[ 1.67922295  0.61397715 -0.91058326 -2.7629508 ]
 [ 0.61397715  1.40309413  1.18612218  0.13153918]
 [-0.91058326  1.18612218  2.84964954  3.34801658]
 [-2.7629508   0.13153918  3.34801658  6.06010766]]
[[ 25.40139531   1.86755824 -17.72352365  21.33226117]
 [  1.86755824   6.37026983  -8.24332588   5.26736899]
 [-17.72352365  -8.24332588  21.09421884 -19.55554332]
 [ 21.33226117   5.26736899 -19.55554332  20.58039442]]
[[  1.00000000e+00  -1.77635684e-15   0.00000000e+00  -7.10542736e-15]
 [  3.55271368e-15   1.00000000e+00  -3.55271368e-15   1.77635684e-15]
 [  0.00000000e+00   0.00000000e+00   1.00000000e+00  -1.42108547e-14]
 [  0.00000000e+00   0.00000000e+00   0.00000000e+00   1.00000000e+00]]
[[-0.49177053 -0.28357    -0.56978301  0.59421975]
 [-0.1798069  -0.71210486  0.66260692  0.14672494]
 [ 0.26666978 -0.63040371 -0.48450932 -0.54472848]
 [ 0.80914674 -0.12282459 -0.03937136  0.57327617]]
[[-3.41464737 -0.13148444  3.70347719

In [162]:
# Random Number Generation, other functions: p123
normal_samples = np.random.normal(size = (4,4))

In [183]:
# Example: Random Walks
rand_numbers = randn(50,1000)
steps = np.where(rand_numbers>0,1,-1)
walks = steps.cumsum(1)
print walks.shape
hit30 = (np.abs(walks)>30).any(1)
print hit30.sum()
walks_hit30 = walks[hit30]
print walks_hit30.shape
crossing_time = (np.abs(walks[hit30])>30).argmax(1)
print crossing_time.mean()

(50, 1000)
35
(35, 1000)
514.4
