## Creating Numpy Arrays

In [1]:
import numpy as np


In [7]:
def test_run():
    # List to 1D array
    #print np.array([2,3,4])
    print np.array([(2,3,4), (5, 6, 7)])

In [8]:
if __name__ == "__main__":
    test_run()

[[2 3 4]
 [5 6 7]]


In [24]:
def test_run1():
    # Empty array
    #print np.empty(5)
    #print np.empty((5,4))
    #print np.ones((5,4))
    print np.ones((5,4), dtype= np.int_)

In [25]:
if __name__ == "__main__":
    test_run1()

[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]]


In [31]:
def test_run2():
    # Generate an array full of random numbers
    #print np.random.random((5,4))
    # Sample numbers from a Gaussian(normal)distribution
    #print np.random.normal(size=(2, 3)) #standard normal (mean=0, s.d=1)
    print np.random.normal(50, 10, size=(2,3))

In [32]:
if __name__ == "__main__":
    test_run2()

[[ 52.78765319  51.26390296  44.51430074]
 [ 44.0613047   45.80687283  43.07202576]]


In [35]:
def test_run3():
    #Random integers
    print np.random.randint(10) # a single integer in [0,10)
    print np.random.randint(0, 10) # same as above, specifying [low, high) explicit
    print np.random.randint(0, 10, size=5) # 5 random integers as a 1D array
    print np.random.randint(0, 10, size=(2,3)) # 2*3 array of random integers

In [36]:
if __name__ == "__main__":
    test_run3()

4
9
[1 9 1 5 9]
[[7 5 7]
 [3 4 9]]


## Array attributes

In [47]:
def test_run4():
    a = np.random.random((5, 4)) # 5*4 array of random numbers
    print a
    print a.shape
    print a.shape[0]
    print a.shape[1]
    print len(a.shape)
    print a.size
    print a.dtype
    

In [48]:
if __name__ == "__main__":
    test_run4()

[[ 0.11655401  0.19052906  0.98395018  0.91406646]
 [ 0.66992598  0.90162533  0.43184072  0.30788882]
 [ 0.08142162  0.10970675  0.92016876  0.11979829]
 [ 0.24088411  0.88816033  0.45542953  0.83311961]
 [ 0.33684177  0.61669066  0.46902725  0.80514616]]
(5L, 4L)
5
4
2
20
float64


## operations on ndarrays

In [55]:
def test_run5():
    np.random.seed(693) # seed the random num generator
    a = np.random.randint(0, 10, size=(5, 4))
    print "Array:\n", a
    
    # Sum of all elements
    print "Sum of all elements:" , a.sum()
    
    # Iterate over raws, to comput sum of each column
    print "Sum of each column:\n", a.sum(axis=0)
    
    # Iterate over colums to compute sum of each row
    print "Sum of each row:\n", a.sum(axis=1)
    
    # Statistics: min, max, mean(across rows, cols, and overall)
    print "Minimum of each column:\n", a.min(axis=0)
    print "Maximun of each row:\n", a.max(axis=1)
    print "Mean of all elements:", a.mean() # leave out axis arg.
    
    

In [56]:
if __name__ == "__main__":
    test_run5()

Array:
[[2 0 5 1]
 [1 3 4 4]
 [9 2 9 1]
 [9 3 7 5]
 [4 7 0 3]]
Sum of all elements: 79
Sum of each column:
[25 15 25 14]
Sum of each row:
[ 8 12 21 24 14]
Minimum of each column:
[1 0 0 1]
Maximun of each row:
[5 4 9 9 7]
Mean of all elements: 3.95


## Locate maximum value


In [57]:
def get_max_index(a):
    """Return the index the maximum value in given 1D array."""
    return a.argmax()

In [58]:
def test_run6():
    
    a = np.array([9, 6, 2, 3, 12, 14, 7, 10], dtype=np.int32) # 32 bit integer array.
    print "Array:\n", a
    
    print "Maximum value:", a.max()
    print "Index of max.:", get_max_index(a)

In [59]:
if __name__ == "__main__":
    test_run6()

Array:
[ 9  6  2  3 12 14  7 10]
Maximum value: 14
Index of max.: 5


## Timing Python operations

In [63]:
import time


def test_run7():
    t1 = time.time()
    print "ML4T"
    t2 = time.time()
    print "The time taken by print statement is", t2 - t1,"seconds"

In [62]:
if __name__ == "__main__":
    test_run7()

ML4T
The time taken by print statement is 0.0 seconds


In [70]:
import numpy as np
from time import time

def how_long(func, *args):
    """ Execute function with given arguments, and measures execution time."""
    t0 = time()
    result = func(*args)
    t1 = time()
    return result, t1 - t0

def manual_mean(arr):
    """ Compute mean (average) of all elements in the given 2D array."""
    sum = 0
    for i in xrange(0, arr.shape[0]):
        for j in xrange(0, arr.shape[1]):
            sum = sum + arr[i, j]
    return sum / arr.size

def numpy_mean(arr):
    """Compute mean(average) using Numpy"""
    return arr.mean()

In [77]:
def test_run8():
    """Function called by Test Run"""
    nd1 = np.random.random((1000, 10000)) # use a sufficiently large array
    
    # Time the two functions, retrieving results and execution times
    res_manual, t_manual = how_long(manual_mean, nd1)
    res_numpy, t_numpy = how_long(numpy_mean, nd1)
    print "Manual: {:.6f}({:.3f} secs.) vs. Numpy: {:.6f}({:.3f} secs.)".format(res_manual,t_manual, res_numpy, t_numpy)
    
    # Make sure both give us the same answer
    
    # Compute speed up
    speedup = t_manual / t_numpy
    print "Numpy mean is", speedup, "times faster than nanual for loops"

In [79]:
if __name__ == "__main__":
    test_run8()

Manual: 0.500139(2.922 secs.) vs. Numpy: 0.500139(0.012 secs.)
Numpy mean is 243.498291345 times faster than nanual for loops


## Accessing array elements

In [90]:
def test_run9():
    a = np.random.rand(5, 4)
    print "Array:\n", a
    
    # Accessing element at position(3,2)
    element = a[3, 2]
    print element
    print a[0, 1:3]
    print a[:, 0:3:2]
    # Assigning a value to a particular location
    a[0, 0] = 1
    print "\nModified (replaced one element): \n", a
    
    a[:, 3] = [1, 2, 3, 4, 5]
    print "\nModified (replaced a column with a list):\n", a
    
    

In [91]:
if __name__ == "__main__":
    test_run9()

Array:
[[ 0.01555187  0.69030769  0.9418863   0.88984321]
 [ 0.87055418  0.60803597  0.30997231  0.10427046]
 [ 0.10357885  0.70773864  0.91805017  0.36532294]
 [ 0.00613004  0.77027563  0.8053355   0.4649844 ]
 [ 0.41929022  0.99456605  0.48670849  0.10894879]]
0.805335499388
[ 0.69030769  0.9418863 ]
[[ 0.01555187  0.9418863 ]
 [ 0.87055418  0.30997231]
 [ 0.10357885  0.91805017]
 [ 0.00613004  0.8053355 ]
 [ 0.41929022  0.48670849]]

Modified (replaced one element): 
[[ 1.          0.69030769  0.9418863   0.88984321]
 [ 0.87055418  0.60803597  0.30997231  0.10427046]
 [ 0.10357885  0.70773864  0.91805017  0.36532294]
 [ 0.00613004  0.77027563  0.8053355   0.4649844 ]
 [ 0.41929022  0.99456605  0.48670849  0.10894879]]

Modified (replaced a column with a list):
[[ 1.          0.69030769  0.9418863   1.        ]
 [ 0.87055418  0.60803597  0.30997231  2.        ]
 [ 0.10357885  0.70773864  0.91805017  3.        ]
 [ 0.00613004  0.77027563  0.8053355   4.        ]
 [ 0.41929022  0.99456

## indexing an array with another array

In [96]:
def test_run10():
    a = np.random.rand(5)
    
    #accessing using list of indices
    indices = np.array([1, 1, 2, 3])
    print a
    print a[indices]

In [97]:
if __name__ == "__main__":
    test_run10()

[ 0.05096528  0.41485692  0.61708567  0.47245055  0.79264623]
[ 0.41485692  0.41485692  0.61708567  0.47245055]


## Boolean or "mask" index arrays

In [109]:
def test_run11():
    a = np.array([(20, 25, 10, 23,26, 32, 10, 5, 0),(0,2,50,20,0,1,28,5,0)])
    print a
    
    # calculating mean
    mean = a.mean()
    print mean
    
    # masking
    print a[a<mean]== mean
    print a
    

In [110]:
if __name__ == "__main__":
    test_run11()

[[20 25 10 23 26 32 10  5  0]
 [ 0  2 50 20  0  1 28  5  0]]
14.2777777778
[False False False False False False False False False False]
[[20 25 10 23 26 32 10  5  0]
 [ 0  2 50 20  0  1 28  5  0]]


## Arithmetic operations

In [114]:
def test_run12():
    a = np.array([(1,2,3,4,5),(10,20,30,40,50)])
    print "Original array a:\n", a
    
    b = np.array([(100, 200, 300, 400, 500),(1, 2, 3, 4, 5)])
    print "\nOriginal array b:n", b
    
    # Divide a by b
    print "\nDivide a by b:\n", a / b

In [116]:
if __name__ == "__main__":
    test_run12()

Original array a:
[[ 1  2  3  4  5]
 [10 20 30 40 50]]

Original array b:n [[100 200 300 400 500]
 [  1   2   3   4   5]]

Divide a by b:
[[ 0  0  0  0  0]
 [10 10 10 10 10]]
