# Numpy

Numerical Python, or "NumPy" for short, is a foundational package on which most Python data science packages are built.

It provides high-performance multi-dimensional arrays.


The key features of numpy are:
- ndarrays: fast and space-efficient n-dimensional arrays whose elements all share the same data type.
    - a large number of built-in methods for ndarrays
    - rapid processing of data without using loops
    - Broadcasting: a useful tool which defines implicit behavior between multi-dimensional arrays of different sizes.
- Vectorization: enables numeric operations on ndarrays.
- Input/Output: simplifies reading data from and writing data to files.

# Getting started with ndarray (N-dimensional array)

It is a collection of items of the same type. Items in the collection can be accessed using a zero-based index.

# Creating an ndarray
An ndarray is created using the following function:
    
numpy.array(object, dtype = None, copy = True, order = None, subok = False, ndmin = 0)

Parameters

object     Any object exposing the array interface, an object whose __array__ method returns an array, or any (nested) sequence.
dtype      Desired data type of the array, optional
copy       Optional. By default (True), the object is copied
order      C (row major) or F (column major) or A (any) (default)
subok      By default, the returned array is forced to be a base-class array. If True, sub-classes are passed through
ndmin      Specifies the minimum number of dimensions of the resultant array

In [3]:
import numpy as np 
# One-dimensional array built from a flat Python list.
a = np.array([1, 2, 3])
print("1D nd array")
print(a)

# More than one dimension: a nested list produces a 2-D array.
a2 = np.array([[1, 2], [3, 4]])
print("2D nd array")
print(a2)

# The dtype parameter sets the element type; here each value is stored as complex.
a3 = np.array([1, 2, 3], dtype=complex)
print("nd array with dtype as complex")
print(a3)

1D nd array
[1 2 3]
2D nd array
[[1 2]
 [3 4]]
nd array with dtype as complex
[ 1.+0.j  2.+0.j  3.+0.j]


In [4]:
import numpy as np
s = np.array([
    [1, 2, 1],
    [3, 4, 5],
    [5, 6, 7],
])

# shape is a tuple with the length of each axis: (rows, columns) for a 2-D array
print("(rows, columns) in s", s.shape)

# the Python type of the array object
print(type(s))

# ndarrays are mutable: assigning through an index overwrites the element in place
s[0, 1] = 7
print("updated array s ")
print(s)

# ndim is the number of dimensions (axes) of the array
print("dimension ", s.ndim)

# itemsize is the size of a single element in bytes
print("itemsize", s.itemsize)

('(rows, columns) in s', (3, 3))
<type 'numpy.ndarray'>
updated array s 
[[1 7 1]
 [3 4 5]
 [5 6 7]]
('dimension ', 2)
('itemsize', 4)


# Indexing and Slicing

Similar to the use of slice indexing with lists and strings, we can use slice indexing to pull out sub-regions of ndarrays.

In [5]:
import numpy as np 
a = np.arange(10)

# A slice object bundles (start, stop, step) and can be used directly as an index.
s = slice(2, 7, 2)
print("using slice(start,stop,step)", a[s])

# The same selection written with the start:stop:step shorthand.
b = a[2:7:2]
print("using [start:stop:step]", b)

# Leaving out stop runs to the end of the array; leaving out step uses a stride of 1.
print("a[2:] = ", a[2:])
print("a[2:5] = ", a[2:5])

# A 2-D array takes one slice per axis: rows 0-1 and columns 1-2 here.
x = np.array([
    [11, 12, 13, 14],
    [21, 22, 23, 24],
    [31, 32, 33, 34],
])
print("x[:2,1:3] = ", x[:2, 1:3])

('using slice(start,stop,step)', array([2, 4, 6]))
('using [start:stop:step]', array([2, 4, 6]))
('a[2:] =', array([2, 3, 4, 5, 6, 7, 8, 9]))
('a[2:5] =', array([2, 3, 4]))
('x[:2,1:3] =', array([[12, 13],
       [22, 23]]))


# Advanced Indexing: Boolean

In [5]:
# Build a 3x2 array (three rows, two columns) for the boolean-indexing examples.
an_array = np.array([
    [11, 12],
    [21, 22],
    [31, 32],
])
print(an_array)

[[11 12]
 [21 22]
 [31 32]]


In [6]:
# Comparing an array with a scalar works element by element: the result holds
# one boolean per element, telling whether that element meets the condition.
# NOTE(review): the name `filter` shadows Python's built-in filter(); it is kept
# unchanged because a later cell indexes with this same name.
filter = an_array > 20
filter

array([[False, False],
       [ True,  True],
       [ True,  True]], dtype=bool)

In [7]:
# Index an_array with the boolean array `filter` to select just those elements
# that meet the condition; the selected values are printed.
print(an_array[filter])

[21 22 31 32]


In [8]:
# Shorthand: write the condition inline instead of creating a separate filter array.
# Here it selects the even elements.
an_array[an_array % 2 == 0]

array([12, 22, 32])

In [9]:
# Add 100 to every even element, in place, using a boolean mask as the index.
# Note: this mutates an_array, and an even value plus 100 is still even, so
# running this cell again adds another 100 to the same elements.
an_array[an_array % 2 == 0] +=100
print(an_array)

[[ 11 112]
 [ 21 122]
 [ 31 132]]


# Arithmetic Array Operations

In [15]:
# Adding two ndarrays element by element (subtraction, multiplication and
# division work the same way).
# np.int was only an alias for the builtin int; it was deprecated in NumPy 1.20
# and removed in NumPy 1.24, so an explicit integer dtype is used instead.
x = np.array([[111, 112], [121, 122]], dtype=np.int64)
y = np.array([[211.1, 212.1], [221.1, 222.1]], dtype=np.float64)

# Adding an integer array to a float array gives a floating-point result.
print("x+y = ", x + y)

('x+y = ', array([[ 322.1,  324.1],
       [ 342.1,  344.1]]))


In [11]:
# Square root of every element of x, computed element by element.
print(np.sqrt(x))

[[ 10.53565375  10.58300524]
 [ 11.          11.04536102]]


In [12]:
# Exponential (e ** x) of every element of x, computed element by element.
print(np.exp(x))

[[  1.60948707e+48   4.37503945e+48]
 [  3.54513118e+52   9.63666567e+52]]


# Array Manipulation

# Reshaping an array
The `numpy.reshape` function gives a new shape to an array without changing its data. It accepts the following parameters:

numpy.reshape(arr, newshape, order='C')

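Parameters
arr      Array to be reshaped
newshape int or tuple of int; the new shape must be compatible with the original shape
order    'C' for C style, 'F' for Fortran style, 'A' for Fortran style if the array is Fortran-contiguous in memory and C style otherwise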

In [6]:
import numpy as np
a = np.arange(8)
print('The original array:')
print(a)
print()

# Lay the same eight values out as 4 rows by 2 columns.
b = np.reshape(a, (4, 2))
print('The modified array:')
print(b)

The original array:
[0 1 2 3 4 5 6 7]
()
The modified array:
[[0 1]
 [2 3]
 [4 5]
 [6 7]]


# Basic Statistical Operations

In [7]:
# Set up a random 2 x 5 matrix: samples from np.random.randn scaled by 10.
# NOTE(review): no random seed is set, so the values differ on every run.
arr = 10 * np.random.randn(2, 5)
print(arr)

[[ 32.28207212 -10.6764703  -23.4424081    2.66246577   4.86278231]
 [  0.67165558   0.98909827   8.8954801   -1.41688078  -6.44073167]]


In [16]:
# Mean over every element of arr.
print("mean of all elements = ", arr.mean())

# axis=1 averages along each row, giving one mean per row.
print("mean of a row=", arr.mean(axis=1))

# axis=0 averages down each column, giving one mean per column.
print("mean of a column", arr.mean(axis=0))

('mean of all elements = ', 0.83870632974369852)
('mean of a row=', array([ 1.13768836,  0.5397243 ]))
('mean of a column', array([ 16.47686385,  -4.84368601,  -7.273464  ,   0.62279249,  -0.78897468]))


In [16]:
# Total of all the elements of arr.
print(np.sum(arr))

2.86488777918


# Sort, Search & Counting Functions

In [11]:
import numpy as np  
a = np.array([[3, 7], [9, 1]])

print('Our array is:')
print(a)

# Without an axis argument, np.sort orders the values within each row and
# returns a sorted copy; a itself is not modified.
print('Applying sort() function:')
print(np.sort(a))

# axis=0 orders the values within each column instead.
print('Sort along axis 0:')
print(np.sort(a, axis=0))

Our array is:
[[3 7]
 [9 1]]
Applying sort() function:
[[3 7]
 [1 9]]
Sort along axis 0:
[[3 1]
 [9 7]]


In [12]:
# np.nonzero returns the indices of the non-zero elements of the input array,
# as one index array per axis.
import numpy as np

a = np.array([
    [30, 40, 0],
    [0, 20, 10],
    [50, 0, 60],
])

print('Applying nonzero() function:')
print(np.nonzero(a))

Applying nonzero() function:
(array([0, 0, 1, 1, 2, 2]), array([0, 1, 1, 2, 0, 2]))


In [13]:
# Given only a condition, np.where returns the indices of the elements for
# which the condition is satisfied, as a tuple of index arrays.
import numpy as np

a = np.arange(10)
x = np.where(a > 5)
print(x)

(array([6, 7, 8, 9]),)


In [14]:
# np.unique returns the distinct values found in the input, in sorted order.
array = np.array([1, 2, 1, 4, 2, 1, 4, 2])
print(np.unique(array))

[1 2 4]


# Random Number Generation

In [21]:
# Draw four random integers from the half-open range [2, 50).
Z = np.random.randint(2, 50, size=4)
print(Z)

[ 7 41 35 27]


In [22]:
# Return a new, randomly ordered arrangement of the elements of Z (a permutation).
# The result is not assigned to a name, so it is only shown as the cell's output.
np.random.permutation(Z) 

array([41, 35, 27,  7])