# Data Science with Python

### Numpy

    NumPy is a Python package. It stands for 'Numerical Python'. It is a library consisting of multidimensional array objects and a collection of routines for processing of array.

###### 1. Imports

In [22]:
import numpy as np

###### 2. Ndarray Object

One dimensional

In [23]:
arr = np.array([1, 2, 3])
print arr

[1 2 3]


More than one dimension

In [285]:
arr = np.array([[1, 2], [3, 4]])
print arr

[[1 2]
 [3 4]]


###### 3. dtype

    Data types: bool, int(8, 16, 32, 64), uint(8, 16, 32, 64), float(8, 16, 32, 64), complex(64, 128)

###### 4. Array Attributes

Create empty array

In [65]:
arr = np.empty([3, 2], dtype=float)
print arr

[[ 0.  0.]
 [ 0.  0.]
 [ 0.  0.]]


Create an array filled with zeros

In [74]:
arr = np.zeros(5) # default dtype is float
print arr

arr = np.zeros([2, 2], dtype=int)
print arr

[ 0.  0.  0.  0.  0.]
[[0 0]
 [0 0]]


Create array filled with ones

In [286]:
arr = np.ones(5)
print arr

arr = np.ones([2, 2],dtype=int)
print arr

[ 1.  1.  1.  1.  1.]
[[1 1]
 [1 1]]


Create array from existing data

In [287]:
a = [[1, 2], [3, 4]]

arr = np.asarray(a, dtype=float)
print arr

[[ 1.  2.]
 [ 3.  4.]]


Create array with sequence with starting and ending and step size

In [83]:
arr = np.arange(1, 10, 2, dtype=float) # (start, stop, step, data_type)
print arr

[ 1.  3.  5.  7.  9.]


Create array with sequence with starting and ending with number of intervals
and values will be evenly spread in intervals

In [96]:
arr = np.linspace(1, 10, 6, dtype=float, retstep=True) # retstep for interval size
print arr

(array([  1. ,   2.8,   4.6,   6.4,   8.2,  10. ]), 1.8)


Ndarray.shape

In [93]:
arr = np.array([[1,2,3],[4,5,6]])
print arr.shape

(2, 3)


Resize the ndarray

In [39]:
arr = np.array([[1,2,3],[4,5,6]])
arr.shape = (3, 2)
print arr

[[1 2]
 [3 4]
 [5 6]]


Resize the ndarray with reshape function

In [43]:
arr = np.array([[1,2,3],[4,5,6]])
re_arr = arr.reshape(3, 2)
print re_arr

[[1 2]
 [3 4]
 [5 6]]


Get the dimension of array

In [288]:
re_arr.ndim

2

###### 5. Indexing and slicing

Inbuilt slice function, slice(start_index, stop_index, step)

In [289]:
arr = np.arange(1, 10)
sliced = slice(2, 7, 2)
print arr
print arr[sliced]

# another way
print arr[2:7:2]

[1 2 3 4 5 6 7 8 9]
[3 5 7]
[3 5 7]


Slicing a single item

In [104]:
print arr[4]

4


Slicing item starting from index

In [106]:
print arr[2:]

[3 4 5 6 7 8 9]


Slicing staring_index and stop_index

In [108]:
print arr[2:5]

[3 4 5]


Slicing ndimensional array

In [290]:
arr = np.array([[1,2,3],[3,4,5],[4,5,6]])
print 'Array: '
print arr, '\n'

print 'Getting items in second row: '
print arr[1,], '\n' # or print arr[1,...]

print 'Getting till second row: '
print arr[1:], '\n'

print 'Getting items in second column only: '
print arr[...,1], '\n'

print 'Getting items from second column onwards: '
print arr[...,1:], '\n'

Array: 
[[1 2 3]
 [3 4 5]
 [4 5 6]] 

Getting items in second row: 
[3 4 5] 

Getting till second row: 
[[3 4 5]
 [4 5 6]] 

Getting items in second column only: 
[2 4 5] 

Getting items from second column onwards: 
[[2 3]
 [4 5]
 [5 6]] 



Indexing

In [293]:
x = np.array([[1, 2], [3, 4], [5, 6]]) 
y = x[[0,1,2], [0,1,0]] 
print y #selecting values (0,0), (1,1), (2,x = np.array([[ 0,  1,  2],[ 3,  4,  5],[ 6,  7,  8],[ 9, 10, 11]]) 

[1 4 5]


Slicing and Indexing

In [292]:
x = np.array([[ 0,  1,  2],[ 3,  4,  5],[ 6,  7,  8],[ 9, 10, 11]]) 

print 'Our array is:' 
print x 
print '\n'  

# slicing 
z = x[1:4,1:3] 

print 'After slicing, our array becomes:' 
print z 
print '\n'  

# using advanced index for column 
y = x[1:4,[1,2]] 

print 'Slicing using advanced index for column:' 
print y

Our array is:
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


After slicing, our array becomes:
[[ 4  5]
 [ 7  8]
 [10 11]]


Slicing using advanced index for column:
[[ 4  5]
 [ 7  8]
 [10 11]]


Conditioning

In [158]:
x = np.array([[ 0,  1,  2],[ 3,  4,  5],[ 6,  7,  8],[ 9, 10, 11]]) 

print 'Our array is:' 
print x 
print '\n'  

# Now we will print the items greater than 5 
print 'The items greater than 5 are:' 
print x[x > 5]

Our array is:
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]]


The items greater than 5 are:
[ 6  7  8  9 10 11]


Removing NaN values from array

In [294]:
arr = np.array([np.nan, 1, 2, 3, np.nan, 4, 5])
print 'Our array is: ', arr

print 'Equivalent boolean: ',np.isnan(arr)

print 'Array without NaN: ',arr[~np.isnan(arr)]

Our array is:  [ nan   1.   2.   3.  nan   4.   5.]
Equivalent boolean:  [ True False False False  True False False]
Array without NaN:  [ 1.  2.  3.  4.  5.]


###### 6. Broadcasting
    Broadcasting is a powerful mechanism that allows numpy to work with arrays of different shapes when performing arithmetic operations

$\left[ \begin{array}{cccc}
1 & 2 & 3 \\
4 & 5 & 6 \\
7 & 8 & 9 \\
10 & 11 & 12 \\
\end{array}\right]
+ 
\left[ \begin{array}{cccc}
1 & 0 & 1 \\
\end{array} \right]
=
\left[ \begin{array}{cccc}
2 & 2 & 4 \\
5 & 5 & 7 \\
8 & 8 & 10 \\
11 & 11 & 13 \\
\end{array}\right]
$

In [184]:
a = np.array([[1,2,3], [4,5,6], [7,8,9],[10,11,12]], dtype=int)
b = np.array([1,0,1])
c = a + b
print c

[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [190]:
a = np.array([1,2,3])
b = np.array([4,5,6])
c = a * b
print c

(3,)


In [191]:
# Compute outer product of vectors
v = np.array([1,2,3])  # v has shape (3,)
w = np.array([4,5])    # w has shape (2,)
# To compute an outer product, we first reshape v to be a column
# vector of shape (3, 1); we can then broadcast it against w to yield
# an output of shape (3, 2), which is the outer product of v and w:
# [[ 4  5]
#  [ 8 10]
#  [12 15]]
print(np.reshape(v, (3, 1)) * w)

[[ 4  5]
 [ 8 10]
 [12 15]]


In [193]:
# Add a vector to each row of a matrix
x = np.array([[1,2,3], [4,5,6]])
# x has shape (2, 3) and v has shape (3,) so they broadcast to (2, 3),
# giving the following matrix:
# [[2 4 6]
#  [5 7 9]]
print(x + v)

[[2 4 6]
 [5 7 9]]


###### 7. Iterating over array

Row-wise

In [223]:
arr = np.arange(1,5).reshape([2,2])

for elmt in np.nditer(arr):
    print elmt,

1 2 3 4


Column-wise

In [229]:
# First converting array to column wise then printing
arr = arr.copy(order='F')

for elmt in np.nditer(arr):
    print elmt,
    
print ''    
# or to just print column wise
# printing column wise ordered array in row wise
for elmt in np.nditer(arr, order='C'):
    print elmt,


1 3 2 4 
1 2 3 4


Modifying array values while iterating

In [230]:
arr = np.arange(1,5).reshape([2,2])

for elmt in np.nditer(arr, op_flags=['readwrite']):
    elmt[...] = elmt*2
    
print arr

[[2 4]
 [6 8]]


External loop in while iterating

In [235]:
arr = np.arange(1,10).reshape([3,3])

for elmt in np.nditer(arr, flags=['external_loop'], order='F'):
    print elmt,

[1 4 7] [2 5 8] [3 6 9]


###### 8. Array Manipluation

reshape

In [248]:
arr = np.arange(1, 10)
print 'Our Array is:\n',arr, '\nwhose shape is ',arr.shape

arr = arr.reshape([3,3])
print '\nOur Array after changing shape is: \n',arr, '\n whose shape is ',arr.shape

Our Array is:
[1 2 3 4 5 6 7 8 9] 
whose shape is  (9,)

Our Array after changing shape is: 
[[1 2 3]
 [4 5 6]
 [7 8 9]] 
 whose shape is  (3, 3)


flat

In [251]:
arr = np.arange(1, 10).reshape([3,3])
print np.array(arr.flat)

[1 2 3 4 5 6 7 8 9]


flatten

In [252]:
arr = np.arange(1, 10).reshape([3,3])

print 'Array flatten:'
print arr.flatten()
print 'Array flatten column wise'
print arr.flatten(order='F')

Array flatten:
[1 2 3 4 5 6 7 8 9]
Array flatten column wise
[1 4 7 2 5 8 3 6 9]


transpose

In [253]:
arr = np.arange(1, 10).reshape([3,3])

print 'Using transpose function:'
print np.transpose(arr)

print 'Using numpy.ndarray.T:'
print arr.T

Using transpose function:
[[1 4 7]
 [2 5 8]
 [3 6 9]]
Using numpy.ndarray.T:
[[1 4 7]
 [2 5 8]
 [3 6 9]]


sqeeze

In [270]:
x = np.arange(9).reshape(1,3,3) 

print 'Array X:' 
print x 
print '\n'  
y = np.squeeze(x) 

print 'Array Y:' 
print y 
print '\n'  

print 'The shapes of X and Y array:' 
print x.shape, y.shape

Array X:
[[[0 1 2]
  [3 4 5]
  [6 7 8]]]


Array Y:
[[0 1 2]
 [3 4 5]
 [6 7 8]]


The shapes of X and Y array:
(1, 3, 3) (3, 3)


concatenate

In [271]:
a = np.array([[1,2],[3,4]]) 

print 'First array:' 
print a 
print '\n'  
b = np.array([[5,6],[7,8]]) 

print 'Second array:' 
print b 
print '\n'  
# both the arrays are of same dimensions 

print 'Joining the two arrays along axis 0:' 
print np.concatenate((a,b)) 
print '\n'  

print 'Joining the two arrays along axis 1:' 
print np.concatenate((a,b),axis = 1)

First array:
[[1 2]
 [3 4]]


Second array:
[[5 6]
 [7 8]]


Joining the two arrays along axis 0:
[[1 2]
 [3 4]
 [5 6]
 [7 8]]


Joining the two arrays along axis 1:
[[1 2 5 6]
 [3 4 7 8]]


stack

In [278]:
a = np.array([[1,2],[3,4]]) 

print 'First Array:' 
print a 
print '\n'
b = np.array([[5,6],[7,8]]) 

print 'Second Array:' 
print b
print '\n'  

print 'Stack the two arrays along axis 0:' 
print np.stack((a,b),0)
print '\n'  

print 'Stack the two arrays along axis 1:' 
print np.stack((a,b),1)

First Array:
[[1 2]
 [3 4]]


Second Array:
[[5 6]
 [7 8]]


Stack the two arrays along axis 0:
[[[1 2]
  [3 4]]

 [[5 6]
  [7 8]]]


Stack the two arrays along axis 1:
[[[1 2]
  [5 6]]

 [[3 4]
  [7 8]]]


hstack

In [282]:
a = np.array([[1,2],[3,4]]) 

print 'First array:' 
print a 
print '\n'  
b = np.array([[5,6],[7,8]]) 

print 'Second array:' 
print b 
print '\n'  

print 'Horizontal stacking:' 
c = np.hstack((a,b)) 
print c 
print '\n'

First array:
[[1 2]
 [3 4]]


Second array:
[[5 6]
 [7 8]]


Horizontal stacking:
[[1 2 5 6]
 [3 4 7 8]]




vstack

In [283]:
a = np.array([[1,2],[3,4]]) 

print 'First array:' 
print a 
print '\n'  
b = np.array([[5,6],[7,8]]) 

print 'Second array:' 
print b 
print '\n'  

print 'Horizontal stacking:' 
c = np.hstack((a,b)) 
print c 
print '\n'

First array:
[[1 2]
 [3 4]]


Second array:
[[5 6]
 [7 8]]


Horizontal stacking:
[[1 2 5 6]
 [3 4 7 8]]


