# Numpy
The scikit-learn library expects its input tables as 2-dimensional NumPy arrays.

## Initializing Numpy arrays

In [1]:
import numpy as np

In [2]:
# create the integers 0 through 9 and reshape them into 5 rows by 2 columns
np.arange(10).reshape((5,2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7],
       [8, 9]])

In [3]:
# Initialize an array of zeros with np.zeros
np.zeros((5,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

In [4]:
# initialize an array of ones using np.ones; the dtype argument sets the element type (int here)
np.ones((5,2), dtype= int)

array([[1, 1],
       [1, 1],
       [1, 1],
       [1, 1],
       [1, 1]])

In [5]:
# use np.empty to allocate memory for an array WITHOUT initializing it:
# the contents are whatever was already in memory (they only happen to be zeros in the output below)
np.empty((5,2), dtype= float)

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.]])

## Indexing
Look up the values of a 2-dimensional array with indexing.

In [10]:
# shift the values 0..9 up by one: the scalar 1 is broadcast across every element
array_1 = np.arange(10).reshape(5, 2) + 1
array_1

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10]])

In [None]:
# shape is the size along each axis: (5, 2) means 5 rows and 2 columns
array_1.shape

In [None]:
# ndim is the number of axes (dimensions): 2 for this array
array_1.ndim

In [11]:
# view first element
array_1[0,0]

1

In [13]:
# view first row
array_1[0,:]

array([1, 2])

In [14]:
# view first column
array_1[:,0]

array([1, 3, 5, 7, 9])

In [15]:
# view a slice of rows: indices 2 through 4 (third to fifth rows), all columns
array_1[2:5,:]

array([[ 5,  6],
       [ 7,  8],
       [ 9, 10]])

In [16]:
# view the third to fifth rows (indices 2 through 4) of the first column only
array_1[2:5,0]

array([5, 7, 9])

## Boolean arrays

In [17]:
# the comparison is applied element-wise and returns a boolean array of the same shape
array_1>5

array([[False, False],
       [False, False],
       [False,  True],
       [ True,  True],
       [ True,  True]])

In [18]:
# to filter by boolean array
array_1[array_1>5]

array([ 6,  7,  8,  9, 10])

## Arithmetic Operations

In [24]:
array_1

array([[ 1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8],
       [ 9, 10]])

In [19]:
# add all the elements using sum method
array_1.sum()

55

In [23]:
# find sums by row
array_1.sum(axis=1)

array([ 3,  7, 11, 15, 19])

In [25]:
# find sums by column
array_1.sum(axis=0)

array([25, 30])

In [26]:
# mean of each column; the result dtype is np.float64 even though array_1 holds integers
# (the old alias np.float is deprecated and has been removed from recent NumPy releases)
array_1.mean(axis= 0)

array([5., 6.])

In [None]:
# element-wise multiplication: each value is multiplied by the value at the same position
array_2= np.arange(10)
array_2 * array_2

In [None]:
# square each element, then arrange the 10 results as 5 rows by 2 columns
(array_2**2).reshape(5,2)

## NaN values
- Most scikit-learn estimators will not accept np.nan values
- Either filter out the NaNs or set them to zero

In [27]:
# np.isnan returns a boolean array that is True wherever the value is NaN
array_3= np.array([np.nan, 0,1,2,np.nan])
np.isnan(array_3)

array([ True, False, False, False,  True])

In [28]:
# filter out NaN values using the inverted (~) boolean mask;
# this returns a new array and leaves array_3 itself unchanged
array_3[~np.isnan(array_3)]

array([0., 1., 2.])

In [29]:
# set NaN values to zero in place (this modifies array_3 itself)
array_3[np.isnan(array_3)]= 0
array_3

array([0., 0., 1., 2., 0.])