In [2]:
import os

import matplotlib.pyplot as plt
import numpy as np

### Basic Numpy

In [48]:
# Build a 1-D float array from a Python list.
vector = np.array([1.0, 3.3, 4.1, 5.3, 6.1])
print(vector)

# reshape() is the documented way to change dimensions; NumPy discourages
# assigning to .shape directly.
x = np.arange(8).reshape(4, 2)
# Boolean-mask assignment: every element smaller than 5 is replaced by 2.
x[x < 5] = 2
x

[1.  3.3 4.1 5.3 6.1]


array([[2, 2],
       [2, 2],
       [2, 5],
       [6, 7]])

In [8]:
# 2x3 integer matrix built from nested lists (one inner list per row).
matrix = np.array(
    [
        [1, 1, 1],
        [8, 4, 5],
    ]
)
print(matrix)

[[1 1 1]
 [8 4 5]]


In [49]:
# Both objects are instances of the same array class, whatever their dimensions.
print(type(vector), type(matrix), sep="\n")

<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [50]:
# The .shape attribute gives the length of each dimension as a tuple.
print(vector.shape)
print(matrix.shape)

(5,)
(2, 3)


In [51]:
# Make vector a 5x1 column. reshape() is preferred over assigning to .shape.
vector = vector.reshape(5, 1)
vector

array([[1. ],
       [3.3],
       [4.1],
       [5.3],
       [6.1]])

In [52]:
# .size is the total number of elements across all dimensions.
print(vector.size)
print(matrix.size)

5
6


In [53]:
# Every element of an array shares one dtype, inferred here from the input lists.
print(vector.dtype, matrix.dtype, sep="\n")

float64
int64


In [54]:
# vector is 2-D (5x1) at this point, so a single index returns a one-element row.
print(vector[1])
# Two indices (row, column) select a single scalar from the matrix.
print(matrix[0, 2])

[3.3]
1


In [55]:
# Assign a scalar to the whole first row. matrix holds integers, so the
# float 3.1 is silently truncated to 3 when stored.
matrix[0] = 3.1
print(matrix)

[[3 3 3]
 [8 4 5]]


In [56]:
# Deliberate failure: a float array cannot store an arbitrary string.
# The error is caught and printed so the notebook still completes under
# "Restart Kernel -> Run All" while showing the same message.
try:
    vector[1] = "string"
except ValueError as err:
    print("ValueError:", err)

ValueError: could not convert string to float: 'string'

In [58]:
# An explicit dtype overrides the integer type that would be inferred
# from the input lists.
matrix = np.array(
    [
        [1, 2],
        [3, 4],
    ],
    dtype=np.float64,
)
print(matrix)

[[1. 2.]
 [3. 4.]]


### Array Generation

#### Generate 1-D arrays, which you can then reshape

In [59]:
# arange(start, stop, step) excludes stop: 36 values from 1.0 to 9.75.
x = np.arange(1, 10, 0.25)
print(x)
# reshape() is preferred over assigning to .shape; 36 values -> 12 rows of 3.
x = x.reshape(12, 3)
x

[1.   1.25 1.5  1.75 2.   2.25 2.5  2.75 3.   3.25 3.5  3.75 4.   4.25
 4.5  4.75 5.   5.25 5.5  5.75 6.   6.25 6.5  6.75 7.   7.25 7.5  7.75
 8.   8.25 8.5  8.75 9.   9.25 9.5  9.75]


array([[1.  , 1.25, 1.5 ],
       [1.75, 2.  , 2.25],
       [2.5 , 2.75, 3.  ],
       [3.25, 3.5 , 3.75],
       [4.  , 4.25, 4.5 ],
       [4.75, 5.  , 5.25],
       [5.5 , 5.75, 6.  ],
       [6.25, 6.5 , 6.75],
       [7.  , 7.25, 7.5 ],
       [7.75, 8.  , 8.25],
       [8.5 , 8.75, 9.  ],
       [9.25, 9.5 , 9.75]])

In [60]:
# 15 evenly spaced samples from 0 to 10, both endpoints included.
print(np.linspace(0, 10, num=15))

[ 0.          0.71428571  1.42857143  2.14285714  2.85714286  3.57142857
  4.28571429  5.          5.71428571  6.42857143  7.14285714  7.85714286
  8.57142857  9.28571429 10.        ]


### Generate n-dimensional arrays with specific values

In [61]:
# Constant-filled 3x3 arrays; the shape is passed as a tuple.
print(np.zeros(shape=(3, 3)))
print(np.ones(shape=(3, 3)))

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [62]:
# 2x3x4 array of uniform samples from [0, 1). No seed is set, so the values
# differ from run to run.
x = np.random.random(size=(2, 3, 4))
print(x)

[[[0.62019092 0.1998687  0.4897405  0.34254051]
  [0.33899834 0.99401079 0.97346192 0.15175402]
  [0.65366425 0.66203432 0.66513209 0.35686794]]

 [[0.68336883 0.66627186 0.41949834 0.26679858]
  [0.41100429 0.4909167  0.43327801 0.48324831]
  [0.78875909 0.91030804 0.42835738 0.9505693 ]]]


### Slicing and indexing

In [63]:
# A single index selects along the first axis: for the (2, 3, 4) array this
# is the first 3x4 block.
x[0]

array([[0.62019092, 0.1998687 , 0.4897405 , 0.34254051],
       [0.33899834, 0.99401079, 0.97346192, 0.15175402],
       [0.65366425, 0.66203432, 0.66513209, 0.35686794]])

In [64]:
# For every block along the first axis, take row 2 and its first two columns;
# the result has shape (2, 2).
x[:,2,:2]

array([[0.65366425, 0.66203432],
       [0.78875909, 0.91030804]])

In [65]:
# Integer-list ("fancy") indexing: the index lists are broadcast against each
# other, so this picks the two elements x[0, 2, 2] and x[0, 0, 2].
x[[0],[2,0],2]

array([0.66513209, 0.4897405 ])

### Basic Arithmetic

In [87]:
# Two 2x2 integer matrices, built with reshape() rather than by assigning to .shape.
a = np.arange(4).reshape(2, 2)     # [[0, 1], [2, 3]]
b = np.arange(4, 8).reshape(2, 2)  # [[4, 5], [6, 7]]
print(a+b)  # element-wise sum
# Other operations to try (results are for the a and b defined above):
#print(a*10)
#print(a*b) # element-wise product: array([[0, 5], [12, 21]])
#print(np.dot(a,b)) # matrix product a.b: array([[6, 7], [26, 31]])
#print(np.dot(b,a)) # matrix product b.a: array([[10, 19], [14, 27]])

[[ 4  6]
 [ 8 10]]


In [67]:
# Random 2x2 matrix used to demonstrate reductions.
x = np.random.random(size=(2, 2))
print(x)
print(x.sum())        # sum of all elements
print(x.max())        # largest element overall
print(x.min(axis=0))  # minimum of each column
print(x.max(axis=1))  # maximum of each row

[[0.81937824 0.91126841]
 [0.91321592 0.48323822]]
3.127100793680372
0.9132159246435322
[0.81937824 0.48323822]
[0.91126841 0.91321592]


In [69]:
# One million uniform random samples arranged as a single column, used as
# input for the timing comparison.
a = np.random.rand(1_000_000, 1)
a

array([[0.94376623],
       [0.04747431],
       [0.67491897],
       ...,
       [0.38605252],
       [0.22897277],
       [0.24536381]])

### Time difference for vectorized operations

In [70]:
# Time squaring every element with a single vectorized expression.
%timeit a**2

1.07 ms ± 14.2 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [73]:
%timeit [a[i]**2 for i in range(1000000)]


1.9 s ± 100 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Real World Example

In [74]:
# Relative path to the Iris CSV, joined with the platform's path separator.
# (os is imported in the notebook's top import cell.)
filename = os.path.join('data', 'iris.csv')

In [75]:
!head data/iris.csv

sepal_length,sepal_width,pedal_length,pedal_width,class
5.1,3.5,1.4,0.2,Iris-setosa
4.9,3.0,1.4,0.2,Iris-setosa
4.7,3.2,1.3,0.2,Iris-setosa
4.6,3.1,1.5,0.2,Iris-setosa
5.0,3.6,1.4,0.2,Iris-setosa
5.4,3.9,1.7,0.4,Iris-setosa
4.6,3.4,1.4,0.3,Iris-setosa
5.0,3.4,1.5,0.2,Iris-setosa
4.4,2.9,1.4,0.2,Iris-setosa


In [76]:
# Read only the four numeric columns; skip the header row and leave out the
# trailing class-label column, which is text.
iris = np.loadtxt(
    filename,
    delimiter=',',
    skiprows=1,
    usecols=(0, 1, 2, 3),
)
print(iris[:5])

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]


In [77]:
# (rows, columns) of the loaded data.
iris.shape

(150, 4)

Let's calculate the mean of the data set and then the distance of each point from that mean. The Euclidean distance between two points $x$ and $y$ is given by:

$$d = \sqrt{ \sum_i (x_i - y_i)^2 }$$

In [78]:
# Averaging over axis 0 collapses the rows, leaving one mean per column.
mean = np.mean(iris, axis=0)
mean.shape

(4,)

In [79]:
# Display the per-column means computed in the previous cell.
mean

array([5.84333333, 3.054     , 3.75866667, 1.19866667])

In [80]:
# Broadcasting subtracts the 4-element mean vector from every row of iris;
# the differences are then squared element-wise.
sq_dist = (iris - mean) ** 2
print(iris.shape)
print(sq_dist.shape)

(150, 4)
(150, 4)


In [81]:
# Show only the first rows; displaying all 150x4 values floods the notebook output.
sq_dist[:20]

array([[5.52544444e-01, 1.98916000e-01, 5.56330844e+00, 9.97335111e-01],
       [8.89877778e-01, 2.91600000e-03, 5.56330844e+00, 9.97335111e-01],
       [1.30721111e+00, 2.13160000e-02, 6.04504178e+00, 9.97335111e-01],
       [1.54587778e+00, 2.11600000e-03, 5.10157511e+00, 9.97335111e-01],
       [7.11211111e-01, 2.98116000e-01, 5.56330844e+00, 9.97335111e-01],
       [1.96544444e-01, 7.15716000e-01, 4.23810844e+00, 6.37868444e-01],
       [1.54587778e+00, 1.19716000e-01, 5.56330844e+00, 8.07601778e-01],
       [7.11211111e-01, 1.19716000e-01, 5.10157511e+00, 9.97335111e-01],
       [2.08321111e+00, 2.37160000e-02, 5.56330844e+00, 9.97335111e-01],
       [8.89877778e-01, 2.11600000e-03, 5.10157511e+00, 1.20706844e+00],
       [1.96544444e-01, 4.17316000e-01, 5.10157511e+00, 9.97335111e-01],
       [1.08854444e+00, 1.19716000e-01, 4.65984178e+00, 9.97335111e-01],
       [1.08854444e+00, 2.91600000e-03, 5.56330844e+00, 1.20706844e+00],
       [2.38187778e+00, 2.91600000e-03, 7.06850844e

In [82]:
# Add up the squared differences across the columns of each row.
sum_sq = np.sum(sq_dist, axis=1)

In [83]:
# Summing over axis 1 leaves a 1-D array with one value per row of sq_dist.
print(sum_sq.shape)

(150,)


In [84]:
# Display only the first 20 values instead of the whole array.
sum_sq[:20]

array([ 7.312104  ,  7.45343733,  8.370904  ,  7.646904  ,  7.56997067,
        5.78823733,  8.036504  ,  6.92983733,  8.66757067,  7.20063733,
        6.71277067,  6.86543733,  7.86183733, 10.66037067,  8.440904  ,
        7.571704  ,  7.59517067,  7.12237067,  5.62277067,  7.01823733])

In [85]:
# Turn the 1-D result into a column vector. reshape(-1, 1) infers the row
# count, so the data size is not hard-coded, and avoids assigning to .shape.
sum_sq = sum_sq.reshape(-1, 1)

In [86]:
# The first 20 values again, now displayed as a column because sum_sq is 2-D.
sum_sq[:20]

array([[ 7.312104  ],
       [ 7.45343733],
       [ 8.370904  ],
       [ 7.646904  ],
       [ 7.56997067],
       [ 5.78823733],
       [ 8.036504  ],
       [ 6.92983733],
       [ 8.66757067],
       [ 7.20063733],
       [ 6.71277067],
       [ 6.86543733],
       [ 7.86183733],
       [10.66037067],
       [ 8.440904  ],
       [ 7.571704  ],
       [ 7.59517067],
       [ 7.12237067],
       [ 5.62277067],
       [ 7.01823733]])