In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

iris = sns.load_dataset('iris')

`ndarray` — N-dimensional array (tensor), where N is the number of dimensions:

N = 0 -> scalar  
N = 1 -> vector  
N = 2 -> matrix  

In [6]:
n = 10_000

In [9]:
%timeit [ e**2 for e in range(n) ]

2.46 ms ± 89.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
%timeit np.arange(n) ** 2

11.2 µs ± 37.3 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


## Creating arrays

In [12]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [47]:
np.arange(10, 0 ,-1)

array([10,  9,  8,  7,  6,  5,  4,  3,  2,  1])

In [14]:
np.arange(10.)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [15]:
# The `np.float` alias was removed in NumPy 1.24; name the dtype explicitly.
x = np.arange(10, dtype=np.float64)

In [16]:
x

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [17]:
type(x)

numpy.ndarray

In [19]:
x.dtype.name

'float64'

In [20]:
np.arange(10).dtype.name

'int64'

In [21]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [23]:
# np.empty only allocates the buffer and does not initialise it: the contents
# are arbitrary, so any zeros printed below are incidental, not guaranteed.
x = np.empty(10)

In [24]:
print(x)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [25]:
np.array([3, 3, 6, 7, 8, 10])

array([ 3,  3,  6,  7,  8, 10])

In [27]:
# The `np.bool` alias was removed in NumPy 1.24; the builtin `bool` is the
# supported spelling. Non-zero values become True, zero becomes False.
np.array([3, 3, 6, 7, 8, 0], dtype=bool)

array([ True,  True,  True,  True,  True, False])

In [28]:
x.ndim

1

In [29]:
x.shape

(10,)

In [34]:
A = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

In [35]:
A.ndim

2

In [36]:
A.shape

(3, 3)

In [37]:
A.ravel()

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [38]:
A.ravel().ndim

1

In [39]:
A.ravel().shape

(9,)

In [55]:
A = np.array([
    [1, 2, 3, 55],
    [4, 5, 6, 66],
    [7, 8, 9, 88]
])

In [56]:
A

array([[ 1,  2,  3, 55],
       [ 4,  5,  6, 66],
       [ 7,  8,  9, 88]])

In [57]:
A.reshape((-1, 2 ))

array([[ 1,  2],
       [ 3, 55],
       [ 4,  5],
       [ 6, 66],
       [ 7,  8],
       [ 9, 88]])

## Random numbers

In [58]:
np.random.seed(123)

In [59]:
# U[0, 1]
np.random.rand(5)

array([0.69646919, 0.28613933, 0.22685145, 0.55131477, 0.71946897])

In [63]:
np.random.rand(3, 2)

array([[0.84943179, 0.72445532],
       [0.61102351, 0.72244338],
       [0.32295891, 0.36178866]])

In [74]:
# N(0, 1)
np.random.normal(size = (5, 2))

array([[ 1.41729905,  0.80723653],
       [ 0.04549008, -0.23309206],
       [-1.19830114,  0.19952407],
       [ 0.46843912, -0.83115498],
       [ 1.16220405, -1.09720305]])

In [71]:
np.random.randint(0, 10, size = 5)

array([9, 2, 3, 3, 3])

In [72]:
k = ['a', 'b', 'c']

In [73]:
np.random.choice(k, 7, p = [.4, .5, .1])

array(['b', 'b', 'a', 'b', 'a', 'a', 'b'], dtype='<U1')

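`np.r_` — builds an array by concatenating its arguments (scalars, slices, lists) along the first axis.  
`np.c_` — stacks its arguments side by side as the columns of a 2-D array.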

In [76]:
np.array([1, 2, 3])

array([1, 2, 3])

In [77]:
np.r_[1, 2,3]

array([1, 2, 3])

In [79]:
np.r_[5:8]

array([5, 6, 7])

In [80]:
np.r_[7:1:-1, 6, 6, 6, [1, 2] * 3]

array([7, 6, 5, 4, 3, 2, 6, 6, 6, 1, 2, 1, 2, 1, 2])

In [81]:
np.c_[ [1,2,3], [3] * 3]

array([[1, 3],
       [2, 3],
       [3, 3]])

## Arithmetic OPS

In [92]:
a = np.r_[1:5]
b = np.r_[1, 10, 100, 1000]

In [93]:
# Element-wise product computed the manual way, one index at a time,
# for contrast with the vectorised `a * b`.
c = [a[idx] * b[idx] for idx in range(len(a))]

In [94]:
a * b

array([   1,   20,  300, 4000])

In [95]:
A

array([[ 1,  2,  3, 55],
       [ 4,  5,  6, 66],
       [ 7,  8,  9, 88]])

In [96]:
A * A

array([[   1,    4,    9, 3025],
       [  16,   25,   36, 4356],
       [  49,   64,   81, 7744]])

In [98]:
A @ A.T

array([[3039, 3662, 4890],
       [3662, 4433, 5930],
       [4890, 5930, 7938]])

In [99]:
A * (-1)

array([[ -1,  -2,  -3, -55],
       [ -4,  -5,  -6, -66],
       [ -7,  -8,  -9, -88]])

In [101]:
A

array([[ 1,  2,  3, 55],
       [ 4,  5,  6, 66],
       [ 7,  8,  9, 88]])

In [102]:
A * np.r_[-1, 100, 10 ,1]

array([[ -1, 200,  30,  55],
       [ -4, 500,  60,  66],
       [ -7, 800,  90,  88]])

## Aggregation functions

In [103]:
np.r_[1, 2, 3, 4].sum()

10

In [104]:
np.sum(np.r_[1, 2, 3, 4])

10

In [105]:
A

array([[ 1,  2,  3, 55],
       [ 4,  5,  6, 66],
       [ 7,  8,  9, 88]])

In [106]:
A.sum()

254

In [107]:
A.sum(0)

array([ 12,  15,  18, 209])

In [108]:
np.sum(A, 0)

array([ 12,  15,  18, 209])

In [109]:
np.sum(A, 1)

array([ 61,  81, 112])

In [110]:
np.std(A, 1)

array([22.96056402, 26.42323788, 34.64823228])

In [115]:
def stand_vector(v, axis=None):
    """Standardise values to zero mean and unit standard deviation (z-scores).

    Parameters
    ----------
    v : array-like
        Input values (list, ndarray, ...).
    axis : int or None, optional
        Axis along which the mean and standard deviation are computed.
        ``None`` (the default) uses the mean and standard deviation of all
        elements, which is the behaviour for a plain vector. ``axis=0``
        standardises each column of a matrix independently, ``axis=1``
        each row.

    Returns
    -------
    Values with the same shape as ``v``, computed as ``(v - mean) / std``
    using the population standard deviation (NumPy's default ``ddof=0``).

    Notes
    -----
    Constant input has zero standard deviation, so the division produces
    ``nan`` values rather than raising.
    """
    if axis is None:
        # Global statistics: a single mean/std shared by every element.
        return (v - np.mean(v)) / np.std(v)

    arr = np.asarray(v)
    # keepdims=True keeps the reduced axis with length 1 so the statistics
    # broadcast back against `arr` regardless of which axis was reduced.
    centre = arr.mean(axis=axis, keepdims=True)
    spread = arr.std(axis=axis, keepdims=True)
    return (arr - centre) / spread

In [116]:
stand_vector([1, 2, 3])

array([-1.22474487,  0.        ,  1.22474487])

In [113]:
A = np.array(iris.iloc[:10, :4])

In [118]:
stand_vector(A[:, 0])

array([ 0.86828953,  0.14471492, -0.57885968, -0.94064699,  0.50650222,
        1.95365143, -0.94064699,  0.50650222, -1.66422159,  0.14471492])

In [119]:
# Column-wise z-scores: the per-column mean and std (axis=0) broadcast across the rows of A.
(A - A.mean(axis=0)) / A.std(axis=0)

array([[ 0.86828953,  0.65207831, -0.48795004, -0.26726124],
       [ 0.14471492, -1.06391725, -0.48795004, -0.26726124],
       [-0.57885968, -0.37751902, -1.46385011, -0.26726124],
       [-0.94064699, -0.72071813,  0.48795004, -0.26726124],
       [ 0.50650222,  0.99527742, -0.48795004, -0.26726124],
       [ 1.95365143,  2.02487476,  2.43975018,  2.40535118],
       [-0.94064699,  0.3088792 , -0.48795004,  1.06904497],
       [ 0.50650222,  0.3088792 ,  0.48795004, -0.26726124],
       [-1.66422159, -1.40711636, -0.48795004, -0.26726124],
       [ 0.14471492, -0.72071813,  0.48795004, -1.60356745]])

## Relational and Comparison OPS

In [121]:
A = np.c_[1:4, 11:14]

In [122]:
A

array([[ 1, 11],
       [ 2, 12],
       [ 3, 13]])

In [124]:
# < > <= >= == !=
A > 10

array([[False,  True],
       [False,  True],
       [False,  True]])

In [127]:
A > np.r_[1, 11]

array([[False, False],
       [ True,  True],
       [ True,  True]])

In [128]:
# Deliberate failure: a (3, 2) array cannot be broadcast against a length-3
# vector because the trailing dimensions (2 vs 3) do not match. The error is
# caught and printed so the notebook still runs top to bottom.
try:
    A > np.r_[1, 11, 100]
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

In [142]:
# np.all
# np.any

In [130]:
np.all(A == A)

True

In [131]:
np.any(A > 10)

True

In [132]:
np.sum(A > 10)

3

In [135]:
x = np.random.rand(10)
x

array([0.35591487, 0.76254781, 0.59317692, 0.6917018 , 0.15112745,
       0.39887629, 0.2408559 , 0.34345601, 0.51312815, 0.66662455])

In [141]:
# Fraction of the elements of x that fall in the closed interval [.25, .75]:
# the mean of a boolean mask is the proportion of True entries.
np.mean((x >= .25) & (x <= .75))

0.7

## Indexing

In [147]:
# scalar indexing
x = [1, 33, 44, 5]
x[2]

44

In [148]:
# slice indexing
x[:3]

[1, 33, 44]

In [149]:
x[::-1]

[5, 44, 33, 1]

In [150]:
x2 = np.r_[1, 33, 44, 5]

In [152]:
x2[2]

44

In [153]:
x2[:3]

array([ 1, 33, 44])

Four ways to index a NumPy array:

1. scalar-based indexing
2. slice-based indexing
3. integer-vector-based indexing
4. boolean-vector-based indexing

In [156]:
x2[[1, 0, 1, 3]]

array([33,  1, 33,  5])

In [157]:
x2[[True, True, False, False]]

array([ 1, 33])

In [158]:
x2[[0, -1]]

array([1, 5])

In [159]:
x2[0]

1

In [160]:
x2[[0]]

array([1])

In [161]:
x2[ x2 > 2]

array([33, 44,  5])

In [163]:
x2[ (x2 > 2) & (x2 < 40)]

array([33,  5])

In [167]:
x = np.round( np.random.normal(size = 20), 2 )

In [168]:
x

array([-0.43,  1.24, -0.74,  0.5 ,  1.01,  0.28, -1.37, -0.33,  1.96,
       -2.03, -0.28, -0.55,  0.12,  0.75,  1.61, -0.27,  0.81,  0.5 ,
        0.47, -0.56])

In [169]:
# print all values in [-2, -1] U [1, 2]

In [170]:
x[ ( (x >= -2) & (x <= -1) ) | ( (x >= 1) & (x <= 2) ) ]

array([ 1.24,  1.01, -1.37,  1.96,  1.61])