# NumPy Basics

In [1]:
import numpy as np

In [2]:
# One million consecutive integers, held once as a plain Python list and once
# as a NumPy array, so the two timing cells below work on the same data.
l0 = [*range(1_000_000)]
arr = np.arange(1_000_000)

In [3]:
%%timeit

# Vectorized: NumPy applies the multiplication to every element of the array.
2 * arr

814 µs ± 60 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [4]:
%%timeit

# Pure-Python baseline: the interpreter multiplies one list element at a time.
[2 * x for x in l0]

44.4 ms ± 931 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


## The NumPy ndarray

In [5]:
# Mixed int/float literals: the whole array is stored with one floating-point dtype.
data = np.array([[1.5, -0.1, 3], [0, -3, 6.5]])

In [6]:
data

array([[ 1.5, -0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [7]:
10 * data

array([[ 15.,  -1.,  30.],
       [  0., -30.,  65.]])

In [8]:
data + data

array([[ 3. , -0.2,  6. ],
       [ 0. , -6. , 13. ]])

In [9]:
data * data

array([[2.250e+00, 1.000e-02, 9.000e+00],
       [0.000e+00, 9.000e+00, 4.225e+01]])

In [10]:
data.shape

(2, 3)

In [11]:
data.dtype

dtype('float64')

In [12]:
# NOTE(review): this rebinds `l0`, which the list-comprehension timing cell
# above reads; re-running that cell after this one would time a 5-element list.
l0 = [6, 7.5, 8, 0, 1]
# A flat list becomes a 1-D array.
arr0 = np.array(l0)

In [13]:
arr0

array([6. , 7.5, 8. , 0. , 1. ])

In [16]:
# Three one-element rows: the nesting depth of the list sets the number of dimensions.
l1 = [[0],[1],[3]]
arr1 = np.array(l1)

In [17]:
arr1

array([[0],
       [1],
       [3]])

In [19]:
arr1.shape

(3, 1)

In [20]:
arr1.ndim

2

In [21]:
arr1.dtype

dtype('int64')

In [22]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [23]:
np.zeros((3, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [24]:
# np.empty only allocates memory and does not initialize it: the zeros shown
# below are whatever the buffer happened to contain, not a guaranteed value.
np.empty((3, 6))

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [25]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [26]:
np.arange(15, dtype=np.float64)

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14.])

### Exercise

1. Create an array with 10 rows and 5 columns of all ones.
2. Create an array with 10 rows and 5 columns of all negative twos.
3. Add, subtract, multiply, and divide the arrays.

In [37]:
a = np.ones((10, 5))

In [32]:
# One way to get all -2s: subtract a scalar from zeros (applied to every entry).
# NOTE(review): the exercise asks for 10 rows x 5 columns; this builds (10, 10).
np.zeros((10, 10)) - 2

array([[-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.]])

In [33]:
# Another way to get all -2s: scale an array of ones by a scalar.
# NOTE(review): the exercise asks for 10 rows x 5 columns; this builds (10, 10).
np.ones((10, 10)) * -2

array([[-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2., -2., -2., -2., -2., -2.]])

In [36]:
# np.full builds the requested (10, 5) shape directly from a single fill value.
b = np.full((10, 5), -2)

In [38]:
a + b

array([[-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.],
       [-1., -1., -1., -1., -1.]])

In [39]:
a - b

array([[3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.],
       [3., 3., 3., 3., 3.]])

In [40]:
a * b

array([[-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.],
       [-2., -2., -2., -2., -2.]])

In [41]:
a / b

array([[-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5],
       [-0.5, -0.5, -0.5, -0.5, -0.5]])

### Exercise:

1. Use the `np.identity` function to create a (10, 10) identity matrix.

In [43]:
np.identity(10)

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])

In [47]:
np.diag(2 * np.ones(10))

array([[2., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 2., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 2., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 2., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 2., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 2., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 2., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 2., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 2., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 2.]])

In [48]:
np.diag(np.arange(10))

array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 2, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 3, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 4, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 5, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 6, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 7, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 8, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 9]])

In [50]:
# NOTE(review): this rebinds `a`, which earlier held the (10, 5) array of ones;
# re-running the `a + b`, `a - b`, ... cells after this point fails because
# (5, 5) and (10, 5) do not broadcast.
# The global NumPy random state is not seeded in the visible cells, so the
# values shown differ on every run.
a = np.random.rand(5, 5)
a

array([[0.97595782, 0.27497745, 0.54239295, 0.86178038, 0.86017728],
       [0.78807208, 0.42353646, 0.22360107, 0.9254719 , 0.92785037],
       [0.01082887, 0.3516742 , 0.90154295, 0.73910616, 0.59498355],
       [0.83661847, 0.20842367, 0.51438382, 0.05103325, 0.40940692],
       [0.53669719, 0.0762167 , 0.91874884, 0.25799008, 0.49646022]])

In [51]:
np.diag(a)

array([0.97595782, 0.42353646, 0.90154295, 0.05103325, 0.49646022])

In [52]:
# Inner np.diag pulls the main diagonal out of the 2-D array; outer np.diag
# turns that 1-D vector back into a square matrix with zeros off the diagonal.
np.diag(np.diag(a))

array([[0.97595782, 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.42353646, 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.90154295, 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.05103325, 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.49646022]])

In [60]:
# Forcing an integer dtype on float literals drops the fractional part
# (1.2 -> 1, -4.5 -> -4, 7.7 -> 7 in the output below) rather than rounding.
arr4 = np.array(
    [[1.2, 3, 4.5, 6],
     [3.3, -4.5, 6, 7.7],
     [2.3, 4.5, 6.8, 9]],
    dtype=np.int8)

In [61]:
arr4.dtype

dtype('int8')

In [63]:
arr4

array([[ 1,  3,  4,  6],
       [ 3, -4,  6,  7],
       [ 2,  4,  6,  9]], dtype=int8)

In [64]:
arr5 = np.array([[1., 2., 3.], [4., 5., 6.]])
arr5

array([[1., 2., 3.],
       [4., 5., 6.]])

In [65]:
arr5 * arr5

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [66]:
arr5 - arr5

array([[0., 0., 0.],
       [0., 0., 0.]])

In [67]:
1 / arr5

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [68]:
arr5**2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [69]:
arr6 = np.array([[0., 4., 1.], [7., 2., 12.]])

In [70]:
arr6

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [71]:
arr5 > arr6

array([[ True, False,  True],
       [False,  True, False]])

In [72]:
arr5.shape

(2, 3)

In [73]:
arr6.shape

(2, 3)

In [76]:
arr7 = np.arange(10, 20, 1)

In [77]:
arr7

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [78]:
arr7[3]

13

In [79]:
arr7[5:8]

array([15, 16, 17])

In [80]:
# Assigning a scalar to a slice writes it into every selected element, in place.
arr7[5:8] = 12

In [81]:
arr7

array([10, 11, 12, 13, 14, 12, 12, 12, 18, 19])

In [82]:
# An array of matching length is copied element by element into the slice.
arr7[5:8] = np.array([42,41,50])

In [83]:
arr7

array([10, 11, 12, 13, 14, 42, 41, 50, 18, 19])

In [84]:
arr6

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [85]:
arr6[0, 1]

4.0

In [86]:
# Chained indexing: select row 0 first, then element 1 of that row.
# Gives the same value as arr6[0, 1] in the previous cell.
arr6[0][1]

4.0

In [89]:
arr8 = np.array([[1.3, 2.3, 4.5], [3.2, 4.4, 6.7], [2.2, 1., 4.9]])

In [90]:
arr8

array([[1.3, 2.3, 4.5],
       [3.2, 4.4, 6.7],
       [2.2, 1. , 4.9]])

In [93]:
arr8[1:, 1:]

array([[4.4, 6.7],
       [1. , 4.9]])

In [95]:
arr9 = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
arr9

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [97]:
arr9[1]

array([[ 7,  8,  9],
       [10, 11, 12]])

In [98]:
arr9.shape

(2, 2, 3)

In [99]:
arr9

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [101]:
arr8

array([[1.3, 2.3, 4.5],
       [3.2, 4.4, 6.7],
       [2.2, 1. , 4.9]])

In [103]:
# A single integer index selects one row and drops that axis: the result is 1-D.
r0 = arr8[2]
r0.shape

(3,)

In [105]:
r0

array([2.2, 1. , 4.9])

In [104]:
# Integer row index plus a full column slice: the same 1-D row as arr8[2].
r1 = arr8[2, :]
r1.shape

(3,)

In [106]:
r1

array([2.2, 1. , 4.9])

In [107]:
# Slicing the rows (2:) instead of indexing keeps the axis: the result stays 2-D.
r2 = arr8[2:, :]
r2.shape

(1, 3)

In [108]:
r2

array([[2.2, 1. , 4.9]])

In [109]:
# Six names and six rows of `data`; the cells below use masks built from
# `names` to select rows of `data`.
names = np.array(["Bob", "Joe", "Ahmed", "Sara", "Yasmeen", "David"])

# NOTE: `data` is rebound here; it no longer refers to the 2x3 float array
# created at the start of the ndarray section.
data = np.array(
    [
        [4, 7],
        [0, 2],
        [-5, 6],
        [0, 0],
        [-12, -4],
        [3, 4],
    ]
)

In [110]:
names.shape

(6,)

In [111]:
data.shape

(6, 2)

In [112]:
names == "David"

array([False, False, False, False, False,  True])

In [113]:
data[names == "David"]

array([[3, 4]])

In [114]:
data[names == "David", 1]

array([4])

In [115]:
data[names != "David"]

array([[  4,   7],
       [  0,   2],
       [ -5,   6],
       [  0,   0],
       [-12,  -4]])

In [116]:
data[~(names == "David")]

array([[  4,   7],
       [  0,   2],
       [ -5,   6],
       [  0,   0],
       [-12,  -4]])

In [117]:
# Element-wise OR of two boolean masks. Python's `or` does not work on arrays,
# and the parentheses are required because `|` binds tighter than `==`.
mask = (names == "David") | (names == "Ahmed")
data[mask]

array([[-5,  6],
       [ 3,  4]])

In [118]:
# Same mask expression as the previous cell.
# Assigning through a boolean mask overwrites the selected rows of `data` in place.
mask = (names == "David") | (names == "Ahmed")
data[mask] = 12

In [119]:
data

array([[  4,   7],
       [  0,   2],
       [ 12,  12],
       [  0,   0],
       [-12,  -4],
       [ 12,  12]])

In [126]:
# 15 consecutive integers laid out row by row as 3 rows x 5 columns.
arr10 = np.arange(15).reshape((3, 5))

In [127]:
arr10

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [128]:
arr10.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [130]:
arr10.T, arr10

(array([[ 0,  5, 10],
        [ 1,  6, 11],
        [ 2,  7, 12],
        [ 3,  8, 13],
        [ 4,  9, 14]]),
 array([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]]))

In [131]:
np.dot(arr10.T, arr10)

array([[125, 140, 155, 170, 185],
       [140, 158, 176, 194, 212],
       [155, 176, 197, 218, 239],
       [170, 194, 218, 242, 266],
       [185, 212, 239, 266, 293]])

In [132]:
np.dot(arr10, arr10.T)

array([[ 30,  80, 130],
       [ 80, 255, 430],
       [130, 430, 730]])

In [133]:
# `@` is the matrix-multiplication operator; for these 2-D arrays it gives the
# same result as np.dot(arr10, arr10.T) in the previous cell.
arr10 @ arr10.T

array([[ 30,  80, 130],
       [ 80, 255, 430],
       [130, 430, 730]])

### Exercise:

1. Create an array with shape (10, 2) of all ones called `X`
2. Replace the data in column 1 with 10 integers starting from 15.
3. Create a (10, 1) array called `y` of a mix of ones and zeros.
4. Multiply the transpose of `X` with `X`. What is the shape of the result?
5. Multiply the transpose of `X` and `y`. What is the shape of the result?

In [134]:
# Problem dimensions for the exercise: one row per sample, one column per feature.
number_training_samples, number_features = 10, 2

# Start from all ones; a later cell overwrites column 1.
X = np.ones(shape=(number_training_samples, number_features))

In [136]:
X.shape

(10, 2)

In [139]:
# Fill column 1 with the integers 15..24; they are stored as floats because X is a float array.
X[:, 1] = np.arange(15, 25)

In [140]:
X

array([[ 1., 15.],
       [ 1., 16.],
       [ 1., 17.],
       [ 1., 18.],
       [ 1., 19.],
       [ 1., 20.],
       [ 1., 21.],
       [ 1., 22.],
       [ 1., 23.],
       [ 1., 24.]])

In [142]:
# Column vector of ones, then zero out the entries at the listed row positions
# to get a mix of ones and zeros.
y = np.ones(shape=(number_training_samples, 1))
y[[1, 3, 4, 7, 9], 0] = 0.0

In [143]:
y

array([[1.],
       [0.],
       [1.],
       [0.],
       [0.],
       [1.],
       [1.],
       [0.],
       [1.],
       [0.]])

In [144]:
X.T @ X

array([[  10.,  195.],
       [ 195., 3885.]])

In [145]:
X.T @ y

array([[ 5.],
       [96.]])

## Pseudo-random Number Generation

In [146]:
# 10 x 2 draws from the standard normal distribution using NumPy's global
# random state; no seed is set in the visible cells, so these numbers change
# from run to run.
np.random.standard_normal(size=(10, 2))

array([[-0.653594  , -0.20929885],
       [ 0.76119494,  0.72695216],
       [ 0.81888788,  0.11856993],
       [ 0.69593639,  1.3483651 ],
       [-0.72681905,  0.09604754],
       [-0.25999652,  1.75676738],
       [ 1.3289482 ,  0.1349885 ],
       [ 0.12296749, -0.18047462],
       [-0.39369667, -0.45487637],
       [ 0.67294238,  1.10185892]])

In [147]:
import random

In [151]:
%%timeit

# Pure-Python baseline: one random.normalvariate call per sample.
xs = [random.normalvariate(0, 1) for _ in range(1_000_000)]

741 ms ± 175 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [153]:
%%timeit

# NumPy draws all one million samples in a single call.
xs = np.random.standard_normal(1_000_000)

28.2 ms ± 6.14 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [162]:
# A dedicated, seeded generator: re-creating it with the same seed restarts the
# same sequence of draws.
# NOTE(review): RandomState is NumPy's legacy interface; np.random.default_rng
# is the recommended API for new code, but switching would change the numbers
# recorded in the outputs below.
prng = np.random.RandomState(seed=43)

In [163]:
prng.standard_normal((10, 2))

array([[ 0.25739993, -0.90848143],
       [-0.37850311, -0.5349156 ],
       [ 0.85807335, -0.41300998],
       [ 0.49818858,  2.01019925],
       [ 1.26286154, -0.43921486],
       [-0.34643789,  0.45531966],
       [-1.66866271, -0.8620855 ],
       [ 0.49291085, -0.1243134 ],
       [ 1.93513629, -0.61844265],
       [-1.04683899, -0.88961759]])

In [164]:
# A second call continues the same stream, so it returns different values
# from the first call in the previous cell.
prng.standard_normal((10, 2))

array([[ 0.01404054, -0.16082969],
       [ 2.23035965, -0.39911572],
       [ 0.05444456,  0.88418182],
       [-0.10798056,  0.55560698],
       [ 0.39490664,  0.83720502],
       [-1.40787817,  0.80784941],
       [-0.13828364,  0.18717859],
       [-0.38665814,  1.65904873],
       [-2.04706913,  1.39931699],
       [-0.67900712,  1.52898513]])

### Exercise:

Suppose that you have a dataset with 60,000 rows and 784 columns.

1. Create an array `X` with shape (n_rows, n_cols + 1) of all random numbers from a standard normal distribution.
2. Replace the data in column 0 with all ones.
3. Create a (n_rows, 10) array called `y` of all zeros.
4. For each row, randomly choose one of the columns and replace the zero in that column with a one.  
5. Multiply the transpose of `X` with `X`. Predict the shape of the result. Were you correct?
6. Multiply the transpose of `X` and `y`. Predict the shape of the result. Were you correct?

In [183]:
# 60,000 rows and 784 columns from the exercise, plus one extra column.
# Drawn from the unseeded global random state, so values differ between runs.
X = np.random.standard_normal((60_000, 784 + 1))

In [184]:
# Overwrite column 0 with ones (the scalar is written into every row of that column).
X[:, 0] = 1

In [185]:
X[:5, :]

array([[ 1.        , -0.53408547, -0.55211385, ..., -0.06100026,
         0.18866716,  0.08056619],
       [ 1.        , -1.81482909, -0.14988385, ...,  0.11919851,
        -1.70852472, -0.55993987],
       [ 1.        ,  0.29413733, -1.42779162, ..., -0.38712699,
         0.49318989,  0.66780622],
       [ 1.        , -0.50935977,  1.21131769, ...,  1.09050486,
         1.59023014,  1.93966869],
       [ 1.        , -1.15355485,  0.24542612, ...,  0.97528057,
         1.35691962, -1.20631388]])

In [186]:
# One row of labels per row of X, ten columns, all zeros to start.
# The row count is read from X rather than repeating the literal 60_000,
# so the two arrays cannot drift apart if X's size is changed.
y = np.zeros((X.shape[0], 10))

In [192]:
# One random column index per row of y. Both sizes come from y itself instead
# of hard-coded literals; randint's `high` bound is exclusive, so the indices
# fall in 0 .. y.shape[1] - 1 and are always valid columns of y.
random_column_idxs = np.random.randint(low=0, high=y.shape[1], size=y.shape[0])

In [193]:
random_column_idxs[:5]

array([2, 3, 9, 1, 3])

In [191]:
# Row positions 0 .. n-1, one per row of y (derived from y's shape rather than
# a repeated literal), used together with random_column_idxs to index y.
row_idxs = np.arange(y.shape[0])

In [194]:
row_idxs[:5]

array([0, 1, 2, 3, 4])

In [196]:
# Paired integer-array indexing: sets y[row_idxs[i], random_column_idxs[i]] = 1
# for each i, so, starting from an all-zero y, every row receives exactly one 1.
y[row_idxs, random_column_idxs] = 1

In [197]:
y[:10, :]

array([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.]])

In [198]:
# (785, 60000) @ (60000, 785): the shared 60000 dimension is summed out, leaving (785, 785).
result = X.T @ X

In [199]:
result.shape

(785, 785)

In [200]:
# (785, 60000) @ (60000, 10) -> (785, 10).
result2 = X.T @ y

In [201]:
result2.shape

(785, 10)