In [1]:
import numpy as np

In [4]:
# Build two one-dimensional arrays from Python lists.
# Ending the cell with `x, y` displays both arrays together as a tuple.
x = np.array([3, 4, 5])
y = np.array([4, 9, 7])
x, y

(array([3, 4, 5]), array([4, 9, 7]))

In [5]:
# Adding two arrays of the same shape adds them element by element.
x + y

array([ 7, 13, 12])

In [6]:
# A list of lists creates a two-dimensional array; each inner list becomes one row.
# Note that this rebinds x, replacing the one-dimensional array defined earlier.
x = np.array([[1, 2], [3, 4]])
x

array([[1, 2],
       [3, 4]])

In [7]:
# The ndim attribute gives the number of dimensions of the array (2 for this matrix).
x.ndim

2

In [8]:
# The dtype attribute is the data type shared by all elements of the array.
# x was built from Python integers, so its elements are stored as integers.
x.dtype

dtype('int64')

In [9]:
# If any element is a float (here 3.0), every element of the array is stored as floating point.
np.array([[1, 2], [3.0, 4]]).dtype

dtype('float64')

In [None]:
# IPython help syntax (not plain Python): a trailing ? displays the documentation for np.array.
np.array?

In [12]:
# The data type can also be requested explicitly: passing float as the second argument
# stores these integer inputs as floating point.
np.array([[1, 2], [3, 4]], float).dtype

dtype('float64')

In [13]:
# The shape attribute is a tuple giving the size along each dimension.
# x is still the 2 x 2 array created above, so this reports 2 rows and 2 columns.
x.shape

(2, 2)

In [15]:
# Given an array x, the expression x.sum() sums all of its elements, using the sum() method for arrays.
# A method is a function bound to an object: the call x.sum() automatically provides x as the
# first argument to its sum() method.
x = np.array([1, 2, 3, 4])
x.sum()

np.int64(10)

In [16]:
# We could also sum the elements of x by passing x as an argument to the np.sum() function.
# This gives the same result as the x.sum() method call.
x = np.array([1, 2, 3, 4])
np.sum(x)

np.int64(10)

In [18]:
# The reshape() method returns an array with the same elements as x, but a different shape. We do this by passing a tuple
# in our call to reshape(), in this case (2, 3). This tuple specifies that we would like to create a two-dimensional array
# with 2 rows and 3 columns.
x = np.array([1, 2, 3, 4, 5, 6])
print('beginning x:\n', x)
x_reshape = x.reshape((2, 3))
print('reshaped x:\n', x_reshape)

beginning x:
 [1 2 3 4 5 6]
reshaped x:
 [[1 2 3]
 [4 5 6]]


In [19]:
# The previous output reveals that numpy arrays are specified as a sequence of rows: the first three elements of x
# filled the first row. This is called row-major ordering, as opposed to column-major ordering.
# Python (and hence numpy) uses 0-based indexing. This means that to access the top left element of x_reshape,
# we type x_reshape[0, 0].

x_reshape[0, 0]

np.int64(1)

In [20]:
# Row index 1, column index 2: with 0-based indexing this is the bottom right element of the 2 x 3 array.
x_reshape[1, 2]

np.int64(6)

In [21]:
# x itself is still one-dimensional, so a single index is enough; index 2 selects its third element.
x[2]

np.int64(3)

In [22]:
# Now let's modify the top left element of x_reshape. To our surprise, we discover that the first element of x
# has been modified as well! We print both arrays before and after the assignment to make this visible.
# NOTE(review): the second label below contains a stray space ('x _reshape'); it is left unchanged so that the
# recorded output still matches the code.
print('x before we modify x_reshape :\n', x)
print('x_reshape before we modify x _reshape :\n', x_reshape)
x_reshape[0, 0] = 5
print('x_reshape after we modify its top left element :\n', x_reshape)
print('x after we modify top left element of x_reshape :\n', x)

x before we modify x_reshape :
 [1 2 3 4 5 6]
x_reshape before we modify x _reshape :
 [[1 2 3]
 [4 5 6]]
x_reshape after we modify its top left element :
 [[5 2 3]
 [4 5 6]]
x after we modify top left element of x_reshape :
 [5 2 3 4 5 6]


Modifying x_reshape also modified x because the two objects occupy the same space in memory: reshape() did not copy the data, so both arrays refer to the same underlying elements.

In [23]:
# We just saw that we can modify an element of an array. Can we also modify a tuple? It turns out that we cannot:
# tuples are immutable, and trying to assign to one of their elements raises an exception (an error), here a TypeError.
# The exception is caught and printed so that the demonstration does not stop a "Restart Kernel -> Run All";
# my_tuple is left unchanged by the failed assignment.
my_tuple = (3, 4, 5)
try:
    my_tuple[0] = 2
except TypeError as err:
    # Show the exception type and message, as the uncaught error would have.
    print(f"{type(err).__name__}: {err}")

TypeError: 'tuple' object does not support item assignment

In [24]:
# We now briefly mention some attributes of arrays that will come in handy.
# An array's shape attribute contains its dimensions (the size along each axis); this is always a tuple.
# The ndim attribute yields the number of dimensions, and T provides its transpose (rows and columns swapped).
x_reshape.shape, x_reshape.ndim, x_reshape.T

((2, 3),
 2,
 array([[5, 4],
        [2, 5],
        [3, 6]]))

Notice that the three individual outputs (2, 3), 2, and array([[5, 4], [2, 5], [3, 6]]) are themselves output as a tuple.

In [25]:
# We will often want to apply functions to arrays. For instance, we can compute the square root of the entries
# using the np.sqrt() function, which is applied to each element separately.
# Note that the first element of x is still 5, from the earlier assignment through x_reshape.
np.sqrt(x)

array([2.23606798, 1.41421356, 1.73205081, 2.        , 2.23606798,
       2.44948974])

In [26]:
# We can also square the elements: the ** operator raises each element to the given power.
x ** 2

array([25,  4,  9, 16, 25, 36])

In [27]:
# We can compute the square roots using the same notation, raising to the power of 1/2 instead of 2.
# The result matches np.sqrt(x) above.
x ** 0.5

array([2.23606798, 1.41421356, 1.73205081, 2.        , 2.23606798,
       2.44948974])

Throughout this book, we will often want to generate random data. The np.random.normal() function generates a vector of random normal variables.
We can learn more about this function by looking at the help page, via a call to np.random.normal?. The first line of the help page reads
normal(loc=0.0, scale=1.0, size=None). This signature line tells us that the function's arguments are loc, scale, and size. These are keyword arguments, which means that when they are passed into the function, they can be referred to by name (in any order). By default, this function will generate random normal variable(s) with mean (loc) 0 and standard deviation (scale) 1; furthermore, a single random variable will be generated unless the argument to size is changed.

In [30]:
# IPython help syntax (not plain Python): displays the signature and docstring of np.random.normal.
np.random.normal?

[31mSignature:[39m np.random.normal(loc=[32m0.0[39m, scale=[32m1.0[39m, size=[38;5;28;01mNone[39;00m)
[31mDocstring:[39m
normal(loc=0.0, scale=1.0, size=None)

Draw random samples from a normal (Gaussian) distribution.

The probability density function of the normal distribution, first
derived by De Moivre and 200 years later by both Gauss and Laplace
independently [2]_, is often called the bell curve because of
its characteristic shape (see the example below).

The normal distributions occurs often in nature.  For example, it
describes the commonly occurring distribution of samples influenced
by a large number of tiny, random disturbances, each with its own
unique distribution [2]_.

.. note::
    New code should use the `~numpy.random.Generator.normal`
    method of a `~numpy.random.Generator` instance instead;
    please see the :ref:`random-quick-start`.

Parameters
----------
loc : float or array_like of floats
    Mean ("centre") of the distribution.
scale : float or ar

In [28]:
# We now generate 50 independent random variables from a N(0, 1) distribution.
# loc and scale keep their defaults (0.0 and 1.0); only size is given. No seed is set here, so
# re-running this cell produces different values.
x = np.random.normal(size=50)
x

array([ 0.15784199,  0.61440991, -0.46145374,  0.76589359,  1.27846699,
        0.87160691,  1.80805911,  0.14929493,  1.44120734,  0.36891996,
       -1.45203511,  0.63879341,  2.24516684, -0.58279896,  0.71151663,
        0.69516101, -1.59291061, -0.05646906, -1.03213808, -0.83431407,
        0.31770512,  0.29635831,  0.19870698, -1.28265366,  1.01833666,
       -0.96082045, -0.82273362, -0.61217834,  1.23326161,  0.33948569,
       -0.07762703,  0.13493198,  0.90365006,  0.10651485, -0.58043953,
        0.88133643,  1.6451784 , -0.78124208, -0.2879252 ,  1.80371887,
        1.09104249, -0.40492281,  0.71394905, -1.1531886 ,  1.09884811,
        0.35270909, -1.65125357, -1.3797584 ,  2.35105616,  0.87378915])

In [32]:
# We can create an array y by adding an independent N(50, 1) random variable to each element of x.
# Here loc=50 is the mean and scale=1 the standard deviation of the added noise; size=50 matches the length of x.
y = x + np.random.normal(loc=50, scale=1, size=50)
y

array([51.17895926, 52.37730938, 48.17907565, 49.96613584, 50.75692283,
       50.31008436, 51.21788366, 50.0263998 , 51.82795643, 49.90950196,
       48.35737219, 50.06962673, 51.63904863, 49.45011702, 51.18076463,
       50.83874348, 48.86168431, 48.92119605, 48.54393659, 49.14227134,
       51.74539213, 50.81333886, 48.8859305 , 47.94903994, 50.05427811,
       49.3848503 , 49.95350952, 50.02572632, 52.63628762, 50.18964537,
       48.61751546, 50.06965699, 51.1110394 , 49.50134099, 49.53738258,
       51.2696458 , 51.12163077, 48.8463265 , 48.188902  , 51.96018552,
       52.65354216, 50.08307034, 50.50227404, 49.74102616, 53.63562952,
       50.29694142, 49.48515677, 49.1798307 , 53.02524876, 50.76985564])

In [33]:
# The np.corrcoef() function computes the correlation matrix of x and y. The diagonal elements are 1
# (each variable with itself); the off-diagonal elements give the correlation between x and y.
np.corrcoef(x, y)

array([[1.        , 0.76896827],
       [0.76896827, 1.        ]])

In [34]:
# Each time we call np.random.normal(), we will get a different answer, as shown in the following example:
# the two identical calls below print different values.
print(np.random.normal(scale=5, size=2))
print(np.random.normal(scale=5, size=2))

[ 0.03914093 -2.85265776]
[0.4698382  7.58329415]


In [35]:
# In order to ensure that our code provides the same results each time it is run, we can set a random seed using the
# np.random.default_rng() function. This function takes an arbitrary, user-specified integer argument.
# If we set a random seed before generating random data, then re-running our code will yield the same results.
# The object rng has essentially all the random number generating methods found in np.random. Hence, to generate
# normal data we use rng.normal().
# Two generators created with the same seed (1303) produce the same draws, as the two printed lines show.
rng = np.random.default_rng(1303)
print(rng.normal(scale=5, size=2))
rng2 = np.random.default_rng(1303)
print(rng2.normal(scale=5, size=2))

[ 4.09482632 -1.07485605]
[ 4.09482632 -1.07485605]


Throughout the labs, we use np.random.default_rng() whenever we perform calculations involving random quantities with numpy.

In [36]:
# The np.mean(), np.var(), and np.std() functions can be used to compute the mean, variance, and standard deviation
# of arrays. These functions are also available as methods on the arrays.
# rng is re-created with seed 3 so that y (10 standard normal draws) is reproducible; note that this rebinds
# both rng and y from the earlier cells.
rng = np.random.default_rng(3)
y = rng.standard_normal(10)
np.mean(y), y.mean()

(np.float64(-0.1126795190952861), np.float64(-0.1126795190952861))

In [38]:
# Three equivalent ways to compute the variance: the function, the method, and the definition written out
# by hand as the mean of the squared deviations from the mean.
np.var(y), y.var(), np.mean((y - y.mean()) ** 2)

(np.float64(2.7243406406465125),
 np.float64(2.7243406406465125),
 np.float64(2.7243406406465125))

Notice that by default np.var() divides by the sample size n rather than n - 1; see the ddof argument in np.var?

In [39]:
# IPython help syntax (not plain Python): displays the signature and docstring of np.var, including ddof.
np.var?

[31mSignature:[39m      
np.var(
    a,
    axis=[38;5;28;01mNone[39;00m,
    dtype=[38;5;28;01mNone[39;00m,
    out=[38;5;28;01mNone[39;00m,
    ddof=[32m0[39m,
    keepdims=<no value>,
    *,
    where=<no value>,
    mean=<no value>,
    correction=<no value>,
)
[31mCall signature:[39m  np.var(*args, **kwargs)
[31mType:[39m            _ArrayFunctionDispatcher
[31mString form:[39m     <function var at 0x000001B3C9D707C0>
[31mFile:[39m            c:\users\anthonny.paz\documents\github\ml-projects\ml_proj\lib\site-packages\numpy\_core\fromnumeric.py
[31mDocstring:[39m      
Compute the variance along the specified axis.

Returns the variance of the array elements, a measure of the spread of a
distribution.  The variance is computed for the flattened array by
default, otherwise over the specified axis.

Parameters
----------
a : array_like
    Array containing numbers whose variance is desired.  If `a` is not an
    array, a conversion is attempted.
axis : None or in

In [40]:
# The standard deviation is the square root of the variance, so both expressions give the same value.
np.sqrt(np.var(y)), np.std(y)

(np.float64(1.6505576756498128), np.float64(1.6505576756498128))

In [42]:
# The np.mean(), np.var(), and np.std() functions can also be applied to the rows and columns of a matrix.
# To see this, we construct a 10 x 3 matrix of N(0, 1) random variables, and then compute its mean along an axis.
# The draws continue from the rng seeded with 3 above, so X depends on that cell having been run first.
X = rng.standard_normal((10, 3))
X

array([[ 0.22578661, -0.35263079, -0.28128742],
       [-0.66804635, -1.05515055, -0.39080098],
       [ 0.48194539, -0.23855361,  0.9577587 ],
       [-0.19980213,  0.02425957,  1.54582085],
       [ 0.54510552, -0.50522874, -0.18283897],
       [ 0.54052513,  1.93508803, -0.26962033],
       [-0.24355868,  1.0023136 , -0.88645994],
       [-0.29172023,  0.88253897,  0.58035002],
       [ 0.0915167 ,  0.67010435, -2.82816231],
       [ 1.02130682, -0.95964476, -1.66861984]])

In [44]:
# Since arrays are row-major ordered, the first axis, i.e. axis=0, refers to its rows. We pass this argument into
# the mean() method for the object X. Averaging along axis 0 averages over the 10 rows, so the result
# has one mean per column (3 values).
X.mean(axis=0)

array([ 0.15030588,  0.14030961, -0.34238602])

In [46]:
# The following yields the same result: axis is the first argument of the mean() method, so it can
# also be passed by position instead of by name.
X.mean(0)

array([ 0.15030588,  0.14030961, -0.34238602])