## Learning Numpy

In [1]:
import numpy as np # importing numpy

### Getting in shape

In [2]:
temperatures = np.array(
    [
        # sample readings, laid out here in the rows they occupy after reshaping
        29.3, 42.1, 18.8,
        16.1, 38.0, 12.5,
        12.6, 49.9, 38.6,
        31.3, 9.2, 22.2,
    ]
).reshape((2, 2, 3))  # 2 planes x 2 rows x 3 columns

In [3]:
temperatures.shape  # the array's dimensions as a tuple, one entry per axis

(2, 2, 3)

In [4]:
temperatures

array([[[29.3, 42.1, 18.8],
        [16.1, 38. , 12.5]],

       [[12.6, 49.9, 38.6],
        [31.3,  9.2, 22.2]]])

In [5]:
np.swapaxes(temperatures, 1, 2)  # exchange axes 1 and 2: each 2x3 plane becomes 3x2

array([[[29.3, 16.1],
        [42.1, 38. ],
        [18.8, 12.5]],

       [[12.6, 31.3],
        [49.9,  9.2],
        [38.6, 22.2]]])

### Broadcasting 

In [6]:
# 4x4 integer grid used below to demonstrate axis-wise reductions.
table = np.array(
    [
        [5, 3, 7, 1],
        [2, 6, 7, 9],
        [1, 1, 1, 1],
        [4, 3, 2, 0],
    ]
)

By default, `.max()` returns the greatest value in the whole array, no matter how many dimensions it has.
However, if you specify an axis, the maximum is computed along that axis only.

For example, with an argument of `axis=0`, `.max()` selects the maximum value in each of the four vertical sets of values in `table` and returns an array that has been **flattened**, or aggregated into a one-dimensional array.

In [7]:
table.max()  # no axis given: the maximum over every element of the array

9

In [8]:
table.max(axis=0)  # reduce along axis 0 (down the rows): one maximum per column

array([5, 6, 7, 9])

In [9]:
table.max(axis=1)  # reduce along axis 1 (across the columns): one maximum per row

array([7, 9, 1, 4])

In [10]:
# 32 consecutive integers arranged as 4 planes x 1 row x 8 columns.
A = np.arange(4 * 1 * 8).reshape((4, 1, 8))

`A` has 4 planes with 1 row and 8 columns.

The number of elements in A can be calculated as 
$4×1×8$ = 32

In [11]:
A

array([[[ 0,  1,  2,  3,  4,  5,  6,  7]],

       [[ 8,  9, 10, 11, 12, 13, 14, 15]],

       [[16, 17, 18, 19, 20, 21, 22, 23]],

       [[24, 25, 26, 27, 28, 29, 30, 31]]])

In [12]:
# 48 consecutive integers arranged as 1 plane x 6 rows x 8 columns.
B = np.arange(1 * 6 * 8).reshape((1, 6, 8))

`B` has 1 plane with 6 rows and 8 columns.
The number of elements in `B` can be calculated as 
$1×6×8$ = 48

In [13]:
B

array([[[ 0,  1,  2,  3,  4,  5,  6,  7],
        [ 8,  9, 10, 11, 12, 13, 14, 15],
        [16, 17, 18, 19, 20, 21, 22, 23],
        [24, 25, 26, 27, 28, 29, 30, 31],
        [32, 33, 34, 35, 36, 37, 38, 39],
        [40, 41, 42, 43, 44, 45, 46, 47]]])

In [14]:
A + B  # broadcasting: shapes (4, 1, 8) and (1, 6, 8) combine into a (4, 6, 8) result

array([[[ 0,  2,  4,  6,  8, 10, 12, 14],
        [ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54]],

       [[ 8, 10, 12, 14, 16, 18, 20, 22],
        [16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62]],

       [[16, 18, 20, 22, 24, 26, 28, 30],
        [24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70]],

       [[24, 26, 28, 30, 32, 34, 36, 38],
        [32, 34, 36, 38, 40, 42, 44, 46],
        [40, 42, 44, 46, 48, 50, 52, 54],
        [48, 50, 52, 54, 56, 58, 60, 62],
        [56, 58, 60, 62, 64, 66, 68, 70],
        [64, 66, 68, 70, 72, 74, 76, 78]]])

The way broadcasting works is that NumPy duplicates the plane in B three times so that you have a total of four, matching the number of planes in A. It also duplicates the single row in A five times for a total of six, matching the number of rows in B. Then it adds each element in the newly expanded A array to its counterpart in the same location in B. The result of each calculation shows up in the corresponding location of the output.

### Data Science Operations 

#### Indexing

In [15]:
# 4x4 grid whose rows and columns each total 34 (checked in the cells below).
square = np.array(
    [
        [16, 3, 2, 13],
        [5, 10, 11, 8],
        [9, 6, 7, 12],
        [4, 15, 14, 1],
    ]
)

In [16]:
# Slice out each column (square[:, i]) and each row (square[i, :]);
# every one of them must add up to 34.
assert all(square[:, i].sum() == 34 for i in range(4))
assert all(square[i, :].sum() == 34 for i in range(4))

In [17]:
# Each 2x2 quadrant of the square must also total 34.
assert square[:2, :2].sum() == 34  # top-left
assert square[:2, 2:].sum() == 34  # top-right
assert square[2:, :2].sum() == 34  # bottom-left (was a repeat of the top-left check)
assert square[2:, 2:].sum() == 34  # bottom-right

#### Masking

In [18]:
# 24 evenly spaced points from 5 to 50 inclusive, stored as integers via dtype=int,
# then arranged in 4 rows; -1 lets NumPy infer the column count.
numbers = np.linspace(start=5, stop=50, num=24, dtype=int).reshape((4, -1))

In [19]:
numbers

array([[ 5,  6,  8, 10, 12, 14],
       [16, 18, 20, 22, 24, 26],
       [28, 30, 32, 34, 36, 38],
       [40, 42, 44, 46, 48, 50]])

In [20]:
# Boolean array with the same shape as `numbers`: True where the element is divisible by 4.
mask = (numbers % 4) == 0

In [21]:
mask

array([[False, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False],
       [ True, False,  True, False,  True, False]])

In [22]:
numbers[mask]

array([ 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48])

In [23]:
# Same selection in a single step: build the boolean mask inline and index with it.
by_four = numbers[(numbers % 4) == 0]

In [24]:
by_four

array([ 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48])

[Normal distribution](https://en.wikipedia.org/wiki/Normal_distribution)

In [25]:
from numpy.random import default_rng 

In [26]:
# Seed the generator so every Restart & Run All draws the same samples.
# NOTE: outputs saved from earlier unseeded runs will differ until the notebook is re-executed.
rng = default_rng(42)

In [27]:
# Draw 10,000 samples from the standard normal distribution.
values = rng.standard_normal(size=10_000)

In [28]:
values[:5]

array([ 1.59598261,  1.75304294, -1.10507866, -0.64407664, -0.44981403])

In [29]:
# Standard deviation of the drawn samples.
std = np.std(values)

In [30]:
std

1.006274644390239

In [31]:
# True for samples lying strictly between -2*std and +2*std.
mask = np.logical_and(values > -2 * std, values < 2 * std)

In [32]:
filtered = values[mask]  # keep only the samples where the mask is True

In [33]:
filtered.size

9565

In [34]:
values.size

10000

In [35]:
(filtered.size / values.size) * 100  # percentage of samples that passed the mask

95.65

#### Transposition 