# Numpy Basics

In [1]:
import numpy as np

In [2]:
# Build a 1-D array from a Python list, forcing 32-bit float storage.
arr = np.array([-1, 2, 5], dtype=np.float32)
# repr (rather than str) is printed so the dtype shows up in the output.
print(f"{arr!r}")

array([-1.,  2.,  5.], dtype=float32)


NumPy arrays can have any number of dimensions (they are n-dimensional).

In [3]:
# A list of equal-length lists becomes a 2-D array: two rows, three columns.
arr = np.array(
    [
        [0, 1, 2],
        [3, 4, 5],
    ],
    dtype=np.float32,
)
print(f"{arr!r}")

array([[0., 1., 2.],
       [3., 4., 5.]], dtype=float32)


If the elements have mixed types, NumPy upcasts every element to the most general type present, e.g. int -> float, and numbers -> strings.

In [4]:
# A string next to numbers forces every element to be stored as a string.
arr = np.array(
    ["Hayford", 2, 3.0],
)
print(f"{arr!r}")

array(['Hayford', '2', '3.0'], dtype='<U32')


NumPy arrays are mutable, and plain assignment (e.g. `c = a`) only gives the same array a second name. To create a new, independent object, _copy_ is used.

In [5]:
a = np.array([0, 1])
b = np.array([9, 8])

# Plain assignment only binds a second name to the same array object ...
c = a
print(f"Array a :{a!r}")
# ... so a write through `c` is visible through `a` as well.
c[0] = 5
print(f"Array a: {a!r}")

# copy() produces an independent array; editing `d` leaves `b` untouched.
d = b.copy()
d[0] = 6
print(f"Array b: {b!r}")

Array a :array([0, 1])
Array a: array([5, 1])
Array b: array([9, 8])


Casting of an array is done through __*astype*__ function. 

In [6]:
# Integer literals give the platform's default integer dtype.
arr = np.array([0, 1, 2])
print(f"{arr.dtype}")

# astype hands back a converted array, so the name is rebound to keep it.
arr = arr.astype(np.float32)
print(f"{arr.dtype}")

int64
float32


To indicate that an index contains no particular value, we use **np.nan** as a placeholder.

In [7]:
# Alongside numbers, NaN keeps the array numeric (floating point).
arr = np.array([np.nan, 1, 2])
print(f"{arr!r}")

# Alongside a string, NaN itself is converted to text.
arr = np.array([np.nan, "abc"])
print(f"{arr!r}")

array([nan,  1.,  2.])
array(['nan', 'abc'], dtype='<U32')


Because **np.nan** is a float, the dtype specified matters when it is combined with other values: requesting an integer dtype raises an error.

In [8]:
# NaN is a floating-point value, so asking for an integer dtype fails:
#   np.array([np.nan, 1, 2], dtype=np.int32)  # --> cannot convert float NaN to integer
# A float dtype stores it without complaint.
arr = np.array([np.nan, 1, 2], dtype=np.float32)
print(f"{arr!r}")

array([nan,  1.,  2.], dtype=float32)


To represent infinity (a value larger than any finite number), **np.inf** is used

In [9]:
# Infinity compares greater than any finite number.
print(np.inf > 1_000_000_000)

# Positive infinity stored next to an ordinary value.
arr = np.array([np.inf, 5])
print(f"{arr!r}")

# Negating np.inf gives negative infinity.
arr = np.array([-np.inf, 1])
print(f"{arr!r}")

True
array([inf,  5.])
array([-inf,   1.])


np.inf is of type float.   
The following line raises an error if it is uncommented

In [10]:
# np.array([np.inf, 3], dtype=np.int32) # cannot convert float infinity to integer

Similar to range, there is **np.arange** that creates a new array with the values as the range specified

In [11]:
# A single argument is the exclusive stop: the integers 0 through 4.
arr = np.arange(5)
print(f"{arr!r}")

# A float stop yields floats; 5.0 is included because it is below 5.1.
arr = np.arange(5.1)
print(f"{arr!r}")

# Explicit start and stop, with the element type forced to float32.
arr = np.arange(-1, 4, dtype=np.float32)
print(f"{arr!r}")

# The third positional argument is the step size.
arr = np.arange(-1.5, 4, 2)
print(f"{arr!r}")

array([0, 1, 2, 3, 4])
array([0., 1., 2., 3., 4., 5.])
array([-1.,  0.,  1.,  2.,  3.], dtype=float32)
array([-1.5,  0.5,  2.5])


**np.linspace** is used to specify the number of elements in the returned array, rather than the step size. The required arguments are start and end (the end is inclusive by default). The optional argument *endpoint*, when set to *False*, excludes the end value. To specify the number of elements, set the *num* keyword argument (default value 50). 

In [12]:
# With no `num`, linspace returns 50 evenly spaced samples from 5 to 11.
arr = np.linspace(5, 11)
print(repr(arr))

# Ask for exactly 4 samples; the end value 11 is included.
arr = np.linspace(5, 11, num=4)
print(repr(arr))

# endpoint=False leaves 11 out, so the spacing shrinks from 2 to 1.5.
# (A trailing, never-printed repeat of the num=4 call used to follow here; it
# was dropped so `arr` matches the last result shown.)
arr = np.linspace(5, 11, num=4, endpoint=False)
print(repr(arr))

array([ 5.        ,  5.12244898,  5.24489796,  5.36734694,  5.48979592,
        5.6122449 ,  5.73469388,  5.85714286,  5.97959184,  6.10204082,
        6.2244898 ,  6.34693878,  6.46938776,  6.59183673,  6.71428571,
        6.83673469,  6.95918367,  7.08163265,  7.20408163,  7.32653061,
        7.44897959,  7.57142857,  7.69387755,  7.81632653,  7.93877551,
        8.06122449,  8.18367347,  8.30612245,  8.42857143,  8.55102041,
        8.67346939,  8.79591837,  8.91836735,  9.04081633,  9.16326531,
        9.28571429,  9.40816327,  9.53061224,  9.65306122,  9.7755102 ,
        9.89795918, 10.02040816, 10.14285714, 10.26530612, 10.3877551 ,
       10.51020408, 10.63265306, 10.75510204, 10.87755102, 11.        ])
array([ 5.,  7.,  9., 11.])
array([5. , 6.5, 8. , 9.5])


Numpy arrays can also be reshaped to a different dimension using **np.reshape**, which takes in the array to be reshaped and the new dimensions as required arguments. 

In [13]:
# Eight consecutive integers, 0 through 7, as a flat array.
arr = np.arange(8)

# Re-view the same 8 values as 2 rows of 4 columns.
reshaped_arr = np.reshape(arr, (2, 4))
print(f"{reshaped_arr!r}")
print(f"New shape: {reshaped_arr.shape}")

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
New shape: (2, 4)


The shape passed in as argument can have -1 in at most one dimension; NumPy infers the size of that dimension so that the new shape contains all the elements of the array.

In [14]:
# -1 lets NumPy work out that axis from the element count of `arr`
# (`arr` comes from the previous cell).
reshaped_arr = np.reshape(arr, (-1, 2, 2))
print(f"{reshaped_arr!r}")
print(f"New shape: {reshaped_arr.shape}")

array([[[0, 1],
        [2, 3]],

       [[4, 5],
        [6, 7]]])
New shape: (2, 2, 2)


There is also the option to flatten an array to 1D using the **flatten** method.

In [15]:
# Start from a 2x4 layout of the existing `arr`, then collapse it to 1-D.
arr = np.reshape(arr, (2, 4))
flattened = arr.flatten()

# The source array keeps its 2-D shape ...
print(f"{arr!r}")
print(f"arr shape: {arr.shape}")

# ... while the flattened result is one-dimensional.
print(f"{flattened!r}")
print(f"flattened shape: {flattened.shape}")

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])
arr shape: (2, 4)
array([0, 1, 2, 3, 4, 5, 6, 7])
flattened shape: (8,)


There is also **np.transpose**, which rearranges the dimensions of an array (for a 2-D array, rows become columns and columns become rows)

In [16]:
# Lay the existing `arr` out as 4 rows x 2 columns, then swap the two axes.
arr = np.reshape(arr, (4, 2))
transposed = np.transpose(arr)

print(f"{arr!r}")
print(f"arr shape: {arr.shape}")

# Rows and columns have traded places in the transposed result.
print(f"{transposed!r}")
print(f"transposed shape: {transposed.shape}")

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])
arr shape: (4, 2)
array([[0, 2, 4, 6],
       [1, 3, 5, 7]])
transposed shape: (2, 4)


**np.transpose** has an optional argument *axes*, which represents the new permutation of the dimensions. The permutation is a list or tuple of integers. 

In [17]:
# 24 consecutive integers arranged as a 3 x 4 x 2 block.
arr = np.reshape(np.arange(24), (3, 4, 2))

# axes=(1, 2, 0): old axis 1 becomes the first axis, old axis 2 the second,
# and old axis 0 the last, turning shape (3, 4, 2) into (4, 2, 3).
transposed = np.transpose(arr, axes=(1, 2, 0))

print(f"arr shape: {arr.shape}")
print(f"{transposed!r}")
print(f"transposed shape: {transposed.shape}")

arr shape: (3, 4, 2)
array([[[ 0,  8, 16],
        [ 1,  9, 17]],

       [[ 2, 10, 18],
        [ 3, 11, 19]],

       [[ 4, 12, 20],
        [ 5, 13, 21]],

       [[ 6, 14, 22],
        [ 7, 15, 23]]])
transposed shape: (4, 2, 3)


NumPy provides the option to create an array filled solely with zeros or ones using **np.zeros** or **np.ones**

In [18]:
# A bare integer shape gives a 1-D array; elements are floats by default.
arr = np.zeros(4)
print(f"{arr!r}")

# A tuple shape gives a multi-dimensional array (2 rows, 3 columns).
arr = np.ones((2, 3))
print(f"{arr!r}")

# dtype overrides the float default.
arr = np.ones((2, 3), dtype=np.int32)
print(f"{arr!r}")

array([0., 0., 0., 0.])
array([[1., 1., 1.],
       [1., 1., 1.]])
array([[1, 1, 1],
       [1, 1, 1]], dtype=int32)


We can also create array with zeros or ones with same shape as another array using **np.zeros_like** or **np.ones_like**

In [19]:
# zeros_like copies both shape and dtype (integer here) from its argument.
arr = np.array([[1, 2], [3, 4]])
print(f"{np.zeros_like(arr)!r}")

# With a float template the result is float, unless dtype says otherwise.
arr = np.array([[0., 1.], [1.2, 4.]])
print(f"{np.ones_like(arr)!r}")
print(f"{np.ones_like(arr, dtype=np.int32)!r}")

array([[0, 0],
       [0, 0]])
array([[1., 1.],
       [1., 1.]])
array([[1, 1],
       [1, 1]], dtype=int32)


## Numpy Math

In [20]:
arr = np.array([[1, 2], [3, 4]])

# Every operator below is applied element-wise and yields a new array.

# Add 1 to every element
print(f"{(arr + 1)!r}")

# Subtract 1.2 from every element (the result becomes float)
print(f"{(arr - 1.2)!r}")

# Double every element
print(f"{(arr * 2)!r}")

# Halve every element (true division always gives floats)
print(f"{(arr / 2)!r}")

# Floor (integer) division
print(f"{(arr // 2)!r}")

# Square every element
print(f"{(arr ** 2)!r}")

# Square root of every element
print(f"{(arr ** 0.5)!r}")

array([[2, 3],
       [4, 5]])
array([[-0.2,  0.8],
       [ 1.8,  2.8]])
array([[2, 4],
       [6, 8]])
array([[0.5, 1. ],
       [1.5, 2. ]])
array([[0, 1],
       [1, 2]])
array([[ 1,  4],
       [ 9, 16]])
array([[1.        , 1.41421356],
       [1.73205081, 2.        ]])


It is easy to convert a huge dataset with only a few operations

In [21]:
def f2c(temps):
    """Convert Fahrenheit temperatures to Celsius.

    Only arithmetic operators are used, so `temps` may be a plain number or
    a NumPy array (in which case the conversion is element-wise).
    """
    degrees_above_freezing = temps - 32
    return (5 / 9) * degrees_above_freezing


# One vectorized call converts the whole array; no explicit loop is needed.
fahrenheits = np.array([32, -4, 14, -40])
celsius = f2c(fahrenheits)
print(f"Celsius: {celsius!r}")

Celsius: array([  0., -20., -10., -40.])


NB: Performing arithmetic on NumPy arrays *does not change the original array.* A new array is created for the result of the operation. 

## Non-linear functions
e.g. **np.exp** --> performs a base *e* exponential on an array  
     **np.exp2** --> performs a base 2 exponential on an array  
     **np.log** --> performs logarithms using base *e*  
     **np.log2** --> performs logarithms using base 2  
     **np.log10** --> performs logarithms using base 10

In [22]:
arr = np.array([[1, 2], [3, 4]])
print(f"Original arr: {arr!r}")

# e raised to each element
print(f"Raising a power of e based on the elements: \n\t\t\t\t{np.exp(arr)!r}")

# 2 raised to each element
print(f"Raising a power of 2 based on the elements: \n\t\t\t\t{np.exp2(arr)!r}")

# Inputs chosen so some logarithms come out as round numbers.
arr2 = np.array([[1, 10], [np.e, np.pi]])
print(f"Natural logarithm: \n\t\t\t\t{np.log(arr2)!r}")
print(f"Base 10 logarithm: \n\t\t\t\t{np.log10(arr2)!r}")


Original arr: array([[1, 2],
       [3, 4]])
Raising a power of e based on the elements: 
				array([[ 2.71828183,  7.3890561 ],
       [20.08553692, 54.59815003]])
Raising a power of 2 based on the elements: 
				array([[ 2.,  4.],
       [ 8., 16.]])
Natural logarithm: 
				array([[0.        , 2.30258509],
       [1.        , 1.14472989]])
Base 10 logarithm: 
				array([[0.        , 1.        ],
       [0.43429448, 0.49714987]])


**np.power** is used to do a regular power operation with any base. First argument is the base and the second is the power.

In [23]:
arr = np.array([[1, 2], [3, 4]])

# Scalar base, array of exponents: 3 ** x for each element x.
print(f"Raise 3 to power of each number in arr: {np.power(3, arr)!r}")

# Array base, array of exponents: paired up position by position.
arr2 = np.array([[10.2, 4], [3, 5]])
print(f"Raise arr2 to power of each number in arr: {np.power(arr2, arr)!r}")

Raise 3 to power of each number in arr: array([[ 3,  9],
       [27, 81]])
Raise arr2 to power of each number in arr: array([[ 10.2,  16. ],
       [ 27. , 625. ]])


[This documentation](https://numpy.org/doc/stable/reference/routines.math.html) contains a list of the numpy mathematical functions.

## Matrix multiplication

**np.matmul** takes two vector/matrix arrays as input and produces a dot product or matrix multiplication. Dimensions should be valid.

In [24]:
# Two 1-D vectors of equal length: matmul reduces to their dot product.
arr1 = np.array([1, 2, 3])
arr2 = np.array([-3, 0, 10])
print(f"Dot product: {np.matmul(arr1, arr2)}")

# (3 x 2) times (2 x 3) gives 3 x 3; the reverse order gives 2 x 2.
arr3 = np.array([[1, 2], [3, 4], [5, 6]])
arr4 = np.array([[-1, 0, 1], [3, 2, -4]])
print(f"Matrix multiplication A3 x A4: {np.matmul(arr3, arr4)!r}")
print(f"Matrix multiplication A4 x A3: {np.matmul(arr4, arr3)!r}")

# Inner dimensions must agree; (3 x 2) times (3 x 2) raises an error
# (size 3 is different from 2):
# print(repr(np.matmul(arr3, arr3)))

Dot product: 27
Matrix multiplication A3 x A4: array([[  5,   4,  -7],
       [  9,   8, -13],
       [ 13,  12, -19]])
Matrix multiplication A4 x A3: array([[  4,   4],
       [-11, -10]])


## Random Operations

Numpy has **np.random** which is similar to Python's random module. This is used for pseudo-random number generation and extends to multi-dimensional arrays. 

In [25]:
# A single argument is the exclusive upper bound; the lower bound defaults to 0.
print(f"Random number generated: {np.random.randint(5)}")
# Two arguments give the lower (inclusive) and upper (exclusive) bounds.
print(f"Another random number generated: {np.random.randint(4, 200)}")

# size=(2, 2) requests a 2x2 array of draws instead of a single number.
random_arr = np.random.randint(-3, high=14, size=(2, 2))
print(f"Random 2D array: {random_arr}")

Random number generated: 4
Another random number generated: 150
Random 2D array: [[-2 -3]
 [10 12]]


Utility functions from the *np.random* module include **np.random.seed, np.random.shuffle** 

**np.random.seed** is used to set the random seed to control the outputs of the pseudo-random functions. 

In [26]:
# Seed the global generator, then make two draws (a scalar and a 2x2 array).
print("First seed results:")
np.random.seed(1)
print(f"Random number within [0, 10): {np.random.randint(10)}")

random_arr = np.random.randint(3, high=100, size=(2, 2))
print(f"Random 2-D array: {random_arr}")

# The same two draws under a different seed.
print("............................................")
print("Second seed results")
np.random.seed(2)
print(f"Random number with [0, 10): {np.random.randint(10)}")
random_arr = np.random.randint(3, high=100, size=(2, 2))
print(f"Random 2-D array: {random_arr}")

# Re-seeding with 1 restarts the sequence, so the same calls made in the
# same order reproduce the first block's values.
print("............................................")
print("Back to first seed:")
np.random.seed(1)
print(f"Random number within [0, 10): {np.random.randint(10)}")

random_arr = np.random.randint(3, high=100, size=(2, 2))
print(f"Random 2-D array: {random_arr}")


First seed results:
Random number within [0, 10): 5
Random 2-D array: [[15 75]
 [12 78]]
............................................
Second seed results
Random number with [0, 10): 8
Random 2-D array: [[18 75]
 [25 46]]
............................................
Back to first seed:
Random number within [0, 10): 5
Random 2-D array: [[15 75]
 [12 78]]


**np.random.shuffle** randomly shuffles an array in place. Note that shuffling a multi-dimensional array only shuffles along the first dimension. 

In [27]:
# shuffle reorders `vec` itself; its return value is not used here.
vec = np.arange(1, 6)
print(f"Original vec: {vec}")
np.random.shuffle(vec)
print(f"Shuffled vec: {vec}")

# For a 2-D array whole rows are reordered; each row's contents stay intact.
matrix = np.arange(1, 10)
matrix = matrix.reshape((3, 3))
print(f"Original matrix: {matrix}")
np.random.shuffle(matrix)
print(f"Shuffled matrix: {matrix}")

Original vec: [1 2 3 4 5]
Shuffled vec: [3 4 2 5 1]
Original matrix: [[1 2 3]
 [4 5 6]
 [7 8 9]]
Shuffled matrix: [[7 8 9]
 [1 2 3]
 [4 5 6]]


## Distributions

**np.random** can also draw samples from probability distributions. One example is **np.random.uniform** used to draw samples from a uniform distribution.

In [28]:
# No arguments: a single draw using uniform's default bounds.
print(f"Uniform distribution with no argument: {np.random.uniform()}")
# low/high set the interval the sample is drawn from.
print(f"Uniform distribution from [-1.5, 2.2): {np.random.uniform(low=-1.5, high=2.2)}")
# size asks for several draws at once (here a 1-D array of 3).
print(f"Uniform distribution with size specified: {np.random.uniform(size=3)}")
# Bounds and a tuple size combined: a 2x2 array of draws.
print(f"A 2x2 matrix showing uniform distributions from [-3.4, 5.9): {np.random.uniform(low=-3.4, high=5.9, size=(2, 2))}")

Uniform distribution with no argument: 0.39676747423066994
Uniform distribution from [-1.5, 2.2): 0.4936219158124208
Uniform distribution with size specified: [0.41919451 0.6852195  0.20445225]
A 2x2 matrix showing uniform distributions from [-3.4, 5.9): [[ 4.76649216 -3.14529538]
 [ 2.83534784  0.48093466]]


The normal (Gaussian) distribution is also available in NumPy through **np.random.normal**

In [29]:
# No arguments: a single draw using normal's default parameters.
print(f"Drawing from a normal distribution: {np.random.normal()}")
# loc is the centre (mean) of the distribution and scale its spread.
print(f"Drawing from a normal distribution centered at 1.5: {np.random.normal(loc=1.5, scale=3.5)}")
# size=(2, 2) returns a 2x2 array of draws.
print(f"Drawing into a matrix: {np.random.normal(loc=-2.4, scale=4.0, size=(2, 2))}")

Drawing from a normal distribution: -1.1103630547363093
Drawing from a normal distribution centered at 1.5: 2.13424993322389
Drawing into a matrix: [[-0.14262054 -4.66604092]
 [ 0.51990239 -0.90802484]]


There is also the choice to sample from your custom distribution with **np.random.choice**

In [30]:
colors = ["red", "blue", "green"]
# With no extra arguments a single element is picked.
print(f"Ranodomly choosing a color from colors: {np.random.choice(colors)}")
# size requests several picks at once.
print(f"Can also choose with size: {np.random.choice(colors, size=2)}")
# p gives one probability per entry of `colors`, in the same order.
print(f"Choosing with some probabilities: {np.random.choice(colors, size=(2, 2), p=(0.8, 0.19, 0.01))}")

Ranodomly choosing a color from colors: red
Can also choose with size: ['blue' 'red']
Choosing with some probabilities: [['blue' 'red']
 ['red' 'red']]


## Array Accessing

Numpy arrays can be accessed similar to python lists.

In [31]:
# Single-index access on a 1-D array returns one element.
arr = np.arange(1, 6)
print(f"Value at index 0: {arr[0]}")
print(f"Value at index 4: {arr[4]}")

# Single-index access on a 2-D array returns a whole row.
arr = np.array([[6, 3], [0, 2]])
print(f"Row 0: {arr[0]!r}")

Value at index 0: 1
Value at index 4: 5
Row 0: array([6, 3])


Slicing can also be done on Numpy arrays, including negative indexing.

In [32]:
arr = np.arange(1, 6)

# A full slice selects every element.
# NOTE(review): for NumPy arrays a basic slice is a view onto the same data,
# not an independent list-style copy; confirm the "Shallow copy" label is intended.
print(f"Shallow copy: {arr[:]!r}")

# Everything from index 1 onward.
print(f"Slicing off first element: {arr[1:]!r}")

# Negative start counts from the end: the last two elements.
print(f"Other slicing: {arr[-2:]}")

Shallow copy: array([1, 2, 3, 4, 5])
Slicing off first element: array([2, 3, 4, 5])
Other slicing: [4 5]


For multi-dimensional arrays, a comma can be used to separate slices across each dimension

In [33]:
arr = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)

# A single slice applies to the first axis (rows) only.
print(f"Shallow copy: {arr[:]!r}")
print(f"Slicing off row 0: {arr[1:]!r}")
print("....................................")

# A comma separates the row selection from the column selection.
print(f"To select last column -> arr[:, -1]: {arr[:, -1]!r}")
print(f"To select column 2 to end -> arr[:, 1:]: {arr[:, 1:]!r}")

# A slice (0:1) keeps the row axis; a plain index (0) drops it.
print(f"arr[0:1, 1:]: {arr[0:1, 1:]!r}")
print(f"arr[0, 1:]: {arr[0, 1:]!r}")

Shallow copy: array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])
Slicing off row 0: array([[4, 5, 6],
       [7, 8, 9]])
....................................
To select last column -> arr[:, -1]: array([3, 6, 9])
To select column 2 to end -> arr[:, 1:]: array([[2, 3],
       [5, 6],
       [8, 9]])
arr[0:1, 1:]: array([[2, 3]])
arr[0, 1:]: array([2, 3])


## Argmin & Argmax  

We can use **np.argmin** and **np.argmax** to find the indices of the minimum and maximum elements, respectively.

In [34]:
arr = np.array(
    [
        [7, -1, 8],
        [4, 5, -8],
        [-1, 9, 1],
    ]
)

# On a single row the result is a position within that row.
print(f"Index of minimum in row 0: {np.argmin(arr[0])!r}")
print(f"Index of maximum in last row: {np.argmax(arr[-1])!r}")

# With no axis the array is treated as flattened, so the index is a flat one.
print(f"Index of minimum across the flattened array: {np.argmin(arr)!r}")

Index of minimum in row 0: 1
Index of maximum in last row: 1
Index of minimum across the flattened array: 5


An axis can be specified along which to find the max or min index.

In [35]:
# axis=0 searches down each column, so one row index is reported per column
# (`arr` comes from the previous cell).
print(
    f"Indices of the minimum element in each row for each column: {np.argmin(arr, axis=0)}"
)
# axis=1 searches along each row, so one column index is reported per row.
print(
    f"Indices of the maximum element in each column for each row: {np.argmax(arr, axis=1)}"
)

Indices of the minimum element in each row for each column: [2 0 1]
Indices of the maximum element in each column for each row: [2 1 1]


## Filtering Data


Filtering is done on each element in the array

In [36]:
arr = np.array(
    [
        [0, 2, 3],
        [1, 3, -6],
        [-3, -2, 1],
    ]
)

# Each comparison is evaluated per element and yields a boolean array.
print(f"arr == 3: {(arr == 3)!r}")
print(f"arr > 0: {(arr > 0)!r}")
print(f"arr != 1: {(arr != 1)!r}")
# ~ negates a boolean array, so this is the same as arr == 1
print(f"~(arr != 1): {(~(arr != 1))!r}")


arr == 3: array([[False, False,  True],
       [False,  True, False],
       [False, False, False]])
arr > 0: array([[False,  True,  True],
       [ True,  True, False],
       [False, False,  True]])
arr != 1: array([[ True,  True,  True],
       [False,  True,  True],
       [ True,  True, False]])
~(arr != 1): array([[False, False, False],
       [ True, False, False],
       [False, False,  True]])


In [37]:
arr = np.array(
    [
        [0, 2, np.nan],
        [1, np.nan, -6],
        [np.nan, -2, 1],
    ]
)

# isnan marks, element by element, where the NaN placeholders sit.
print(f"Checking isnan: {np.isnan(arr)!r}")

Checking isnan: array([[False, False,  True],
       [False,  True, False],
       [ True, False, False]])


## Filtering in Numpy

Filtering in numpy is done using **np.where** function. It takes a required argument of boolean array and returns indices that meet the True condition.

In [38]:
# One-argument where returns a tuple holding one index array per dimension.
print(f"Using 1D array: {np.where([True, False, True])!r}")

arr = np.array([0, 3, 5, 3, 1])
print(f"{np.where(arr == 3)!r}")

# For a 2-D input the tuple unpacks into row indices and column indices.
arr = np.array(
    [
        [0, 2, 3],
        [1, 0, 0],
        [-3, 0, 0],
    ]
)
x, y = np.where(arr != 0)
print(f"X indices where there are non-zero elements: {x!r}")
print(f"Y indices where there are non-zeor elements: {y!r}")

# Indexing with both arrays pulls out the matching values.
print(f"Values at pos: {arr[x, y]!r}")

Using 1D array: (array([0, 2]),)
(array([1, 3]),)
X indices where there are non-zero elements: array([0, 0, 1, 2])
Y indices where there are non-zeor elements: array([1, 2, 0, 0])
Values at pos: array([ 2,  3,  1, -3])


**np.where** either takes in 1 or 3 arguments. When 3 arguments are provided, the second argument is what True should be replaced with and the third is what False should be replaced with. 

In [39]:
# Where the mask is True take from `positives`, otherwise from `negatives`.
np_filter = np.array([[True, False], [False, True]])
positives = np.array([[1, 2], [3, 4]])
negatives = np.array([[-2, -5], [-1, -8]])
print(f"With three arguments for np.where: {np.where(np_filter, positives, negatives)!r}")

# The mask may come from a comparison instead of being written by hand.
np_filter = positives > 2
print(f"New condition: {np.where(np_filter, positives, negatives)!r}")

# No element of `negatives` is above zero, so every pick comes from it.
np_filter = negatives > 0
print(f"Another new condition: {np.where(np_filter, positives, negatives)!r}")

With three arguments for np.where: array([[ 1, -5],
       [-1,  4]])
New condition: array([[-2, -5],
       [ 3,  4]])
Another new condition: array([[-2, -5],
       [-1, -8]])


If we want one fixed value to be used wherever the condition is True (or False), we can pass a scalar instead of an array and rely on *broadcasting*

In [40]:
np_filter = np.array([[True, False], [False, True]])
positives = np.array([[1, 2], [3, 4]])

# The scalar -1 is broadcast to every position where the mask is False.
print(f"{np.where(np_filter, positives, -1)!r}")

array([[ 1, -1],
       [-1,  4]])


There is also **axis-wise filtering** where we use **np.any** or **np.all** functions, which take boolean arrays.

In [41]:
arr = np.array(
    [
        [-2, -1, -3],
        [4, 5, -6],
        [3, 9, 1],
    ]
)
print(f"Filter array: {(arr > 0)!r}")

# With no axis, any/all reduce the whole boolean array to a single value.
print(f"Any > 0: {np.any(arr > 0)}")
print(f"All > 0: {np.all(arr > 0)}")

Filter array: array([[False, False, False],
       [ True,  True, False],
       [ True,  True,  True]])
Any > 0: True
All > 0: False


If a multidimensional array is given as input, we can specify the *axis* value. Note that the returned value in this case is an array. *axis=0* means the first dimension and *axis=1* means the second dimension.

In [42]:
arr = np.array(
    [
        [-2, -1, -3],
        [4, 5, 6],
        [3, 9, 1],
    ]
)
print(f"arr > 0: {(arr > 0)!r}")

# axis=0 reduces down the rows, giving one answer per column;
# axis=1 reduces across the columns, giving one answer per row.
print(f"Any on axis 0: {np.any(arr > 0, axis=0)!r}")
print(f"Any on axis 1: {np.any(arr > 0, axis=1)!r}")
print(f"All on axis 1: {np.all(arr > 0, axis=1)!r}")

arr > 0: array([[False, False, False],
       [ True,  True,  True],
       [ True,  True,  True]])
Any on axis 0: array([ True,  True,  True])
Any on axis 1: array([False,  True,  True])
All on axis 1: array([False,  True,  True])


In [43]:
# One boolean per row: does that row of `arr` (from the previous cell)
# contain any positive value?
has_positive = np.any(arr > 0, axis=1)
print(has_positive)

# where turns the mask into row indices, which then select those rows.
print(f"Any in tandem with np.where: {arr[np.where(has_positive)]!r}")

[False  True  True]
Any in tandem with np.where: array([[4, 5, 6],
       [3, 9, 1]])


## Statistics

We use *min* and *max* to find the minimum and maximum in arrays. It's possible to provide axis to represent the dimension to look at.

In [44]:
arr = np.array(
    [
        [0, 72, 3],
        [1, 3, -60],
        [-3, -2, 4],
    ]
)

# With no axis the extreme value of the whole array is returned.
print(f"Min in 2D array: {arr.min()!r}")
print(f"Max in 2D array: {arr.max()!r}")

# axis=0 works down each column; axis=-1 (the last axis) works along each row.
print(f"Min in each column: {arr.min(axis=0)!r}")
print(f"Max in each row: {arr.max(axis=-1)!r}")

Min in 2D array: -60
Max in 2D array: 72
Min in each column: array([ -3,  -2, -60])
Max in each row: array([72,  3,  4])


Statistical metrics such as **np.mean**, **np.var**, and **np.median** are used to calculate the mean, variance, and median of the data.

In [45]:
# Whole-array statistics of `arr` (defined in the previous cell).
print(f"Mean of the arr: {np.mean(arr)!r}")
print(f"Variance of the arr: {np.var(arr)!r}")
print(f"Median of the data: {np.median(arr)!r}")

# axis=-1 computes one median per row.
print(f"Median across each row: {np.median(arr, axis=-1)!r}")

Mean of the arr: 2.0
Variance of the arr: 977.3333333333334
Median of the data: 1.0
Median across each row: array([ 3.,  1., -2.])


## Aggregation

Similar to Python's built-in sum, NumPy has **np.sum** to sum the values within an array

In [48]:
# With no axis every element of `arr` (from an earlier cell) is added up.
print(f"Sum of all values in arr: {np.sum(arr)!r}")
# axis=0 adds down each column; axis=1 adds along each row.
print(f"Sum of values in each column: {np.sum(arr, axis=0)!r}")
print(f"Sum of values in each row: {np.sum(arr, axis=1)!r}")

Sum of all values in arr: 18
Sum of values in each column: array([ -2,  73, -53])
Sum of values in each row: array([ 75, -56,  -1])


There is also **np.cumsum** that returns the cumulative sums for the (flattened) array, if no further argument is provided. There is the option to provide axis to specify which dimension to find the cumulative sum on.

In [50]:
arr = np.array(
    [
        [0, 72, 3],
        [1, 3, -60],
        [-3, -2, 4],
    ]
)

# With no axis the running total is taken over the flattened array.
print(f"Cumulative sum of the flattend array: {np.cumsum(arr)!r}")
# axis=0 accumulates down each column; axis=1 accumulates along each row.
print(f"Cumulative sum of each column: {np.cumsum(arr, axis=0)!r}")
print(f"Cumulative sum of each row: {np.cumsum(arr, axis=1)!r}")

Cumulative sum of the flattend array: array([ 0, 72, 75, 76, 79, 19, 16, 14, 18])
Cumulative sum of each column: array([[  0,  72,   3],
       [  1,  75, -57],
       [ -2,  73, -53]])
Cumulative sum of each row: array([[  0,  72,  75],
       [  1,   4, -56],
       [ -3,  -5,  -1]])


There is the possibility to combine multiple datasets using **np.concatenate**. It uses the *axis* keyword argument, but if no argument for axis is provided, *0* is used. 

In [57]:
arr1 = np.array(
    [
        [0, 72, 3],
        [1, 3, -60],
        [-3, -2, 4],
    ]
)
arr2 = np.array(
    [
        [-15, 6, 1],
        [8, 9, -4],
        [5, -21, 18],
    ]
)

# Default axis (0): arr2's rows are appended below arr1's rows.
print(f"Combine the two arrays into one by extending the rows: {np.concatenate([arr1, arr2])!r}")
# axis=1: arr2's columns are appended to the right of arr1's columns.
print(f"Combine the two arrays into one by extending the columns: {np.concatenate([arr1, arr2], axis=1)!r}")

Combine the two arrays into one by extending the rows: array([[  0,  72,   3],
       [  1,   3, -60],
       [ -3,  -2,   4],
       [-15,   6,   1],
       [  8,   9,  -4],
       [  5, -21,  18]])
Combine the two arrays into one by extending the columns: array([[  0,  72,   3, -15,   6,   1],
       [  1,   3, -60,   8,   9,  -4],
       [ -3,  -2,   4,   5, -21,  18]])


## Saving


It is a good practice to save data in a file for future use. To do this, **np.save** is used.

In [58]:
# The integers 0..999, written to disk in NumPy's .npy format.
arr = np.arange(1000)
np.save("arr.npy", arr)
# NOTE(review): per the NumPy docs, np.save appends ".npy" when the name lacks
# it, so this call presumably rewrites the same arr.npy file -- confirm intent.
np.save("arr", arr)

## Loading saved file

**np.load** is used to load data from a saved file. 

In [59]:
# Read the array back from arr.npy; the relative path is resolved against the
# current working directory.
load_arr = np.load("arr.npy")
print(f"Loaded data: {load_arr!r}")

Loaded data: array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177,