In [1]:
import numpy as np 

# Array Creation

`np.array()`: Creates a NumPy array from a Python sequence (like a list, tuple, or other array-like object).

-	`np.asarray()`: Converts an input data to a NumPy array if it's not already one.

	-	If the input data is already a NumPy array, `np.asarray()` will return a reference to it without creating a copy, unless necessary

-	`np.arange()`: Behaves like `np.array(range(...))`, but also accepts non-integer `start`, `stop`, and `step` values

`np.zeros()`: Creates a new array filled with zeros.

-	`np.zeros_like()`: Return an array of zeros with the same shape and type as a given array_like.

`np.ones()`: Creates a new array filled with ones.

-	`np.ones_like()`: Return an array of ones with the same shape and type as a given array_like.

`np.empty()`: Creates a new array filled with uninitialized values (garbage values).

`np.eye()` OR `np.identity()`: Creates a new identity matrix.

`np.full(dimension, default)`: fill the array with a customized value instead of zeros (`np.zeros()`) or ones (`np.ones()`)

`np.random.rand(#rows, #cols)`: Creates an array of the given shape filled with random samples drawn uniformly from [0, 1).

`np.linspace()`: Creates an array of evenly spaced values within a specified range.


In [3]:
# A NumPy array stores a single dtype, so mixed inputs are coerced to a common one.
mixed_values = ["str", True, 10]
arr = np.array(mixed_values)  # every element ends up as a string
arr

array(['str', 'True', '10'], dtype='<U11')

In [3]:
# Quick tour of the array constructors; every print is followed by a blank line.
print(np.zeros(shape=(2, 2)), '\n')               # 2x2 array of 0.0
print(np.ones(shape=(2, 2)), '\n')                # 2x2 array of 1.0
print(np.eye(N=2), '\n')                          # 2x2 identity matrix
print(np.full(shape=(2, 3), fill_value=8), '\n')  # 2x3 array filled with 8
print(np.random.rand(2, 2), '\n')                 # 2x2 uniform samples, different on every run
np.linspace(start=0, stop=10, num=20)             # 20 evenly spaced values from 0 to 10 (inclusive)

[[0. 0.]
 [0. 0.]] 

[[1. 1.]
 [1. 1.]] 

[[1. 0.]
 [0. 1.]] 

[[8 8 8]
 [8 8 8]] 

[[0.43924    0.7040575 ]
 [0.51950711 0.93028607]] 



array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])

### Some attributes of an `ndarray` object

`ndarray.ndim`: the number of axes (dimensions) of the array

`ndarray.shape`: a tuple with the length of the array along each axis; for a 2D array it looks like **(#rows, #cols)**

`ndarray.size`: the total number of elements (the product of the entries of `shape`); for a 2D array, **#rows $\times$ #cols**

`ndarray.dtype`: data type

`ndarray.itemsize`: the size (in bytes) of each element in the array

`ndarray.data`: a Python buffer object (`memoryview`) pointing to the start of the array's actual data

`ndarray.nbytes`: ndarray.size * ndarray.itemsize

In [4]:
# 2x3 integer array used to inspect the basic ndarray attributes.
arr = np.array([[1, 2, 3], [4, 5, 6]])

# ndim, shape, size, dtype and itemsize are each printed followed by a blank line.
for attribute_value in (arr.ndim, arr.shape, arr.size, arr.dtype, arr.itemsize):
    print(attribute_value, '\n')

print(arr.data)    # buffer object exposing the array's memory
print(arr.nbytes)  # = arr.size * arr.itemsize

2 

(2, 3) 

6 

int32 

4 

<memory at 0x0000018A98C8EDC0>
24


# Element-wise Calculations

In [2]:
height = [2, 1.5, 1.7, 1.9, 1.6]
weight = [100, 80, 60, 70, 90]

height_arr, weight_arr = np.array(height), np.array(weight)

# Arithmetic on NumPy arrays is applied element by element, so whole arrays can be
# used like scalar variables in a formula (plain Python lists do not support this).
bmi_arr = np.divide(weight_arr, np.square(height_arr))
bmi_arr

array([25.        , 35.55555556, 20.76124567, 19.39058172, 35.15625   ])

# Filtering A Numpy Array

In [3]:
# Element-wise comparison: one True/False per entry of bmi_arr.
boolean_arr = np.greater(bmi_arr, 23)
boolean_arr

array([ True,  True, False, False,  True])

In [4]:
# Boolean indexing: np_array[mask] keeps the entries where the mask is True.
# The mask may be a boolean array or a boolean list with the same length as np_array.
manual_mask = [True, False, True, False, True]
print(bmi_arr[manual_mask])

filtered_bmi = bmi_arr[bmi_arr > 23]
filtered_bmi

[25.         20.76124567 35.15625   ]


array([25.        , 35.55555556, 35.15625   ])

# Element-wise Logical Operations on Two or More Input Arrays.
If you want to implement element-wise logical AND operation on:
-   two input arrays: `np.logical_and(exp1, exp2)`
-   three or more input arrays:
    -   Chaining: `np.logical_and(np.logical_and(exp1, exp2), exp3)`
    -   Reduce: 
        ```
        from functools import reduce
        arrays = [exp1, exp2, exp3]
        result = reduce(np.logical_and, arrays)
        ``` 
AND Logical Operator: `np.logical_and(exp1, exp2)`<br>
OR Logical Operator: `np.logical_or(exp1, exp2)`<br>
NOT Logical Operator: `np.logical_not(exp)`

In [5]:
# Python's `and` cannot combine two boolean arrays, so this line raises an error:
# print(bmi_arr[bmi_arr > 21 and bmi_arr < 25])
(bmi_arr > 20) & (bmi_arr <= 25)  # element-wise AND; same result as np.logical_and(...)

array([ True, False,  True, False, False])

In [6]:
# bmi_arr is a numpy.ndarray: "numpy." is the package that defines the type,
# and "ndarray" stands for n-dimensional array.
type(bmi_arr)

numpy.ndarray

# 2D Array

In [7]:
# Each list becomes one row of the 2D array: row 0 = height, row 1 = weight.
rows = (height, weight)
arr_2d = np.array(rows)
arr_2d

array([[  2. ,   1.5,   1.7,   1.9,   1.6],
       [100. ,  80. ,  60. ,  70. ,  90. ]])

In [8]:
# shape is an attribute (not a method) of the array: 2 rows (height, weight) x 5 columns
arr_2d.shape

(2, 5)

In [9]:
# arr_2d[0][2] and arr_2d[0, 2] select the same element, but the comma form is
# preferred because it also supports slicing along several axes at once.
middle_cols = slice(1, 3)       # columns 1 and 2
print(arr_2d[:, middle_cols])   # those columns for every row
arr_2d[1, middle_cols]          # the same columns, 2nd row only

[[ 1.5  1.7]
 [80.  60. ]]


array([80., 60.])

# Broadcasting
Create a 1D array with the same number of elements as the columns of another ndarray. This allows for element-wise operations across each row of the larger array.

In [6]:
# Broadcasting demo.
# One row per player, 3 columns: height, weight, age
np_baseball = np.array([
    [180, 78.4, 22],
    [215, 102.7, 28],
    [210, 98.5, 26],
])

# One factor per column (3 elements = number of columns of np_baseball);
# NumPy applies this 1D array to every row.
conversion = np.array([0.0254, 0.453592, 1])

result = np.multiply(np_baseball, conversion)
result

array([[ 4.572    , 35.5616128, 22.       ],
       [ 5.461    , 46.5838984, 28.       ],
       [ 5.334    , 44.678812 , 26.       ]])

In [9]:
# Swapped operands: broadcasting works the same way on either side of the multiplication.
result = np.multiply(conversion, np_baseball)
result

array([[ 4.572    , 35.5616128, 22.       ],
       [ 5.461    , 46.5838984, 28.       ],
       [ 5.334    , 44.678812 , 26.       ]])

# Matrix Multiplication

In [46]:
A = np.array([[1, 2, 3], [4, 5, 6]])  # shape (2, 3)
B = np.array([[7], [8], [9]])         # shape (3, 1)
C = A @ B  # (2, 3) @ (3, 1) -> shape (2, 1); equivalent to: C = np.dot(A, B)
C

array([[ 50],
       [122]])

### `ndarray.transpose`

After transposing an array, rows become columns and columns become rows → so its shape changes from (n, m) to (m, n)

In [48]:
A.T  # shorthand for A.transpose(): the (2, 3) matrix becomes (3, 2)

array([[1, 4],
       [2, 5],
       [3, 6]])

## Array concatenation

We can concatenate arrays vertically using `axis=0` or horizontally using `axis=1`

In [60]:
# Two plain Python lists; the NumPy functions below accept any array-like input.
arr1 = [1, 2, 3]
arr2 = [4, 5, 6]

# method 1: np.append returns a new array with arr2's values after arr1's
print(np.append(arr1, arr2))

# method 2: np.concatenate joins a sequence of array-likes (along axis 0 by default)
print(np.concatenate([arr1, arr2]))

# method 3: plain list concatenation with `+` -- returns a list, not an array
print(arr1 + arr2)

[1 2 3 4 5 6]
[1 2 3 4 5 6]
[1, 2, 3, 4, 5, 6]


### `np.stack`, 
### `np.vstack` **=** `np.row_stack`, and 
### `np.hstack` **=** `np.column_stack`

<br>

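Stack arrays: `np.stack` joins them along a new axis, while `np.vstack` / `np.hstack` join them along an existing one. Note that `np.row_stack` is a deprecated alias of `np.vstack` (NumPy 2.0), and `np.hstack` matches `np.column_stack` only for 2D inputs — `np.column_stack` first turns 1D inputs into columns.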

In [46]:
# Two 2x3 nested lists used as inputs for the stacking examples.
arr1 = [[1, 2, 3], [4, 5, 6]]
arr2 = [[6, 7, 8], [9, 10, 11]]

In [47]:
# Compare the stacking helpers on the two 2x3 inputs arr1 and arr2.

# np.stack joins the inputs along a NEW axis, so the result is 3D.
print(np.stack((arr1, arr2), axis=0), '\n')

print('=========')

print(np.stack((arr1, arr2), axis=1), '\n')

print('=========')

# np.vstack joins along the existing first axis (rows).
print(np.vstack((arr1, arr2)), '\n')
print('---------')
# np.row_stack was only an alias of np.vstack and is deprecated since NumPy 2.0,
# so the equivalent axis=0 concatenation is shown instead (same result).
print(np.concatenate((arr1, arr2), axis=0), '\n')

print('=========')

# np.hstack joins 2D inputs along the second axis (columns).
print(np.hstack((arr1, arr2)), '\n')
print('---------')
# For 2D inputs np.column_stack gives the same result as np.hstack;
# for 1D inputs it differs, because each 1D input is first turned into a column.
print(np.column_stack((arr1, arr2)), '\n')

[[[ 1  2  3]
  [ 4  5  6]]

 [[ 6  7  8]
  [ 9 10 11]]] 

[[[ 1  2  3]
  [ 6  7  8]]

 [[ 4  5  6]
  [ 9 10 11]]] 

[[ 1  2  3]
 [ 4  5  6]
 [ 6  7  8]
 [ 9 10 11]] 

---------
[[ 1  2  3]
 [ 4  5  6]
 [ 6  7  8]
 [ 9 10 11]] 

[[ 1  2  3  6  7  8]
 [ 4  5  6  9 10 11]] 

---------
[[ 1  2  3  6  7  8]
 [ 4  5  6  9 10 11]] 



#### `np.c_[col1, col2]`

concatenate multiple columns into a single `ndarray`

In [4]:
# np.c_ treats 1D inputs as columns and places them side by side.
# The columns are defined in this cell so the result does not depend on whichever
# arr1/arr2 happen to be in the kernel (the 2D ones defined earlier would be
# joined horizontally instead of producing a 3x2 array).
col1 = [1, 2, 3]
col2 = [4, 5, 6]
columns = np.c_[col1, col2]  # shape (3, 2)
columns

array([[1, 4],
       [2, 5],
       [3, 6]])

# Splitting

### `np.split()` AND `np.array_split()`

`np.split()` → even splits

`np.array_split()` → uneven splits

In [51]:
arr = np.arange(1, 7)  # [1 2 3 4 5 6]

# np.split needs the length to be divisible by the number of sections ...
print(np.split(arr, 3))
# ... otherwise it fails:
# print(np.split(arr, 4)) # ValueError: array split does not result in an equal division

# np.array_split tolerates sections of different lengths.
print(np.array_split(arr, 4))

[array([1, 2]), array([3, 4]), array([5, 6])]
[array([1, 2]), array([3, 4]), array([5]), array([6])]


# Reshaping

### `arr.reshape(Shape)` OR `np.reshape(arr, Shape)` 

`Shape` is a tuple of integers, e.g. (#rows, #cols) for a 2D result; `arr.reshape` also accepts the dimensions as separate arguments

The product of the dimensions in the new shape must be equal to the product of the dimensions in the old shape (for 2D shapes: #rows × #cols).
-	size1 = size2

In [19]:
arr = np.array([1, 2, 3, 4])
# Equivalent spellings: arr.reshape(2, 2) and arr.reshape((2, 2))
print(np.reshape(arr, (2, 2)))

[[1 2]
 [3 4]]


In [25]:
# Exactly one dimension may be given as -1; it is then inferred from the array's size.
for target_shape in ((2, -1), (-1, 4), (4, -1)):
    print(arr.reshape(target_shape), '\n')

[[1 2]
 [3 4]] 

[[1 2 3 4]] 

[[1]
 [2]
 [3]
 [4]] 



### `ndarray.flatten()` vs. `ndarray.ravel()`

- **View vs. Copy:**  
  `ravel()` returns a view of the original array whenever possible, which means that modifying the result might affect the original array. If a view is not possible (for example, if the array is not contiguous), then it returns a copy.

- **Difference from `flatten()`:**  
  The `flatten()` method always returns a copy of the array, while `ravel()` tries to return a view. *This makes `ravel()` generally more efficient when you just need to iterate over elements in one dimension*.


In [34]:
# 3D ndarray with shape (2, 2, 2)
arr = np.array([[[1, 2], [3, 4]],
                [[5, 6], [7, 8]]])

flat = arr.flatten()  # independent 1D copy of the data
print('flat:', flat, '\n')
view = arr.ravel()    # 1D view onto arr's data
print('view:', view, '\n')

# Writing through the view changes arr as well, but not the flattened copy.
view[0] = 123
for label, values in (('arr:', arr), ('flat:', flat), ('view:', view)):
    print(label, values, '\n')

flat: [1 2 3 4 5 6 7 8] 

view: [1 2 3 4 5 6 7 8] 

arr: [[[123   2]
  [  3   4]]

 [[  5   6]
  [  7   8]]] 

flat: [1 2 3 4 5 6 7 8] 

view: [123   2   3   4   5   6   7   8] 



## Insertion

In [32]:
# NumPy arrays have a fixed size, so one option is a round trip through a Python list:
# array -> list, modify the list, list -> array.
# (np.append and np.insert are the NumPy alternatives; they return new arrays.)

arr = np.array([1, 2, 3]).tolist()

# insert at the end
arr.append(4)
print(arr)

# insert at a certain index
arr.insert(2, 5)
print(arr)

# convert the modified list back into an array
arr_np = np.array(arr)

[1, 2, 3, 4]
[1, 2, 5, 3, 4]


## Deletion

In [71]:
# Round trip through a Python list: array -> list, delete from the list, list -> array.
# (np.delete is the NumPy alternative; it returns a new array.)

# delete an element with a certain index

arr = np.array([1, 2, 3]).tolist()

# method 1: the del statement
del arr[0]
print(arr)

# method 2: list.pop(index) removes the element at that index (and returns it)
arr.pop(0)
print(arr)

# convert the modified list back into an array
arr_np = np.array(arr)

[2, 3]
[3]


## `numpy.linalg`

It's the linear algebra module in NumPy.

### Inverse & determinant of an ndarray matrix

In [53]:
arr = [[1, 2], [4, 5]]

print(np.linalg.det(arr), '\n')              # determinant (computed in floating point)
print(np.linalg.inv(arr), '\n')              # inverse matrix
print(np.linalg.matrix_power(arr, 2), '\n')  # arr @ arr
print(np.linalg.matrix_rank(arr), '\n')      # rank
print(np.linalg.eig(arr), '\n')              # eigenvalues and eigenvectors

# Notes:
# If you try to invert a matrix with a determinant of zero (singular matrix), NumPy will raise a `LinAlgError`.
# det, inv, matrix_power and eig only work on square matrices (e.g. a 2x2 or 3x3 array, but not a 2x3 array).
# matrix_rank is the exception: it also accepts non-square matrices.

-2.9999999999999996 

[[-1.66666667  0.66666667]
 [ 1.33333333 -0.33333333]] 

[[ 9 12]
 [24 33]] 

2 

EigResult(eigenvalues=array([-0.46410162,  6.46410162]), eigenvectors=array([[-0.80689822, -0.34372377],
       [ 0.59069049, -0.9390708 ]])) 



# Mathematical Functions

In [10]:
arr = np.array([2, 4, 9, 16])

# Each function is applied element-wise; sin and cos read the values as radians.
for ufunc in (np.sqrt, np.exp, np.log, np.sin, np.cos):
    print(ufunc(arr), '\n')

[1.41421356 2.         3.         4.        ] 

[7.38905610e+00 5.45981500e+01 8.10308393e+03 8.88611052e+06] 

[0.69314718 1.38629436 2.19722458 2.77258872] 

[ 0.90929743 -0.7568025   0.41211849 -0.28790332] 

[-0.41614684 -0.65364362 -0.91113026 -0.95765948] 



## Trigonometric functions

NumPy's trigonometric functions take angles in radians → angle_in_radians = (angle_in_degrees * `np.pi`) / 180 (or use `np.deg2rad(angle_in_degrees)`)

In [93]:
angle = 30
angle_rad = angle * np.pi / 180  # degrees -> radians
print(np.sin(angle_rad), '\n')   # 0.5
print(np.cos(angle_rad))         # sqrt(3) / 2

0.49999999999999994 

0.8660254037844387


## Rounding

In [100]:
# For a single number, np.round(1.4564, 2) gives the same value as round(1.4564, 2)

arr = [1.123, 2.456, 3.789]
print(np.round(arr, 1))  # keep one digit after the decimal point
for rounding in (np.floor, np.ceil):  # round down, then round up
    print(rounding(arr))

[1.1 2.5 3.8]
[1. 2. 3.]
[2. 3. 4.]


## Sorting

In [107]:
arr = [[5, 3, 1], [4, 2, 6]]

print(np.sort(arr, axis=-1), '\n')  # last axis (the default): sorts within each row
print(np.sort(arr, axis=0))         # first axis: sorts within each column

[[1 3 5]
 [2 4 6]] 

[[4 2 1]
 [5 3 6]]


## Flattening an array_like 

-	`np.concatenate` flattens the array by one level → `np.concatenate(3D_arr_like)` returns a 2D array

-	`arr.flatten()` OR `arr.flat[:]` flattens the array completely to its 1D version → `np.array(3D_arr_like).flatten()` returns a 1D array

# Basic Statistics
### Standard Deviation
- Standard deviation tells us, on average, how much each data point deviates from the mean.
It is widely used because it is expressed in the same units as the data, making it easier to understand and compare.
A high standard deviation indicates that data points are spread out over a large range of values.
A low standard deviation indicates that data points tend to be close to the mean.

- Practical Example (Weather Data)
    - If we measure daily temperatures over a month and calculate the standard deviation, a high standard deviation would mean that temperatures fluctuate widely from day to day, whereas a low standard deviation would mean that temperatures are relatively stable.

- Equation
    - For a population standard deviation: $\sigma = \sqrt{\frac{\sum (x_i - \mu)^2}{N}}$
    - For a sample standard deviation: $s = \sqrt{\frac{\sum (x_i - \bar{x})^2}{n - 1}}$


In [12]:
# Column 0 of np_baseball holds the heights, column 1 the weights.
heights = np_baseball[:, 0]
weights = np_baseball[:, 1]

height_mean = np.mean(heights)
print(height_mean)
height_median = np.median(heights)
print(height_median)
height_std = np.std(heights)  # np.std divides by N by default (population formula, ddof=0)
print(height_std)

# np.sum() & np.sort() are much faster than the built-in sum() and list.sort(),
# because a NumPy array holds only one data type
print(np.sum(heights))

# Pearson's r (numeric vs. numeric): are height and weight correlated?
np.corrcoef(heights, weights)

201.66666666666666
210.0
15.456030825826172
605.0


array([[1.        , 0.99955168],
       [0.99955168, 1.        ]])

In [13]:
arr = np.array([1, 2, 3, 4])

# Function forms; the method forms (arr.max(), arr.argmax(), ...) give the same values.
print("MAX:", np.max(arr))         # largest value
print("arg_MAX:", np.argmax(arr))  # index of the largest value
print("MIN:", np.min(arr))         # smallest value
print("arg_MIN:", np.argmin(arr))  # index of the smallest value

MAX: 4
arg_MAX: 3
MIN: 1
arg_MIN: 0


# `numpy.random` vs. `random` module

`numpy.random` is more powerful than the `random` module. If you only need simple random operations, the `random` module suffices. However, for data science, numerical computations, or scientific simulations, `numpy.random` is the better choice due to its **support for arrays and many distributions** → source: ChatGPT 😊

**N.B.** When using multiple threads in your program, `random.seed()` may be safer than `np.random.seed()` → [source](https://stackoverflow.com/questions/7029993/differences-between-numpy-random-and-random-random-in-python)

A [Source](https://geoffruddock.com/python-random-module-faster-than-numpy/) for showing that some functions in the `random` module are much more efficient than their corresponding functions in `numpy.random`


### `np.random.choice()` → Selecting element(s) from a sequence randomly

We can use `np.random.choice()` instead of using `random.sample()`, `random.choices()`, and `random.choice()`


In [None]:
# How np.random.choice maps onto the standard-library `random` helpers
# ("~" = same kind of selection; the random draws themselves will differ):
# np.random.choice([1, 2, 3, 4], size=3, replace=False)  ~  random.sample([1, 2, 3, 4], k=3)   # without replacement
# np.random.choice([1, 2, 3, 4], size=3, replace=True)   ~  random.choices([1, 2, 3, 4], k=3)  # with replacement ("resampling")
# np.random.choice([1, 2, 3, 4])                         ~  random.choice([1, 2, 3, 4])        # choose a single element

In [13]:
# One draw from the standard normal distribution (mean 0, standard deviation 1).
random_num = np.random.normal()
print(random_num)

# Five draws from a normal distribution with mean 10 and standard deviation 2.
random_array = np.random.normal(10, 2, 5)
print(random_array)

# Two columns of 5000 normally distributed values each, rounded to 2 decimals
# (presumably heights and weights, judging by the names h and w).
n_samples = 5000
h = np.random.normal(loc=1.75, scale=0.20, size=n_samples).round(2)
w = np.random.normal(loc=60.32, scale=15, size=n_samples).round(2)

# Put the two 1D arrays side by side as the columns of one (5000, 2) array.
np_city = np.column_stack((h, w))
np_city

0.3795442607682439
[ 9.51072506 14.51242039  6.96380592  9.13052     8.77769593]


array([[ 1.8 , 53.76],
       [ 1.75, 45.1 ],
       [ 1.79, 71.29],
       ...,
       [ 1.75, 36.54],
       [ 1.85, 63.04],
       [ 1.81, 69.47]])

In [14]:
array_2d = np.array([[1, 2, 3], [4, 5, 6]])

# np.nditer's default is order='K' (follow the array's memory layout), which matches
# row-major order only for C-contiguous arrays -- so 'C' is requested explicitly here.
iterator = np.nditer(array_2d, order='C')  # order = 'C' (row-major order, C-style)
for x in iterator:
    print(x)
print("\n")
iterator = np.nditer(array_2d, order='F')  # order = 'F' (column-major order, Fortran-style)
for x in iterator:
    print(x)

1
2
3
4
5
6


1
4
2
5
3
6


# Miscellaneous 

#### `np.ptp`

calculates the range ("peak to peak") of the values in an array-like object, i.e. maximum − minimum (not last − first)

In [12]:
# np.ptp ("peak to peak") returns max - min. The values are deliberately unsorted
# so the result (3 - 1 = 2) cannot be mistaken for last - first (2 - 3 = -1).
arr1 = [3, 1, 2]
np.ptp(arr1)

2

### `np.unique()`

In [54]:
np.unique([1, 2, 3, 4, 5] * 2)  # the values 1..5 appear twice; unique returns each value once, sorted

array([1, 2, 3, 4, 5])