In [5]:
import numpy as np

In [7]:
# Draw 25 random integers in [1, 100) directly as a 5x5 matrix.
# No seed is set, so the values differ on every run.
data = np.random.randint(1, 100, size=(5, 5))
print(data)

[[52 25 48 54  7]
 [30 41 72 94 28]
 [99 30 99  2 87]
 [86 29  9 44 21]
 [ 8 91 85 24 24]]


In [10]:
#1 Matrix manipulation
data_matrix = np.array([
    [34, 12, 78, 64, 51],
    [93, 87, 32, 56, 81],
    [45, 98, 29, 22, 48],
    [23, 42, 69, 70, 95],
    [78, 31, 15, 35, 64],
])

# Exchange the second and fourth rows (indices 1 and 3) in one assignment.
# Indexing with a list on the right-hand side produces a copy, so both rows
# are read in full before either one is overwritten.
data_matrix[[1, 3], :] = data_matrix[[3, 1], :]

print("Data Matrix with Swapped Rows:")
print(data_matrix)

Data Matrix with Swapped Rows:
[[34 12 78 64 51]
 [23 42 69 70 95]
 [45 98 29 22 48]
 [93 87 32 56 81]
 [78 31 15 35 64]]


In [11]:
#2 Normalization
data_matrix = np.array([
    [34, 12, 78, 64, 51],
    [93, 87, 32, 56, 81],
    [45, 98, 29, 22, 48],
    [23, 42, 69, 70, 95],
    [78, 31, 15, 35, 64],
])

# Smallest and largest entries taken over the whole matrix
min_value = data_matrix.min()
max_value = data_matrix.max()

# Min-max scaling: shift so the minimum lands on 0, then divide by the
# value range so the maximum lands on 1
value_range = max_value - min_value
normalized_matrix = (data_matrix - min_value) / value_range

print("Normalized Data Matrix:")
print(normalized_matrix)

Normalized Data Matrix:
[[0.25581395 0.         0.76744186 0.60465116 0.45348837]
 [0.94186047 0.87209302 0.23255814 0.51162791 0.80232558]
 [0.38372093 1.         0.19767442 0.11627907 0.41860465]
 [0.12790698 0.34883721 0.6627907  0.6744186  0.96511628]
 [0.76744186 0.22093023 0.03488372 0.26744186 0.60465116]]


In [12]:
#3 Z-Score Normalization
data_matrix = np.array([
    [34, 12, 78, 64, 51],
    [93, 87, 32, 56, 81],
    [45, 98, 29, 22, 48],
    [23, 42, 69, 70, 95],
    [78, 31, 15, 35, 64],
])

# Mean and standard deviation over all 25 entries
mean_value = data_matrix.mean()
std_value = data_matrix.std()

# Z-score: centre every entry on the mean, then express the deviation in
# units of the standard deviation
centered_matrix = data_matrix - mean_value
z_score_normalized_matrix = centered_matrix / std_value

print("Z-score Normalized Data Matrix:")
print(z_score_normalized_matrix)

Z-score Normalized Data Matrix:
[[-0.77617895 -1.62657422  0.92461158  0.38345096 -0.11905534]
 [ 1.50442653  1.27250055 -0.85348761  0.07421631  1.04057457]
 [-0.35098132  1.69769818 -0.9694506  -1.24003091 -0.23501833]
 [-1.20137658 -0.46694431  0.57672261  0.61537694  1.58173519]
 [ 0.92461158 -0.89214194 -1.51061123 -0.73752462  0.38345096]]


In [13]:
#4 Array Splitting
data_matrix = np.array([
    [34, 12, 78, 64, 51],
    [93, 87, 32, 56, 81],
    [45, 98, 29, 22, 48],
    [23, 42, 69, 70, 95],
    [78, 31, 15, 35, 64],
])
print("Original Data Matrix:")
print(data_matrix)

# Flatten the 5x5 matrix into a 25-element vector (row by row)
flattened_vector = data_matrix.ravel()
print("\nFlattened Vector:")
print(flattened_vector)

# Cut the vector into five consecutive pieces; 25 elements divide evenly,
# so every piece holds five values
num_sub_arrays = 5
sub_arrays = np.array_split(flattened_vector, num_sub_arrays)
print("\nSub-Arrays:")
for i, sub_array in enumerate(sub_arrays):
    print(f"Sub-Array {i+1}: {sub_array}")

Original Data Matrix:
[[34 12 78 64 51]
 [93 87 32 56 81]
 [45 98 29 22 48]
 [23 42 69 70 95]
 [78 31 15 35 64]]

Flattened Vector:
[34 12 78 64 51 93 87 32 56 81 45 98 29 22 48 23 42 69 70 95 78 31 15 35
 64]

Sub-Arrays:
Sub-Array 1: [34 12 78 64 51]
Sub-Array 2: [93 87 32 56 81]
Sub-Array 3: [45 98 29 22 48]
Sub-Array 4: [23 42 69 70 95]
Sub-Array 5: [78 31 15 35 64]


In [14]:
#5 Dot Product
# Two length-5 vectors with arbitrary values
vector1 = np.array([2, 5, 7, 1, 3])
vector2 = np.array([9, 4, 6, 8, 2])

# Dot product via the @ operator: multiply element-wise and sum
dot_product = vector1 @ vector2

# For 1-D arrays np.dot gives the same value:
# dot_product = np.dot(vector1, vector2)

print("Vector 1:", vector1)
print("Vector 2:", vector2)
print("Dot Product:", dot_product)

Vector 1: [2 5 7 1 3]
Vector 2: [9 4 6 8 2]
Dot Product: 94


In [15]:
#6 Matrix Multiplication
data = np.array([
    [34, 12, 78, 64, 51],
    [93, 87, 32, 56, 81],
    [45, 98, 29, 22, 48],
    [23, 42, 69, 70, 95],
    [78, 31, 15, 35, 64],
])

# A second, 3x3 matrix with arbitrary values
data2 = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

# Only the top-left 3x3 corner of `data` has a shape compatible with data2
top_left_block = data[:3, :3]

# Matrix product via the @ operator; np.dot(top_left_block, data2) is
# equivalent for 2-D arrays
result = top_left_block @ data2

print("Matrix data:")
print(top_left_block)
print("\nMatrix data2:")
print(data2)
print("\nMatrix Multiplication Result:")
print(result)

Matrix data:
[[34 12 78]
 [93 87 32]
 [45 98 29]]

Matrix data2:
[[1 2 3]
 [4 5 6]
 [7 8 9]]

Matrix Multiplication Result:
[[ 628  752  876]
 [ 665  877 1089]
 [ 640  812  984]]


In [16]:
#7 Inverse of a Matrix
# 3x3 identity matrix: ones on the diagonal, zeros elsewhere
identity_matrix = np.eye(3)

# Doubling the identity gives a diagonal matrix with 2 on the diagonal
scaled_matrix = 2 * identity_matrix

# Invert the scaled matrix
inverse_matrix = np.linalg.inv(scaled_matrix)

print("Identity Matrix:")
print(identity_matrix)
print("\nScaled Matrix:")
print(scaled_matrix)
print("\nInverse of the Scaled Matrix:")
print(inverse_matrix)

Identity Matrix:
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]

Scaled Matrix:
[[2. 0. 0.]
 [0. 2. 0.]
 [0. 0. 2.]]

Inverse of the Scaled Matrix:
[[0.5 0.  0. ]
 [0.  0.5 0. ]
 [0.  0.  0.5]]


In [17]:
#8 Eigenvalues and Eigenvectors
data = np.array([
    [34, 12, 78, 64, 51],
    [93, 87, 32, 56, 81],
    [45, 98, 29, 22, 48],
    [23, 42, 69, 70, 95],
    [78, 31, 15, 35, 64],
])

# Top-left 3x3 corner of the data matrix
submatrix = data[:3, :3]

# np.linalg.eig returns the eigenvalues together with a matrix whose
# columns are the matching right eigenvectors
eig_result = np.linalg.eig(submatrix)
eigenvalues, eigenvectors = eig_result

print("First 3x3 Submatrix:")
print(submatrix)
print("\nEigenvalues:")
print(eigenvalues)
print("\nEigenvectors:")
print(eigenvectors)

First 3x3 Submatrix:
[[34 12 78]
 [93 87 32]
 [45 98 29]]

Eigenvalues:
[170.34019176 +0.j         -10.17009588+45.45680714j
 -10.17009588-45.45680714j]

Eigenvectors:
[[-0.40613931+0.j         -0.67059406+0.j         -0.67059406-0.j        ]
 [-0.68531486+0.j          0.38423037+0.32490761j  0.38423037-0.32490761j]
 [-0.60447863+0.j          0.32063384-0.44079431j  0.32063384+0.44079431j]]


In [9]:
#9 Find Missing Values
data = np.array([[34, 12, 78, 64, 51],
                 [93, 87, 32, 56, 81],
                 [45, 98, 29, 22, 48],
                 [23, 42, 69, 70, 95],
                 [78, 31, 15, 35, 64]])

# Pick 5 distinct flat positions (0 .. data.size - 1) to blank out.
# No seed is set, so the positions differ on every run.
# These flat positions get their own name so that `missing_indices` below
# only ever means the (row, column) pairs found by the search.
num_missing_values = 5
flat_missing_positions = np.random.choice(data.size, num_missing_values, replace=False)

# NaN can only be stored in a floating-point array, so work on a float copy;
# astype() returns a new array, which leaves the integer `data` untouched
data_flattened = data.ravel().astype('float32')
data_flattened[flat_missing_positions] = np.nan

# Reshape the modified data_flattened back into a matrix
data_with_nans = data_flattened.reshape(data.shape)

# Find the missing values: argwhere yields one (row, column) pair per NaN
missing_indices = np.argwhere(np.isnan(data_with_nans))

print("Data Matrix with Missing Values:")
print(data_with_nans)
print("\nIndices of Missing Values:")
for row_idx, col_idx in missing_indices:
    print(f"Row: {row_idx}, Column: {col_idx}")

Data Matrix with Missing Values:
[[34. 12. 78. 64. 51.]
 [93. 87. 32. 56. nan]
 [45. nan nan 22. 48.]
 [23. nan 69. 70. 95.]
 [78. nan 15. 35. 64.]]

Indices of Missing Values:
Row: 1, Column: 4
Row: 2, Column: 1
Row: 2, Column: 2
Row: 3, Column: 1
Row: 4, Column: 1


In [2]:
#10 Replace Missing Values
data = np.array([
    [34, 12, 78, 64, 51],
    [93, 87, np.nan, 56, 81],
    [45, 98, 29, 22, 48],
    [23, 42, 69, np.nan, 95],
    [78, 31, np.nan, 35, 64],
])

# Mean over the entries that are present; nanmean skips the NaNs
mean_value = np.nanmean(data)

# Fill every NaN slot with that mean and keep all other entries as they are
data = np.where(np.isnan(data), mean_value, data)

print("Data Matrix with Missing Values Replaced:")
print(data)

Data Matrix with Missing Values Replaced:
[[34.         12.         78.         64.         51.        ]
 [93.         87.         56.13636364 56.         81.        ]
 [45.         98.         29.         22.         48.        ]
 [23.         42.         69.         56.13636364 95.        ]
 [78.         31.         56.13636364 35.         64.        ]]


In [None]:
# Useful Commands
# Indexing and slicing: Use indexing to select individual elements and slicing to select a range of elements in a matrix. Example: matrix[1,:] returns the second row of the matrix.


# np.min, np.max: These functions return the minimum and maximum values of an array respectively.


# np.mean, np.std: These functions return the mean and standard deviation of an array respectively.


# np.ravel: This function converts a multi-dimensional array into a 1-dimensional array.


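# np.split / np.array_split: These functions split an array into multiple sub-arrays. np.split raises an error when the array cannot be divided into equal-sized sections; np.array_split (used in #4) also allows sections of unequal size.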


# np.dot: This function computes the dot product of two arrays. For 2-D arrays it is equivalent to matrix multiplication.


# np.linalg.inv: This function computes the (multiplicative) inverse of a matrix.


# np.eye: This function returns a 2-D array with ones on the diagonal and zeros elsewhere (identity matrix).


# np.linalg.eig: This function computes the eigenvalues and right eigenvectors of a square array.


# np.isnan: This function returns a boolean array where True represents NaN values.


# np.where: This function returns the indices of elements in an input array where the given condition is satisfied.


# np.nanmean: This function computes the arithmetic mean along the specified axis, ignoring NaNs.


# Short Explanations of Some of the Terms Used
# Z-score normalization: In simple terms, z-score normalization is a method used to scale or standardize values in a dataset so that they can be compared accurately. For example, imagine you have two different tests scored out of 100 and 10. If you scored 80 on the first test and 8 on the second, it’s hard to compare these directly because they’re on different scales. But if we convert these scores to z-scores, we’ll be able to compare them directly because they’ll be on the same scale. Z-scores tell you how many standard deviations a value is from the average (mean). A z-score of 0 indicates the value is exactly average, a positive z-score indicates it’s above average, and a negative z-score indicates it’s below average.


# Dot product of two vectors: The dot product is a way of multiplying two vectors together to get a single number (a scalar). It is computed by multiplying the corresponding elements and adding up the results. This can tell you a lot about the relationship between the two vectors. For example, if the dot product of two non-zero vectors is zero, the vectors are perpendicular to each other. Geometrically, think of it as the amount one vector ‘goes in the direction’ of another. It’s used in many areas of data science and computer graphics.


# Identity matrix: An identity matrix is a special kind of square matrix where all the elements of the principal diagonal are ones and all other elements are zeros. It’s called the “identity” matrix because, when it is multiplied by any matrix of compatible size, the original matrix is returned, similar to how multiplying a number by one leaves the original number unchanged.


# Eigenvalues and Eigenvectors: These are a way of breaking down matrices, which can make them easier to analyze. An eigenvector of a matrix is a non-zero vector that only changes by a scalar factor when it is multiplied by that matrix. The corresponding eigenvalue is the factor by which the eigenvector is scaled. These concepts are foundational to many areas of linear algebra and are used in many algorithms in data science and machine learning.


# Mean: The mean is a measure of central tendency, commonly known as the average. To find the mean of a dataset, you add up all of the numbers and then divide by the number of numbers. For example, the mean of 2, 3, and 4 is (2+3+4) / 3 = 3. The mean gives a ‘middle’ or ‘typical’ value for a set of data, but it can be influenced by extreme values (outliers).