In [13]:
import numpy as np

In [15]:
data = np.array(np.random.randint(1, 100, size=25)).reshape(5,5)

In [16]:
print(data)

[[36 42 27 24 31]
 [93 62 23 68 85]
 [33 51 41 21 97]
 [13 38 48 17 92]
 [83  3 85 40 25]]


#### Matrix manipulation - Swap the second and fourth rows of the data matrix.

In [17]:
data[[1, 3]] = data[[3, 1]]

In [18]:
print(data)

[[36 42 27 24 31]
 [13 38 48 17 92]
 [33 51 41 21 97]
 [93 62 23 68 85]
 [83  3 85 40 25]]


#### Normalization - Normalize all the elements in the data matrix such that they are scaled to range between 0 and 1.

In [19]:
print(data / (100-0))

[[0.36 0.42 0.27 0.24 0.31]
 [0.13 0.38 0.48 0.17 0.92]
 [0.33 0.51 0.41 0.21 0.97]
 [0.93 0.62 0.23 0.68 0.85]
 [0.83 0.03 0.85 0.4  0.25]]


#### Z-score normalization - Standardize the data matrix using Z-score normalization. That is, all the elements should be scaled to have a mean* of 0 and a standard deviation of 1. Z-score is calculated as (X - mean) / std.

In [30]:
the_mean = np.mean(data)
rescaled = data - the_mean
print(f'{np.average(rescaled):.14f}')
rescaled = rescaled / 100
print(rescaled)

0.00000000000000
[[-0.1112 -0.0512 -0.2012 -0.2312 -0.1612]
 [-0.3412 -0.0912  0.0088 -0.3012  0.4488]
 [-0.1412  0.0388 -0.0612 -0.2612  0.4988]
 [ 0.4588  0.1488 -0.2412  0.2088  0.3788]
 [ 0.3588 -0.4412  0.3788 -0.0712 -0.2212]]


#### Array splitting - Reshape the data matrix into a vector (Hint: use np.ravel) and split this array into five equal-sized sub-arrays.

In [31]:
raveled = np.ravel(data)

In [33]:
print(np.split(raveled,5))

[array([36, 42, 27, 24, 31]), array([13, 38, 48, 17, 92]), array([33, 51, 41, 21, 97]), array([93, 62, 23, 68, 85]), array([83,  3, 85, 40, 25])]


#### Dot product - Create two vectors of size 5 with any values. Compute the dot product of the two vectors*

In [38]:
vector1 = np.array(np.random.randint(1, 10, size=5))
vector2 = np.array(np.random.randint(1, 10, size=5))

In [39]:
print(np.dot(vector1, vector2))

223


In [40]:
data2 = np.array(np.random.randint(1, 100, size=9)).reshape(3,3)

In [41]:
print(data2)

[[34 96 69]
 [46 27 55]
 [50 50 62]]


In [50]:
print(np.dot(data[:3,:3], data2))

[[4506 5940 6468]
 [4590 4674 5963]
 [5518 6595 7624]]


#### Inverse of a matrix - Create a 3x3 identity matrix*, multiply it with 2 and compute its inverse.

In [51]:
iden = np.eye(3)

In [53]:
iden = iden * 2

In [54]:
print(np.linalg.inv(iden))

[[0.5 0.  0. ]
 [0.  0.5 0. ]
 [0.  0.  0.5]]


#### Eigenvalues and eigenvectors - For the first 3x3 part of the data matrix, compute the eigenvalues and eigenvectors*.

In [55]:
print(np.linalg.eig(data[:3,:3]))

EigResult(eigenvalues=array([109.98045653,   8.94021777,  -3.92067431]), eigenvectors=array([[-0.54009892, -0.78311006,  0.41665912],
       [-0.53226775,  0.60280549, -0.73760983],
       [-0.65190812, -0.15285345,  0.53134444]]))


#### Find missing values - Replace random 5 elements in the data matrix with np.nan. Find the indices of the missing values.

In [64]:
np.dtype(data[0][0])
data_float = data.astype('float32')
print(data_float)

[[36. 42. 27. 24. 31.]
 [13. 38. 48. 17. 92.]
 [33. 51. 41. 21. 97.]
 [93. 62. 23. 68. 85.]
 [83.  3. 85. 40. 25.]]


In [67]:
data_float[0][1] = np.nan
data_float[2][1] = np.nan
data_float[3][3] = np.nan
data_float[1][4] = np.nan
data_float[4][0] = np.nan

In [68]:
print(data_float)

[[36. nan 27. 24. 31.]
 [13. 38. 48. 17. nan]
 [33. nan 41. 21. 97.]
 [93. 62. 23. nan 85.]
 [nan  3. 85. 40. 25.]]


In [77]:
print(np.where(np.isnan(data_float )))

(array([0, 1, 2, 3, 4], dtype=int64), array([1, 4, 1, 3, 0], dtype=int64))


#### Replace missing values - Replace the missing values in the data matrix with the mean of the matrix (ignoring the missing values while computing the mean).

In [82]:
mean_data_float = np.nanmean(data_float)

In [83]:
data_float[np.isnan(data_float)] = mean_data_float

In [84]:
print(data_float)

[[36.  42.1 27.  24.  31. ]
 [13.  38.  48.  17.  42.1]
 [33.  42.1 41.  21.  97. ]
 [93.  62.  23.  42.1 85. ]
 [42.1  3.  85.  40.  25. ]]
