In [2]:
import numpy as np

##### <u>Create arrays: 1D, 2D, 3D</u>

In [4]:
first = np.array([1,2,3],dtype=float)
first

array([1., 2., 3.])

In [8]:
second = np.array([[1,2,3],[1,2,3]])
second

array([[1, 2, 3],
       [1, 2, 3]])

In [9]:
third = np.array([[1,2,3],[4,5,6],[7,8,9]])
third

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

##### <u>dtype</u>

In [10]:
first.dtype # dtype needs practice

dtype('float64')

##### shape / reshape # shape needs practice

In [None]:
second.reshape(3,2)

array([[1, 2],
       [3, 1],
       [2, 3]])

# Core attributes

In [12]:
a = np.array([[1,2,3],[4,5,6]])

In [14]:
a.shape

(2, 3)

In [15]:
a.ndim

2

In [16]:
a.size

6

# Boolean masking

In [None]:
x = np.array([1,-2,3,-4,5])

In [None]:
mask = x > 0

In [None]:
x[mask]

array([1, 3, 5])

In [None]:
x[a < 0]

array([-2, -4])

# Reshape vs Ravel vs Transpose

In [24]:
a = np.array([[1,2,3],[4,5,6]])
a.shape

(2, 3)

In [25]:
a.reshape(3,2)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [27]:
a.ravel()

array([1, 2, 3, 4, 5, 6])

In [30]:
a.T

array([[1, 4],
       [2, 5],
       [3, 6]])

In [32]:
a.transpose()

array([[1, 4],
       [2, 5],
       [3, 6]])

# Aggregations

In [None]:
a.sum() # The total obtained by adding all values in a set.

np.int64(21)

In [None]:
a.mean() # The sum of the values divided by the number of values.

np.float64(3.5)

In [None]:
a.var() # Variance = average of squared deviations from the mean.

np.float64(2.9166666666666665)

In [None]:
a.std() # Standard deviation measures how spread out the values are from the mean.

np.float64(1.707825127659933)

In [None]:
a.argmax() # The index (position) of the maximum value, not the value itself.

np.int64(5)

# Aggregations with axis

In [38]:
a.sum(axis=0) # column-wise

array([5, 7, 9])

In [39]:
a.sum(axis=1) # row-wise

array([ 6, 15])

# Random numbers

In [None]:
np.random.rand(3,2) # from 0 to 1 # uniform [0, 1)

array([[0.26138713, 0.8927742 ],
       [0.93156612, 0.15544597],
       [0.93454906, 0.2278306 ]])

In [None]:
np.random.randn(3,2) # values cluster (centered) around 0

array([[-1.45093046, -0.04131041],
       [ 0.38744338,  0.69870108],
       [ 1.05959014,  1.15318895]])

# Seed
fixes (or “seeds”) NumPy’s random number generator so that it produces the same sequence of random numbers every time you run the code.

In [None]:
np.random.seed(42) 

In [55]:
np.random.rand(3,2) # must reset seed to get same rand

array([[0.83244264, 0.21233911],
       [0.18182497, 0.18340451],
       [0.30424224, 0.52475643]])

# Re-practice

##### <u>Create (4,3) random normal matrix</u>

In [58]:
a = np.random.rand(4,3)
print(a)

[[0.03438852 0.9093204  0.25877998]
 [0.66252228 0.31171108 0.52006802]
 [0.54671028 0.18485446 0.96958463]
 [0.77513282 0.93949894 0.89482735]]


##### <u>Computer column-wise mean</u>

In [59]:
a.mean(axis=0)

array([0.50468848, 0.58634622, 0.660815  ])

##### <u>Zero out negative values</u>

In [60]:
b = np.random.randn(4,3)
print(b)

[[ 0.73846658  0.17136828 -0.11564828]
 [-0.3011037  -1.47852199 -0.71984421]
 [-0.46063877  1.05712223  0.34361829]
 [-1.76304016  0.32408397 -0.38508228]]


In [62]:
mask = b < 0
b[mask]

array([-0.11564828, -0.3011037 , -1.47852199, -0.71984421, -0.46063877,
       -1.76304016, -0.38508228])

##### <u>flatten</u>

In [64]:
b.ravel()

array([ 0.73846658,  0.17136828, -0.11564828, -0.3011037 , -1.47852199,
       -0.71984421, -0.46063877,  1.05712223,  0.34361829, -1.76304016,
        0.32408397, -0.38508228])

In [66]:
b.argmax()

np.int64(7)

In [67]:
b.transpose()

array([[ 0.73846658, -0.3011037 , -0.46063877, -1.76304016],
       [ 0.17136828, -1.47852199,  1.05712223,  0.32408397],
       [-0.11564828, -0.71984421,  0.34361829, -0.38508228]])

Q- What does ndim tell you?
A- Number of dimensions

Q- Why is boolean masking powerful?
A- To root out values, to flush out values

Q- Difference between reshape and ravel?
A- Reshape, reshapes the matrix into a different dimention, as long as correct with the original one, eg: from 2x3 to 3x2 ... Ravel flatens out the matrix eg: 3x2 to 1

Q- Why does axis matter in ML? 
A- Axis 0 computes or targets the column, vertical. Axis 1 computes or targets the rows, horizontal.

### Mini–exercise set

Solve these in the same notebook:

Normalize a vector manually

Compute Euclidean distance between two vectors

Row-wise mean of a 2D array

Replace all values < 0 with 0

Standardize a matrix (z-score)

##### <u>Normalize a vector manually</u>

In [None]:
# vector
v1 = np.array([3,4])

# magnitute is root of sum of squared values
mag = np.sqrt(np.sum(v1 ** 2))
print(mag)

5.0


In [None]:
# unit vector is components of vector divided by the magnitude
v1_norm = v1 / mag 
print(v1_norm)

[0.6 0.8]


##### <u>Euclidean distance between two vectors</u>

In [None]:
# define 2 arrays
v = np.array([3,4])
w = np.array([5,1])
# calculate difference
diff = v - w
print(diff)


[-2  3]


In [None]:
# calculate magniture of resulting difference vector
euclid = np.sqrt(np.sum(diff ** 2))
print(euclid)

3.605551275463989


##### <u>Row-wise mean of a 2D array</u>

In [83]:
d2 = np.array([[1,2,3,4],[5,6,7,8]])
d2.mean(axis=1)

array([2.5, 6.5])

##### <u>Replace all values < 0 with 0</u>

In [75]:
randomn = np.random.randn(3,3)
print(randomn)

[[-0.676922    0.61167629  1.03099952]
 [ 0.93128012 -0.83921752 -0.30921238]
 [ 0.33126343  0.97554513 -0.47917424]]


In [None]:
# off point step
mask = randomn < 0
randomn[mask]

array([-0.676922  , -0.83921752, -0.30921238, -0.47917424])

In [80]:
randomn[randomn < 0] = 0
print(randomn)

[[0.         0.61167629 1.03099952]
 [0.93128012 0.         0.        ]
 [0.33126343 0.97554513 0.        ]]


##### <u>Standardize a matrix (z-score)</u>

In [82]:
# will use mean and std
matrix = np.array([[1,2,3],[4,5,6]])
getMean = matrix.mean()
getStd = matrix.std()
zScore = (matrix-getMean)/getStd
print(zScore)

[[-1.46385011 -0.87831007 -0.29277002]
 [ 0.29277002  0.87831007  1.46385011]]


# More Practice!

### 1. Array creation & properties

Create a 1D array with values [10, 20, 30, 40, 50]

Print its shape, ndim, dtype

Convert it to float64 type

In [90]:
a = np.array([10,20,30,40,50])
shape = a.shape
ndim = a.ndim
dtype = a.dtype
print(shape,"\n",ndim,"\n",dtype,"\n")
a.dtype=float
print(a)
a.dtype

(5,) 
 1 
 int64 

[4.94e-323 9.88e-323 1.48e-322 1.98e-322 2.47e-322]


dtype('float64')

### 2. Indexing & slicing

Take the array above and extract:

First 3 elements

Last 2 elements

Every other element

Reverse the array

In [99]:
a = np.array([10,20,30,40,50])
a[:3]

array([10, 20, 30])

In [100]:
a[-2:]

array([40, 50])

In [102]:
a[::2]

array([10, 30, 50])

In [104]:
a[::-1] # reverse the array

array([50, 40, 30, 20, 10])

### 3. 2D arrays & math ops

Create a 3×4 array of numbers from 1 to 12 (np.arange + reshape)

Compute:

Sum along rows

Mean along columns

Standard deviation of the whole array

Find argmax and argmin

In [109]:
a = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print(a.sum(axis=1))
print(a.mean(axis=0))
print(a.std())
print(f"max = {a.argmax()}, min = {a.argmin()}")

[10 26 42]
[5. 6. 7. 8.]
3.452052529534663
max = 11, min = 0


### 4. Boolean masking

Replace all even numbers with 0

Extract only numbers > 5

In [117]:
a = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
mask = a % 2 == 0
a[mask] = 0
a

array([[ 1,  0,  3,  0],
       [ 5,  0,  7,  0],
       [ 9,  0, 11,  0]])

In [119]:
mask = a > 5
a[mask]


array([ 7,  9, 11])

### 5. Broadcasting

Add [1, 2, 3, 4] to each row of the 3×4 array

Multiply each column by [1, 10, 100, 1000]

In [None]:
b = np.array([[1,2,3,4],[1,2,3,4],[1,2,3,4]])
c = np.hstack((a,b))
print(c) # wrong, misunderstood question

[[ 1  2  3  4  1  2  3  4]
 [ 5  6  7  8  1  2  3  4]
 [ 9 10 11 12  1  2  3  4]]


In [127]:
a = np.array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
a + np.array([1,2,3,4])


array([[ 2,  4,  6,  8],
       [ 6,  8, 10, 12],
       [10, 12, 14, 16]])

In [128]:
a * np.array([1,10,100,1000])

array([[    1,    20,   300,  4000],
       [    5,    60,   700,  8000],
       [    9,   100,  1100, 12000]])

### 6. Random numbers

Create a 4×4 array of random floats between 0 and 1

Set a seed so results are reproducible

Replace all values < 0.5 with 0

In [136]:
np.random.seed(42)
a = np.random.rand(4,4)
a[a < 0.5] = 0
a

array([[0.        , 0.95071431, 0.73199394, 0.59865848],
       [0.        , 0.        , 0.        , 0.86617615],
       [0.60111501, 0.70807258, 0.        , 0.96990985],
       [0.83244264, 0.        , 0.        , 0.        ]])

# 7. Normalization & standardization

Pick a row vector, normalize it (unit vector) manually

Standardize the 3×4 array using z-score

In [None]:
v = np.array([5,7])
magnitute = np.sqrt(np.sum(v ** 2))
v_norm = v / magnitute
print(v_norm) # normalized

[0.58123819 0.81373347]


In [144]:
# z-score
mean = v.mean()
std = v.std()
print(mean)
print(std)
zScore = (v - mean) / std
print(zScore)

6.0
1.0
[-1.  1.]


### 8. Mini challenge

Generate a 5×5 random array

Set values < mean to 0

Compute the row-wise sum

Print the final array

In [151]:
m = np.random.randint(0, 11, size=(5,5))
mean = m.mean()
m[m < mean] = 0
rowSum = m.sum(axis=1)
print(rowSum)


[33 24 22 25 26]
