# Arrays 101 (10 min)

In [1]:
import numpy as np

a = np.array([1, 2, 3])
b = np.array([[1, 2], [3, 4]])
z = np.zeros((3, 4))
o = np.ones((2, 3))
r = np.arange(0, 10, 2)      # 0,2,4,6,8
l = np.linspace(0, 1, 5)     # 0..1 equally spaced


In [2]:
print(b.shape)   # (2, 2)
print(b.ndim)    # 2
print(b.dtype)   # int64 or int32

(2, 2)
2
int64


Exercise 1 (quick)
	
1.	Make a 3×3 array of all 7s
2.	Make numbers 1..12 and reshape to (3,4)
    

# Indexing, slicing, masking

In [3]:
x = np.array([10, 20, 30, 40, 50])
x[0], x[-1]

(10, 50)

In [4]:
# 2D indexing

M = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
M[0, 1]     # 2
M[:, 1]     # column 1
M[1, :]     # row 1
M[0:2, 1:3] # submatrix

array([[2, 3],
       [5, 6]])

In [6]:
#boolean masking

v = np.array([0.2, 1.5, -0.7, 3.3, 2.2])
mask = v > 1.0
v[mask]

array([1.5, 3.3, 2.2])

Exercise 2

given: v = np.array([1, 4, 2, 9, 5, 3])

	•	Select values >= 4
	•	Replace values < 4 with 0
	•	Get indices where v is even

# Vectorization & Broadcasting 

In [7]:
x = np.array([1, 2, 3])
y = np.array([10, 20, 30])
x + y
x * y
np.sqrt(y)

array([3.16227766, 4.47213595, 5.47722558])

In [8]:
X = np.random.randn(5, 3)        # 5 samples, 3 features
mu = X.mean(axis=0)              # (3,)
X_centered = X - mu              # broadcast mu across rows

In [9]:
X_centered

array([[-0.66614013, -0.14207195, -0.39292049],
       [ 1.70407322, -0.76365495,  1.71181526],
       [-0.98225038, -0.99171943, -1.29842805],
       [-0.63316505,  1.19058534, -0.15096999],
       [ 0.57748234,  0.70686099,  0.13050326]])

Exercise 3

	•	Create X shape (6, 2) random
	•	Compute feature-wise mean and std
	•	Standardize: X_std = (X - mean) / std

# ML-style reductions

In ML, we often compute statistics per feature (column) across samples (rows):

	•	mean/std for normalization
	•	min/max for scaling
	•	sums for loss/gradients
	•	covariance for feature relationships

In [12]:
# X: shape (n_samples, n_features)
# Here: 4 samples, 3 features
X = np.random.randn(4, 3)

print("X:\n", X)
print("Shape:", X.shape)  # (4, 3)

 # Global sum: reduces everything to a single number
print("Global sum:", X.sum())

# axis=0: reduce over rows -> gives one value per column (per feature)
print("Sum per feature (axis=0):", X.sum(axis=0))

# axis=1: reduce over columns -> gives one value per row (per sample)
print("Sum per sample (axis=1):", X.sum(axis=1))

X:
 [[ 1.13596353e+00 -9.50524631e-01  8.45725455e-01]
 [-1.21119759e-01  1.69972173e+00 -1.43862514e+00]
 [-1.20811393e-03 -4.29597696e-01  1.30924099e+00]
 [ 4.26560492e-01 -1.27161794e+00 -1.60433759e+00]]
Shape: (4, 3)
Global sum: -0.39981866957337187
Sum per feature (axis=0): [ 1.44019615 -0.95201853 -0.88799629]
Sum per sample (axis=1): [ 1.03116436  0.13997683  0.87843518 -2.44939504]


In [13]:

#Mean, std, min, max (common in preprocessing)
# Feature-wise mean (1 value per feature)
mu = X.mean(axis=0)
print("Mean per feature:", mu)

# Feature-wise std (1 value per feature)
sigma = X.std(axis=0)
print("Std per feature:", sigma)

# Feature-wise min/max
print("Min per feature:", X.min(axis=0))
print("Max per feature:", X.max(axis=0))

Mean per feature: [ 0.36004904 -0.23800463 -0.22199907]
Std per feature: [0.49206128 1.1583968  1.31108453]
Min per feature: [-0.12111976 -1.27161794 -1.60433759]
Max per feature: [1.13596353 1.69972173 1.30924099]


In [14]:
#standardization z-score

# Standardization: (X - mean) / std
# Broadcasting: mu and sigma (shape (3,)) are applied across all rows
X_std = (X - mu) / (sigma + 1e-8)  # small epsilon to avoid divide-by-zero
print("Standardized X:\n", X_std)

# Check standardized stats (should be ~0 mean and ~1 std per feature)
print("Std mean per feature:", X_std.mean(axis=0))
print("Std std per feature:", X_std.std(axis=0))

Standardized X:
 [[ 1.57686554 -0.61509147  0.81438267]
 [-0.97786354  1.67276563 -0.92795395]
 [-0.73417104 -0.16539502  1.16791863]
 [ 0.13516904 -0.89227914 -1.05434736]]
Std mean per feature: [-6.93889390e-18  2.77555756e-17  0.00000000e+00]
Std std per feature: [0.99999998 0.99999999 0.99999999]


Exercise 4 (more ML-relevant)

Create random data X of shape (100, 4). Compute:

	1.	per-feature mean
	2.	per-feature variance
	3.	covariance matrix (feature-feature relationship)

In [15]:
# TODO: Create X with 100 samples and 4 features
X = np.random.randn(100, 4)

# 1) Mean per feature
mu = X.mean(axis=0)

# 2) Variance per feature
var = X.var(axis=0)

# 3) Covariance matrix (4x4), rowvar=False => columns are variables (features)
cov = np.cov(X, rowvar=False)

print("Mean:", mu)
print("Variance:", var)
print("Covariance matrix:\n", cov)
print("Cov shape:", cov.shape)  # should be (4, 4)

Mean: [-0.11836194 -0.00676208 -0.02444873 -0.05770895]
Variance: [0.98310324 0.97802013 0.8170721  0.9792776 ]
Covariance matrix:
 [[ 0.99303358  0.0479365   0.10944914  0.00539522]
 [ 0.0479365   0.98789913 -0.02629481  0.00943185]
 [ 0.10944914 -0.02629481  0.82532536 -0.01362089]
 [ 0.00539522  0.00943185 -0.01362089  0.9891693 ]]
Cov shape: (4, 4)


# Reshaping & stacking (ML data engineering)

Why this matters in ML

	•	Reshape is used for flattening images, making batches, aligning shapes for matrix math
	•	Stacking is used when combining datasets, adding features, adding bias terms

In [16]:
# a is a 1D vector with 12 elements
a = np.arange(12)
print("a:", a)
print("a shape:", a.shape)  # (12,)

# Reshape into a matrix: 3 rows, 4 columns
A = a.reshape(3, 4)
print("A:\n", A)
print("A shape:", A.shape)  # (3, 4)

# Transpose swaps rows/columns
print("A.T:\n", A.T)
print("A.T shape:", A.T.shape)  # (4, 3)



a: [ 0  1  2  3  4  5  6  7  8  9 10 11]
a shape: (12,)
A:
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
A shape: (3, 4)
A.T:
 [[ 0  4  8]
 [ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]]
A.T shape: (4, 3)


In [17]:
A = np.ones((3, 2))
B = np.zeros((3, 2))

# Concatenate along axis=1 => append columns (more features)
C_feat = np.concatenate([A, B], axis=1)
print("Feature concat (axis=1):\n", C_feat)
print("Shape:", C_feat.shape)  # (3, 4)

# Concatenate along axis=0 => append rows (more samples)
C_samp = np.concatenate([A, B], axis=0)
print("Sample concat (axis=0):\n", C_samp)
print("Shape:", C_samp.shape)  # (6, 2)

# vstack/hstack are convenience wrappers (mostly for 2D)
print("vstack shape:", np.vstack([A, B]).shape)
print("hstack shape:", np.hstack([A, B]).shape)

Feature concat (axis=1):
 [[1. 1. 0. 0.]
 [1. 1. 0. 0.]
 [1. 1. 0. 0.]]
Shape: (3, 4)
Sample concat (axis=0):
 [[1. 1.]
 [1. 1.]
 [1. 1.]
 [0. 0.]
 [0. 0.]
 [0. 0.]]
Shape: (6, 2)
vstack shape: (6, 2)
hstack shape: (3, 4)


# Common NumPy mistakes

In [18]:
# Mistake 1: Confusing * with @

X = np.random.randn(5, 3)
w = np.random.randn(3)

# Correct matrix-vector multiplication
y1 = X @ w

# Wrong: elementwise multiply (will broadcast and give (5,3))
y2 = X * w

print("X @ w shape:", y1.shape)  # (5,)
print("X * w shape:", y2.shape)  # (5, 3)

X @ w shape: (5,)
X * w shape: (5, 3)


In [19]:
# Mistake 2: (n,) vs (n,1) broadcasting trap

a = np.random.randn(5)      # shape (5,)
b = np.random.randn(5, 1)   # shape (5,1)

print("a shape:", a.shape)
print("b shape:", b.shape)

# This broadcasts into a (5,5) matrix unintentionally
c = a + b
print("a + b shape:", c.shape)  # (5,5) surprise!

a shape: (5,)
b shape: (5, 1)
a + b shape: (5, 5)


In [20]:
# fix explicitly reshape

a_col = a.reshape(-1, 1)  # (5,1)
print("a_col + b shape:", (a_col + b).shape)  # (5,1)

a_col + b shape: (5, 1)


Quick rule

	•	Use reshape(-1, 1) when it’s a column (one feature or target column).
	•	Use reshape(1, -1) when it’s a row (one sample with many features).

In [21]:
x_row = np.array([1,2,3]).reshape(1, -1)   # (1,3)
x_col = np.array([1,2,3]).reshape(-1, 1)   # (3,1)  

x_row, x_col

(array([[1, 2, 3]]),
 array([[1],
        [2],
        [3]]))

In [29]:
# a is a 1D vector with 12 elements
a = np.arange(12)
print("a:", a)
print("a shape:", a.shape)  # (12,)

# Reshape into a matrix: 3 rows, 4 columns
a= a.reshape(3, 4)
a

a: [ 0  1  2  3  4  5  6  7  8  9 10 11]
a shape: (12,)


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [30]:
a.min(), a.max(), a.sum()

(0, 11, 66)

In [31]:
a.sum(axis=0) #col 

array([12, 15, 18, 21])

In [32]:
a.sum(axis=1) #row

array([ 6, 22, 38])