#### What is NumPy?

Definition: NumPy (Numerical Python) is a library for fast numerical computation using arrays and matrices.

#### Why NumPy?

Faster than Python lists

Supports vectorized math: Vectorization in NumPy means applying operations on whole arrays at once without explicit loops, using optimized low-level code for speed.

Backbone of ML & DL frameworks

In [None]:
# Python List vs NumPy Array

a = [1, 2, 3]
b = [4, 5, 6]

a + b

[1, 2, 3, 4, 5, 6]

In [None]:
# Output

# [1, 2, 3, 4, 5, 6]

# No mathematical meaning
# Slow
# Not used in ML

In [None]:
# NumPy Array

import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

a + b

array([5, 7, 9])

In [None]:
# Output

# [5 7 9]

# Element-wise math
# Very fast
# ML ready

### NumPy Array

In [None]:
# Creating an Array
import numpy as np

x = np.array([10, 20, 30])

In [None]:
# Array Properties

x.shape    # (3,) -> Length of the array along each dimension
x.ndim     # 1 -> Number of dimensions
x.dtype    # int64 -> Data type of the elements (int64 in this run)

dtype('int64')

### Dimensions in NumPy

In [None]:
# 1D Array → Vector

In [None]:
a = np.array([1, 2, 3])

# Used as:
# Feature vector
# Weight vector

In [None]:
# 2D Array → Matrix

In [None]:
b = np.array([
    [1, 2],
    [3, 4],
    [5, 6]
])

# Used as:
# Dataset
# Weight matrix

In [None]:
# 3D Array → Tensor

In [None]:
c = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]]
])

# Used in:
# Deep Learning (images, batches)

### NumPy in Machine Learning

In [None]:
# Feature Matrix (X) and Label Vector (y)
X = np.array([
    [70, 175],
    [65, 160],
    [80, 180]
])

y = np.array([1, 0, 1])

# Term	 Meaning
#  X	  Features
#  y	Target labels

# This is exactly what scikit-learn uses.

### Vectorized Operations

In [None]:
a = np.array([1, 2, 3, 4])

a * 2
a + 10
a ** 2
# ✔ No loops
# ✔ Faster training
# ✔ Cleaner code

array([ 1,  4,  9, 16])

In [None]:
# Why NumPy is Fast?

# Uses C internally
# Avoids Python loops
# Performs batch operations
# ML training = repeated math → NumPy is essential

In [None]:
import numpy as np

a = np.array([2, 4, 6])
b = np.array([1, 3, 5])

print(a + b)
print(a * b)
print(a - b)

[ 3  7 11]
[ 2 12 30]
[1 1 1]


### Array Creation Methods

In [None]:
# zeros() – Initialize with zeros

# np.zeros(shape)

import numpy as np

a = np.zeros(5)
b = np.zeros((3, 4))

print(a)
print(b)


# ML Usage
# Initialize weights
# Create empty datasets

[0. 0. 0. 0. 0.]
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]


In [None]:
# ones() – Initialize with ones

a = np.ones(5)
b = np.ones((2, 3))

print(a)
print(b)

# ML Usage
# Bias initialization
# Mask creation

[1. 1. 1. 1. 1.]
[[1. 1. 1.]
 [1. 1. 1.]]


In [None]:
# full() – Initialize with a constant

a = np.full((3, 3), 7)
print(a)

# Used when:
# You need a fixed-value matrix

[[7 7 7]
 [7 7 7]
 [7 7 7]]


In [None]:
# arange() – Range of numbers

a = np.arange(0, 10)
b = np.arange(0, 10, 2)

print(a)
print(b)

# ML Usage
# Indexing
# Epoch counters
# Custom ranges

[0 1 2 3 4 5 6 7 8 9]
[0 2 4 6 8]


In [None]:
# linspace() – Evenly spaced numbers

a = np.linspace(0, 1, 5)
print(a)

[0.   0.25 0.5  0.75 1.  ]


In [None]:
# Difference:
# arange vs linspace
#    arange	                    linspace
#   Step size	              Number of values
#  Excludes stop value	     Includes endpoint (by default)

### Random Arrays (CRITICAL FOR ML & DL)

In [None]:
a = np.random.rand(3, 4)
print(a)

[[0.24581189 0.25939855 0.4049178  0.89485488]
 [0.28340955 0.05134005 0.95765575 0.06546754]
 [0.43552698 0.76313635 0.59159874 0.31474106]]


In [None]:
# Random normal distribution

a = np.random.randn(3, 4)
print(a)

[[-0.28649863  1.74175693 -0.42758481  1.67403965]
 [-1.14768121  1.10977296  0.70419964 -0.91733761]
 [-1.55958076 -1.17919825  0.07103267  0.36823129]]


In [None]:
# Random integers

a = np.random.randint(0, 10, (3, 3))
print(a)

[[6 6 3]
 [4 9 9]
 [4 1 1]]


In [None]:
# Why Randomness Matters in ML

# Random weights prevent symmetry
# Random data shuffling improves learning

# Used in:
# Weight initialization
# Dropout
# Data augmentation

### Shape, Size & Reshape

In [None]:
a = np.array([[1, 2, 3], [4, 5, 6]])

a.shape   # (2, 3)
a.size    # 6
a.ndim    # 2

2

In [None]:
# Reshape (Very Important)

In [None]:
a = np.arange(12)
print(a)

b = a.reshape((3, 4))
print(b)

[ 0  1  2  3  4  5  6  7  8  9 10 11]
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [None]:
# ML Meaning

# Convert raw data into feature matrices
# Adjust input shape for models

### Flattening Arrays

In [None]:
a = np.array([[1, 2], [3, 4]])

a.flatten()

# Used when:
# Feeding data into ML models

array([1, 2, 3, 4])

In [None]:
# Key Intuition

# ML is all about SHAPES
# If shapes mismatch → model breaks

In [None]:
# Practice Cell
import numpy as np

a = np.zeros((4, 5))
b = np.ones((5, 4))

print(a.shape)
print(b.shape)

c = np.arange(20).reshape((4, 5))
print(c)

(4, 5)
(5, 4)
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]
 [15 16 17 18 19]]


### Indexing, Slicing & Boolean Masking

In [None]:
# Indexing Basics

In [None]:
import numpy as np

a = np.array([10, 20, 30, 40, 50])

a[0]      # 10
a[-1]     # 50
a[1:4]    # [20 30 40]

# Similar to Python lists
# Faster

array([20, 30, 40])

In [None]:
# 2D Indexing

b = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9]
])

b[0, 0]   # 1
b[1, 2]   # 6
b[:, 1]   # column
b[1, :]   # row

array([4, 5, 6])

In [None]:
# Slicing

In [None]:
X = np.array([
    [65, 160],
    [70, 175],
    [80, 180],
    [60, 155]
])

# select height column
height = X[:, 1]

# select first two samples
subset = X[:2, :]

rifat = X[1:3, 0:2]

# General form: X[row_slice, column_slice]

print(height)
print(subset)
print(rifat)

# ML Meaning

# Select features
# Select samples
# Prepare training data

[160 175 180 155]
[[ 65 160]
 [ 70 175]]
[[ 70 175]
 [ 80 180]]


### Boolean Masking

In [None]:
a = np.array([10, 25, 30, 15, 40])

mask = a > 20
print(mask)
print(a[mask])

print(a[a > 10])

[False  True  True False  True]
[25 30 40]
[25 30 15 40]


In [None]:
# Masking in ML Data Cleaning

In [None]:
X = np.array([1, 5, 2, 10, 3])

X = X[X >= 3]
print(X)

[ 5 10  3]


In [None]:
# Modify Data Using Masks

a = np.array([1, 2, 3, 4, 5])

a[a < 3] = 0
print(a)

# Used in:
# Thresholding
# Feature engineering

[0 0 3 4 5]


In [None]:
# where() Function

a = np.array([10, 20, 30, 40])

b = np.where(a > 25, 1, 0)
print(b)

# ML Usage:
# Label creation
# Binary classification

[0 0 1 1]


In [None]:
# Fancy Indexing

a = np.array([10, 20, 30, 40, 50])

idx = [0, 2, 4]
a[idx]

# Used when:
# Selecting specific rows
# Custom sampling

array([10, 30, 50])

In [None]:
# Key ML Intuition

# Boolean masking = conditional data selection
# This is how datasets are cleaned before training.

In [None]:
# Practice Cell
import numpy as np

X = np.array([
    [45, 150],
    [72, 175],
    [90, 185],
    [60, 160]
])

# Select rows where weight > 70
print(X[X[:, 0] > 70])


# weight > 70 AND height > 170
print(X[(X[:, 0] > 70) & (X[:, 1] > 170)])


# weight < 60 OR height > 180
print(X[(X[:, 0] < 60) | (X[:, 1] > 180)])

# Select rows NOT matching a condition
# NOT weight > 70
print(X[~(X[:, 0] > 70)])


# Select specific columns after filtering rows
# Get heights of people with weight > 70
X[X[:, 0] > 70, 1]


# Replace values using condition
# Cap weight at 80
X[X[:, 0] > 80, 0] = 80
print(X)

# Create labels using conditions
# Label: 1 if weight > 70 else 0
y = np.where(X[:, 0] > 70, 1, 0)
print(y)

# Count how many rows match condition
np.sum(X[:, 0] > 70)

# Check if ANY or ALL values match
np.any(X[:, 0] > 90)
np.all(X[:, 0] > 50)

# Remove rows where height < 160
X = X[X[:, 1] >= 160]
print(X)

# Select rows using multiple feature thresholds
# ML-style filtering
X[(X[:, 0] > 60) & (X[:, 0] < 85) & (X[:, 1] > 165)]

[[ 72 175]
 [ 90 185]]
[[ 72 175]
 [ 90 185]]
[[ 45 150]
 [ 90 185]]
[[ 45 150]
 [ 60 160]]
[[ 45 150]
 [ 72 175]
 [ 80 185]
 [ 60 160]]
[0 1 1 0]
[[ 72 175]
 [ 80 185]
 [ 60 160]]


array([[ 72, 175],
       [ 80, 185]])

In [None]:
# Practice Cell
import numpy as np

X = np.array([
    [55, 155],
    [68, 165],
    [72, 178],
    [90, 185],
    [60, 160]
])

# 1. Select rows where weight between 60 and 80
print(X[(X[:, 0] >= 60) & (X[:, 0] <= 80)])

# 2. Create labels: height > 170 → 1 else 0
y = np.where(X[:, 1] > 170, 1, 0)
print(y)

[[ 68 165]
 [ 72 178]
 [ 60 160]]
[0 0 1 1 0]


### Broadcasting & Vectorized Operations

What is Broadcasting?

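Broadcasting allows NumPy to perform element-wise operations on arrays of different
(but compatible) shapes without explicit Python loops.

Conceptually, NumPy stretches the smaller array along its missing or size-1 dimensions to match the larger one; no extra copy of the data is made.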

In [None]:
# Simple Broadcasting Example
import numpy as np

a = np.array([1, 2, 3])
a + 10

array([11, 12, 13])

In [None]:
# Broadcasting with 2D Arrays

X = np.array([
    [10, 20, 30],
    [40, 50, 60]
])

X + 5

array([[15, 25, 35],
       [45, 55, 65]])

In [None]:
# Column-wise Broadcasting

X = np.array([
    [60, 160],
    [70, 170],
    [80, 180]
])

mean = np.array([70, 170])

X - mean

# Mean is broadcast to each row.

array([[-10, -10],
       [  0,   0],
       [ 10,  10]])

In [None]:
# Feature Normalization

# Min-Max Scaling
min_val = X.min(axis=0)
max_val = X.max(axis=0)

X_scaled = (X - min_val) / (max_val - min_val)
print(X_scaled)

[[0.  0. ]
 [0.5 0.5]
 [1.  1. ]]


In [None]:
# Broadcasting Rules

# Shapes are compared from right to left
# Dimensions must be:
# Equal OR
# One of them is 1
# Otherwise → error

In [None]:
# Vectorized Operations vs Loops

# Loop (Slow)
result = []
for x in X:
    result.append(x * 2)


# Vectorized (Fast)
result = X * 2

# Faster
# Cleaner
# Used in ML/DL

In [None]:
# Dot Product

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

c = np.dot(a, b)
print(c)

# Used in:
# Linear Regression
# Neural Networks
# Attention mechanisms

32


In [None]:
# Matrix Multiplication

A = np.array([
    [1, 2],
    [3, 4]
])

B = np.array([
    [5, 6],
    [7, 8]
])

A @ B

# Forward pass in neural networks

array([[19, 22],
       [43, 50]])

In [None]:
# Broadcasting for Bias Addition

Z = np.array([
    [1, 2],
    [3, 4],
    [5, 6]
])

bias = np.array([0.5, 1.0])

Z + bias


# ML INTUITION

# Weights → matrices
# Bias → vector
# Forward pass → matrix multiplication + broadcasting

array([[1.5, 3. ],
       [3.5, 5. ],
       [5.5, 7. ]])

In [None]:
# Practice Cell
import numpy as np

X = np.array([
    [2, 4],
    [6, 8],
    [10, 12]
])

mean = X.mean(axis=0)
std = X.std(axis=0)

X_standardized = (X - mean) / std
print(X_standardized)

[[-1.22474487 -1.22474487]
 [ 0.          0.        ]
 [ 1.22474487  1.22474487]]


### Linear Algebra Essentials for Machine Learning

In [None]:
# Transpose (.T)

import numpy as np

X = np.array([
    [1, 2, 3],
    [4, 5, 6]
])

X.T

# Shape change: (2, 3) → (3, 2)

# ML Usage
# Linear regression formulas
# Neural network backpropagation

array([[1, 4],
       [2, 5],
       [3, 6]])

In [None]:
# Matrix Multiplication (@)

W = np.array([
    [0.1, 0.2],
    [0.3, 0.4]
])

X = np.array([
    [5, 10],
    [15, 20]
])

X @ W

# ML Meaning:
# X → inputs
# W → weights
# Output → predictions

array([[ 3.5,  5. ],
       [ 7.5, 11. ]])

In [None]:
# Identity Matrix
I = np.eye(3)
print(I)

# Used in:
# Regularization
# Matrix inversion

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


In [None]:
# Inverse Matrix

A = np.array([
    [4, 7],
    [2, 6]
])

A_inv = np.linalg.inv(A)
print(A_inv)

# Only for square & invertible matrices

[[ 0.6 -0.7]
 [-0.2  0.4]]


In [None]:
# Determinant

np.linalg.det(A)

# Used to:
# Check invertibility

np.float64(10.000000000000002)

In [None]:
# Norms (Vector Length)

v = np.array([3, 4])

np.linalg.norm(v)

# ML Usage:
# Regularization (L1, L2)
# Distance calculations

np.float64(5.0)

In [None]:
# Mean Squared Error (Using NumPy)

y_true = np.array([3, 5, 7])
y_pred = np.array([2.5, 5.5, 6.8])

mse = np.mean((y_true - y_pred) ** 2)
print(mse)

0.18000000000000002


In [None]:
# Euclidean Distance (k-NN)

a = np.array([1, 2])
b = np.array([4, 6])

distance = np.linalg.norm(a - b)
print(distance)

5.0


In [None]:
# Practice Cell
import numpy as np

# Example: Simple Linear Regression (Normal Equation)


# Feature matrix X (3 samples, 2 columns: a constant bias column and one feature)
X = np.array([
    [1, 1],  # [bias=1, feature1=1]
    [1, 2],  # [bias=1, feature1=2]
    [1, 3]   # [bias=1, feature1=3]
])

# Target values
y = np.array([2, 3, 5])

# Step 1: Compute weights using normal equation
# W = (X^T X)^-1 X^T y   <=>   (X^T X) W = X^T y
# Solve the linear system directly rather than forming the inverse explicitly:
# it gives the same W here and is the numerically preferred way to do it.
W = np.linalg.solve(X.T @ X, X.T @ y)

print("Weights (W):", W)  # W[0] = bias, W[1] = slope

# Step 2: Predict using computed weights
y_pred = X @ W
print("Predictions:", y_pred)

# Step 3: Calculate Mean Squared Error (MSE)
mse = np.mean((y - y_pred) ** 2)
print("Mean Squared Error:", mse)


Weights (W): [0.33333333 1.5       ]
Predictions: [1.83333333 3.33333333 4.83333333]
Mean Squared Error: 0.05555555555555555


### ML pipeline essentials
### Randomness, Shuffling & Train-Test Split (ML Pipeline)

In [1]:
# Random Numbers (for reproducibility)

import numpy as np

# Set random seed (reproducible results)
np.random.seed(42)

# Random integers between 0 and 10
rand_ints = np.random.randint(0, 10, size=(5, 2))
print("Random Integers:\n", rand_ints)

# Random floats between 0 and 1
rand_floats = np.random.rand(5, 2)
print("Random Floats:\n", rand_floats)

# Use case:
# Weight initialization in neural networks
# Random sampling for train-test split

Random Integers:
 [[6 3]
 [7 4]
 [6 9]
 [2 6]
 [7 4]]
Random Floats:
 [[0.60111501 0.70807258]
 [0.02058449 0.96990985]
 [0.83244264 0.21233911]
 [0.18182497 0.18340451]
 [0.30424224 0.52475643]]


In [3]:
# Shuffling Data

# Dataset X (features) and y (labels)
X = np.array([
    [65, 160],
    [70, 175],
    [80, 180],
    [60, 155]
])
y = np.array([0, 1, 1, 0])

# Shuffle indices
indices = np.arange(X.shape[0])
np.random.shuffle(indices)

X_shuffled = X[indices]
y_shuffled = y[indices]

print("Shuffled X:\n", X_shuffled)
print("Shuffled y:\n", y_shuffled)

# This prevents ordered data bias when training ML models.

Shuffled X:
 [[ 70 175]
 [ 65 160]
 [ 60 155]
 [ 80 180]]
Shuffled y:
 [1 0 0 1]


In [4]:
# Train-Test Split (NumPy Style)
# Split 75% train, 25% test

split_idx = int(0.75 * X.shape[0])

X_train = X_shuffled[:split_idx]
y_train = y_shuffled[:split_idx]

X_test = X_shuffled[split_idx:]
y_test = y_shuffled[split_idx:]

print("X_train:\n", X_train)
print("X_test:\n", X_test)

X_train:
 [[ 70 175]
 [ 65 160]
 [ 60 155]]
X_test:
 [[ 80 180]]


In [6]:
# Feature Scaling

# Min-Max Scaling
min_val = X_train.min(axis=0)
max_val = X_train.max(axis=0)

X_train_scaled = (X_train - min_val) / (max_val - min_val)
X_test_scaled = (X_test - min_val) / (max_val - min_val)

print("X_train_scaled:\n", X_train_scaled)
print("X_test_scaled:\n", X_test_scaled)

# Standardization (Z-score)
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)

X_train_std = (X_train - mean) / std
X_test_std = (X_test - mean) / std


# Scaling is critical for gradient-based ML algorithms.
# Note: the test set is scaled with the training min/max, so test values can fall outside [0, 1].

X_train_scaled:
 [[1.   1.  ]
 [0.5  0.25]
 [0.   0.  ]]
X_test_scaled:
 [[2.   1.25]]


In [7]:
# Practice Cell (Full ML Pipeline Example)
import numpy as np

# Dataset: one row per sample, one label per row
X = np.array([[55, 150], [68, 165], [72, 178], [90, 185], [60, 160]])
y = np.array([0, 1, 1, 1, 0])

# Shuffle features and labels with the same seeded index permutation
np.random.seed(42)
indices = np.arange(X.shape[0])
np.random.shuffle(indices)
X = X[indices]
y = y[indices]

# Train-Test Split (80%-20%)
split_idx = int(0.8 * X.shape[0])
X_train, X_test = np.split(X, [split_idx])
y_train, y_test = np.split(y, [split_idx])

# Min-Max Scaling: statistics come from the training rows only
# and are reused unchanged for the test rows
train_min = X_train.min(axis=0)
train_range = X_train.max(axis=0) - train_min
X_train_scaled = (X_train - train_min) / train_range
X_test_scaled = (X_test - train_min) / train_range

print("X_train_scaled:\n", X_train_scaled)
print("X_test_scaled:\n", X_test_scaled)

X_train_scaled:
 [[0.76470588 0.53571429]
 [0.29411765 0.35714286]
 [1.         1.        ]
 [0.         0.        ]]
X_test_scaled:
 [[2.05882353 1.25      ]]


### NumPy for Deep Learning

In [8]:
# Single Neuron (Forward Pass)

import numpy as np

# Inputs (3 features)
X = np.array([0.5, 0.3, 0.2])

# Weights (randomly initialized)
W = np.array([0.4, 0.7, 0.2])

# Bias
b = 0.1

# Forward pass (linear combination)
Z = np.dot(X, W) + b
print("Linear Output (Z):", Z)

# Activation function (sigmoid)
A = 1 / (1 + np.exp(-Z))
print("Activation Output (A):", A)


# Explanation
# Z = X·W + b → linear combination
# sigmoid(Z) → squashes output to 0-1 for probability

Linear Output (Z): 0.55
Activation Output (A): 0.6341355910108007


In [9]:
# Sigmoid Function
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    z may be a number or a NumPy array; the result has the same shape and
    every value lies in [0, 1].
    """
    # For large negative z, exp(-z) overflows to inf and 1 / (1 + inf) is 0.0,
    # which is the correct limit. The overflow is therefore harmless, so it is
    # silenced here instead of surfacing as a RuntimeWarning (or an exception
    # when the caller has set np.seterr(over="raise")).
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-z))

# Used for binary classification.

In [10]:
# Compute Loss (Binary Cross-Entropy)
y_true = 1  # actual label

# Loss: -[y*log(A) + (1-y)*log(1-A)]
loss = - (y_true * np.log(A) + (1 - y_true) * np.log(1 - A))
print("Loss:", loss)

Loss: 0.45549248146333754


In [11]:
# Backpropagation (Gradient w.r.t Weights)
# For a sigmoid output trained with binary cross-entropy, A - y_true is the
# derivative of the loss w.r.t. the pre-activation Z (not w.r.t. A itself).
# The variable is named dA, but the quantity it holds is dL/dZ.
dA = A - y_true

# Chain rule through Z = X·W + b: dZ/dW = X and dZ/db = 1
dW = dA * X
db = dA

print("dW:", dW)
print("db:", db)

# These are the gradients used to update weights.

dW: [-0.1829322  -0.10975932 -0.07317288]
db: -0.36586440898919925


In [12]:
# Update Weights (Gradient Descent Step)

learning_rate = 0.1

W = W - learning_rate * dW
b = b - learning_rate * db

print("Updated Weights:", W)
print("Updated Bias:", b)

Updated Weights: [0.41829322 0.71097593 0.20731729]
Updated Bias: 0.13658644089891994


In [13]:
# Training Loop Example

# Dataset: 4 samples, 3 features
X_train = np.array([
    [0.5, 0.3, 0.2],
    [0.9, 0.1, 0.4],
    [0.2, 0.7, 0.5],
    [0.6, 0.8, 0.9]
])
y_train = np.array([1, 0, 1, 0])

# Initialize weights and bias (seeded so the run is reproducible)
np.random.seed(42)
W = np.random.rand(3)
b = 0.0

# Learning rate
lr = 0.1

# Training loop: 100 passes over the data, updating after every single sample
for epoch in range(100):
    # x_i / y_i are one sample and its label; distinct names keep the loop
    # from overwriting any dataset-level X / y defined elsewhere
    for x_i, y_i in zip(X_train, y_train):
        # Forward: linear combination followed by sigmoid
        Z = np.dot(x_i, W) + b
        A = 1 / (1 + np.exp(-Z))
        # Backward: for sigmoid + binary cross-entropy, dL/dZ = A - y
        dZ = A - y_i
        dW = dZ * x_i   # dZ/dW = x_i
        db = dZ         # dZ/db = 1
        # Update (gradient descent step)
        W -= lr * dW
        b -= lr * db

print("Trained Weights:", W)
print("Trained Bias:", b)

Trained Weights: [-1.98365224  1.18801753 -1.78202072]
Trained Bias: 1.1905794713600621


In [14]:
# Practice Cell

# Test neuron on new sample

X_new = np.array([0.4, 0.6, 0.1])
Z = np.dot(X_new, W) + b
A = 1 / (1 + np.exp(-Z))
print("Neuron Output:", A)

Neuron Output: 0.717425528965057


### Small Neural Network (2 Layers) in NumPy

In [16]:
# Dataset (Toy Example)
import numpy as np

# Input features (4 samples, 2 features)
X = np.array([
    [0, 0],
    [0, 1],
    [1, 0],
    [1, 1]
])

# Labels (XOR problem)
y = np.array([[0], [1], [1], [0]])

In [17]:
# Network Architecture

# Input layer: 2 neurons
# Hidden layer: 2 neurons (sigmoid activation)
# Output layer: 1 neuron (sigmoid activation)

In [21]:
# Initialize Weights and Biases

np.random.seed(42)

# Hidden layer
W1 = np.random.rand(2, 2)   # shape: (input_features, hidden_neurons)
b1 = np.zeros((1, 2))       # shape: (1, hidden_neurons)

# Output layer
W2 = np.random.rand(2, 1)   # shape: (hidden_neurons, output_neurons)
b2 = np.zeros((1, 1))       # shape: (1, output_neurons)

print(W1)
print(W2)
print(b1)
print(b2)

[[0.37454012 0.95071431]
 [0.73199394 0.59865848]]
[[0.15601864]
 [0.15599452]]
[[0. 0.]]
[[0.]]


In [22]:
# Activation Function (Sigmoid)

def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + exp(-z)), element-wise.

    z may be a number or a NumPy array; the result has the same shape and
    every value lies in [0, 1].
    """
    # For large negative z, exp(-z) overflows to inf and 1 / (1 + inf) is 0.0,
    # which is the correct limit. The overflow is therefore harmless, so it is
    # silenced here instead of surfacing as a RuntimeWarning (or an exception
    # when the caller has set np.seterr(over="raise")).
    with np.errstate(over="ignore"):
        return 1 / (1 + np.exp(-z))

def sigmoid_derivative(a):
    """Return the sigmoid derivative expressed through the activation.

    a must already be a sigmoid output, a = sigmoid(z) (NOT the raw z);
    then d(sigmoid)/dz = a * (1 - a).
    """
    return a * (1 - a)

In [23]:
# Forward Pass

# Hidden layer
Z1 = X @ W1 + b1      # shape: (4, 2)
A1 = sigmoid(Z1)

# Output layer
Z2 = A1 @ W2 + b2     # shape: (4, 1)
A2 = sigmoid(Z2)

print("Output before training:\n", A2)

Output before training:
 [[0.53892274]
 [0.55132394]
 [0.5510619 ]
 [0.56117033]]


In [24]:
# Compute Loss (Binary Cross-Entropy)

loss = -np.mean(y * np.log(A2) + (1 - y) * np.log(1 - A2))
print("Loss:", loss)

Loss: 0.6972936116051336


In [25]:
# Backpropagation (Vectorized)
# Every gradient is divided by X.shape[0] (the number of samples, 4 here),
# matching the mean over samples used in the loss.

# Output layer error
# For a sigmoid output with binary cross-entropy, dL/dZ2 = A2 - y
dZ2 = A2 - y          # shape: (4,1)
dW2 = A1.T @ dZ2 / X.shape[0]   # (2,4) @ (4,1) -> (2,1), same shape as W2
db2 = np.sum(dZ2, axis=0, keepdims=True) / X.shape[0]   # (1,1), same shape as b2

# Hidden layer error
# Push the output error back through W2, then through the hidden sigmoid
dA1 = dZ2 @ W2.T   # (4,1) @ (1,2) -> (4,2)
dZ1 = dA1 * sigmoid_derivative(A1)   # element-wise; A1 is already sigmoid(Z1)
dW1 = X.T @ dZ1 / X.shape[0]   # (2,4) @ (4,2) -> (2,2), same shape as W1
db1 = np.sum(dZ1, axis=0, keepdims=True) / X.shape[0]   # (1,2), same shape as b1

In [26]:
# Update Weights (Gradient Descent)
learning_rate = 1.0

W1 -= learning_rate * dW1
b1 -= learning_rate * db1
W2 -= learning_rate * dW2
b2 -= learning_rate * db2

In [27]:
# Training Loop

for epoch in range(10000):
    # Forward
    Z1 = X @ W1 + b1
    A1 = sigmoid(Z1)
    Z2 = A1 @ W2 + b2
    A2 = sigmoid(Z2)

    # Compute Loss
    loss = -np.mean(y * np.log(A2) + (1 - y) * np.log(1 - A2))

    # Backprop
    dZ2 = A2 - y
    dW2 = A1.T @ dZ2 / X.shape[0]
    db2 = np.sum(dZ2, axis=0, keepdims=True) / X.shape[0]

    dA1 = dZ2 @ W2.T
    dZ1 = dA1 * sigmoid_derivative(A1)
    dW1 = X.T @ dZ1 / X.shape[0]
    db1 = np.sum(dZ1, axis=0, keepdims=True) / X.shape[0]

    # Update
    W1 -= learning_rate * dW1
    b1 -= learning_rate * db1
    W2 -= learning_rate * dW2
    b2 -= learning_rate * db2

print("Trained Output:\n", A2)

Trained Output:
 [[1.26177033e-03]
 [9.99120980e-01]
 [9.99120700e-01]
 [9.02227148e-04]]


In [28]:
# Practice Cell — Test New Inputs

# Predict on new inputs
X_new = np.array([
    [0,0],
    [0,1],
    [1,0],
    [1,1]
])

Z1 = X_new @ W1 + b1
A1 = sigmoid(Z1)
Z2 = A1 @ W2 + b2
A2 = sigmoid(Z2)

print("Predictions:\n", A2)

Predictions:
 [[1.26163789e-03]
 [9.99121073e-01]
 [9.99120794e-01]
 [9.02130201e-04]]


### NumPy Tricks & Tips for ML/DL

In [31]:
# Vectorize Everything (No Loops): Performing operations on entire arrays at once, without writing explicit Python loops.
# ML/DL is all matrix math → vectorize to speed up training.
# Instead of Python loops:

import numpy as np

X = np.array([[1,2],[3,4],[5,6]])
W = np.array([0.1,0.2])

# Loop (slow)
Y_loop = np.zeros(X.shape[0])
for i in range(X.shape[0]):
    Y_loop[i] = np.dot(X[i], W)

# Vectorized (fast)
Y_vec = X @ W

print(Y_loop)
print(Y_vec)

[0.5 1.1 1.7]
[0.5 1.1 1.7]


In [32]:
# Batch Processing (Mini-Batches)
# Dataset
X = np.arange(20).reshape(10,2)  # 10 samples, 2 features
batch_size = 3

for i in range(0, X.shape[0], batch_size):
    X_batch = X[i:i+batch_size]
    print("Batch:\n", X_batch)

# Used in mini-batch gradient descent for neural networks.

Batch:
 [[0 1]
 [2 3]
 [4 5]]
Batch:
 [[ 6  7]
 [ 8  9]
 [10 11]]
Batch:
 [[12 13]
 [14 15]
 [16 17]]
Batch:
 [[18 19]]


In [33]:
# Fancy Indexing for Samples & Features

X = np.arange(16).reshape(4,4)

# Select first and last row
rows = [0,3]
X[rows]

# Select first and third column
cols = [0,2]
X[:, cols]

# ML usage: select specific features or samples efficiently.

array([[ 0,  2],
       [ 4,  6],
       [ 8, 10],
       [12, 14]])

In [34]:
# Boolean Indexing (Advanced)

X = np.array([
    [60,160],
    [70,175],
    [80,180],
    [50,150]
])

# Select rows where weight > 65 AND height > 160
mask = (X[:,0] > 65) & (X[:,1] > 160)
X_filtered = X[mask]
print(X_filtered)

# ML usage: filtering datasets, removing outliers.

[[ 70 175]
 [ 80 180]]


In [36]:
# Avoid Loops in Loss & Gradients

# Instead of accumulating the squared errors one element at a time:

# y_true, y_pred = arrays of length n
y_true = np.array([3, 5, 7])
y_pred = np.array([2.5, 5.5, 6.8])

squared_error_total = 0
for target, estimate in zip(y_true, y_pred):
    squared_error_total += (target - estimate) ** 2
loss = squared_error_total / len(y_true)
print(f"Loop-based MSE: {loss}")

# Do vectorized: one expression over the whole arrays
loss_vectorized = np.mean((y_true - y_pred)**2)
print(f"Vectorized MSE: {loss_vectorized}")

# Critical for training big datasets efficiently.

Loop-based MSE: 0.18000000000000002
Vectorized MSE: 0.18000000000000002


In [37]:
# Broadcasting in Neural Networks
X = np.array([[1,2],[3,4],[5,6]])
bias = np.array([0.5,1.0])

Z = X + bias  # Automatically added to each row
print(Z)

# This is exactly how biases are added in layers.

[[1.5 3. ]
 [3.5 5. ]
 [5.5 7. ]]


In [38]:
# Random Mini-Batches (Training Loop)

np.random.seed(42)
X = np.arange(20).reshape(10,2)
y = np.arange(10)

# Shuffle
perm = np.random.permutation(X.shape[0])
X, y = X[perm], y[perm]

# Generate mini-batches
batch_size = 3
for i in range(0, X.shape[0], batch_size):
    X_batch = X[i:i+batch_size]
    y_batch = y[i:i+batch_size]
    print("Batch X:\n", X_batch, "\nBatch y:\n", y_batch)

# Real ML usage: stochastic gradient descent.

Batch X:
 [[16 17]
 [ 2  3]
 [10 11]] 
Batch y:
 [8 1 5]
Batch X:
 [[ 0  1]
 [14 15]
 [ 4  5]] 
Batch y:
 [0 7 2]
Batch X:
 [[18 19]
 [ 8  9]
 [ 6  7]] 
Batch y:
 [9 4 3]
Batch X:
 [[12 13]] 
Batch y:
 [6]


In [39]:
# Random Weight Initialization (Deep Learning)

# Xavier initialization for hidden layer of 4 neurons
input_dim = 3
hidden_dim = 4

W = np.random.randn(input_dim, hidden_dim) * np.sqrt(1/input_dim)
print(W)

# Proper initialization avoids vanishing/exploding gradients.

[[ 0.16110457  0.58342127 -0.33537015 -0.30320693]
 [-0.32988649 -0.53351948 -1.50835588  0.54869619]
 [ 0.47137479 -0.87981022 -0.24713251 -0.42862879]]


In [40]:
# Avoid Memory Copies When Possible

X = np.arange(10)
Y = X  # No copy, same memory
Y[0] = 99
print(X)  # X is also changed

# Use .copy() if you want independent arrays.

[99  1  2  3  4  5  6  7  8  9]


In [41]:
# Practice Cell — ML/DL Tricks

import numpy as np

# Dataset
X = np.arange(20).reshape(10,2)
y = np.arange(10)

# Shuffle
np.random.seed(42)
perm = np.random.permutation(X.shape[0])
X, y = X[perm], y[perm]

# Mini-batch gradient example
batch_size = 4
for i in range(0, X.shape[0], batch_size):
    X_batch = X[i:i+batch_size]
    y_batch = y[i:i+batch_size]
    print("Batch X:\n", X_batch, "\nBatch y:\n", y_batch)

Batch X:
 [[16 17]
 [ 2  3]
 [10 11]
 [ 0  1]] 
Batch y:
 [8 1 5 0]
Batch X:
 [[14 15]
 [ 4  5]
 [18 19]
 [ 8  9]] 
Batch y:
 [7 2 9 4]
Batch X:
 [[ 6  7]
 [12 13]] 
Batch y:
 [3 6]


## Essential NumPy Operations

In [42]:
# Arithmetic Operations (Element-wise)

import numpy as np

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

print("Addition:", a + b)
print("Subtraction:", a - b)
print("Multiplication:", a * b)
print("Division:", a / b)
print("Power:", a ** 2)
print("Square root:", np.sqrt(a))


# Used for feature scaling, forward pass, activations.

Addition: [5 7 9]
Subtraction: [-3 -3 -3]
Multiplication: [ 4 10 18]
Division: [0.25 0.4  0.5 ]
Power: [1 4 9]
Square root: [1.         1.41421356 1.73205081]


In [43]:
# Aggregate Functions

a = np.array([[1,2,3],[4,5,6]])

print("Sum:", np.sum(a))
print("Sum axis=0 (columns):", np.sum(a, axis=0))
print("Sum axis=1 (rows):", np.sum(a, axis=1))

print("Mean:", np.mean(a))
print("Max:", np.max(a))
print("Min:", np.min(a))
print("Standard deviation:", np.std(a))
print("Variance:", np.var(a))

# Used in normalization, statistics, and batch operations.

Sum: 21
Sum axis=0 (columns): [5 7 9]
Sum axis=1 (rows): [ 6 15]
Mean: 3.5
Max: 6
Min: 1
Standard deviation: 1.707825127659933
Variance: 2.9166666666666665


In [44]:
# Comparison Operations
a = np.array([1,2,3,4,5])

print(a > 3)   # [False False False  True  True]
print(a < 4)   # [ True  True  True False False]
print(a == 2)  # [False  True False False False]

# Used in Boolean masking for filtering and label creation.

[False False False  True  True]
[ True  True  True False False]
[False  True False False False]


In [45]:
# Logical Operations

a = np.array([1,2,3,4,5])

print(np.logical_and(a > 2, a < 5))  # [False False  True  True False]
print(np.logical_or(a < 2, a > 4))   # [ True False False False  True]
print(np.logical_not(a == 3))        # [ True  True False  True  True]


# Very useful for conditional filtering in datasets.

[False False  True  True False]
[ True False False False  True]
[ True  True False  True  True]


In [46]:
# Sorting & Searching

a = np.array([3, 1, 4, 2, 5])

print("Sorted:", np.sort(a))
print("Indices to sort:", np.argsort(a))
print("Max value index:", np.argmax(a)) # important
print("Min value index:", np.argmin(a))

# Used to find top-k values, sorting features, or predictions.

Sorted: [1 2 3 4 5]
Indices to sort: [1 3 0 2 4]
Max value index: 4
Min value index: 1


In [47]:
# Stacking & Concatenation

a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])

# Vertical stack
print(np.vstack((a,b)))

# Horizontal stack
print(np.hstack((a,b)))

# Concatenate along axis
print(np.concatenate((a,b), axis=0))  # vertical
print(np.concatenate((a,b), axis=1))  # horizontal

# ML usage: combine features, merge datasets, batch samples.

[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]
[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]


In [48]:
# Reshape & Flatten

a = np.arange(12)
print("Original:", a)
print("Reshape (3x4):\n", a.reshape(3,4))
print("Flatten:", a.reshape(-1))

# Shape manipulation is critical for ML & DL models.

Original: [ 0  1  2  3  4  5  6  7  8  9 10 11]
Reshape (3x4):
 [[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
Flatten: [ 0  1  2  3  4  5  6  7  8  9 10 11]


In [49]:
# Transpose & Dot Product

a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])

print("Transpose of a:\n", a.T)
print("Dot product:\n", a @ b)
print("Using np.dot:", np.dot(a,b))

# Core linear algebra for forward pass, backprop, and linear regression.

Transpose of a:
 [[1 3]
 [2 4]]
Dot product:
 [[19 22]
 [43 50]]
Using np.dot: [[19 22]
 [43 50]]


In [50]:
# Random Operations

np.random.seed(42)

print("Random integers 0-9:", np.random.randint(0,10,5))
print("Random floats 0-1:", np.random.rand(5))
print("Random normal:", np.random.randn(5))

# Shuffle array
arr = np.arange(10)
np.random.shuffle(arr)
print("Shuffled:", arr)

# Permutation
perm = np.random.permutation(10)
print("Permutation:", perm)

# Used in train-test split, SGD, weight initialization.

Random integers 0-9: [6 3 7 4 6]
Random floats 0-1: [0.44583275 0.09997492 0.45924889 0.33370861 0.14286682]
Random normal: [-0.4826188   0.16416482  0.23309524  0.11799461  1.46237812]
Shuffled: [9 5 6 4 3 1 7 2 0 8]
Permutation: [1 8 7 5 4 9 0 3 6 2]


In [51]:
# Advanced Aggregation (Axis Operations)

X = np.array([[1,2,3],[4,5,6],[7,8,9]])

print("Sum columns:", np.sum(X, axis=0))
print("Sum rows:", np.sum(X, axis=1))
print("Max per row:", np.max(X, axis=1))
print("Min per column:", np.min(X, axis=0))

# Always check axis parameter when aggregating features.

Sum columns: [12 15 18]
Sum rows: [ 6 15 24]
Max per row: [3 6 9]
Min per column: [1 2 3]


In [52]:
# Practice Cell — Operations Combined

import numpy as np

# Dataset
X = np.array([[10,20,30],[40,50,60],[70,80,90]])

# 1. Sum of each row
print("Row sums:", np.sum(X, axis=1))

# 2. Mean of each column
print("Column means:", np.mean(X, axis=0))

# 3. Boolean mask: select values > 50
print("Values > 50:", X[X>50])

# 4. Add 10 to all elements (broadcast)
print("Add 10:", X + 10)

# 5. Flatten dataset
print("Flattened:", X.flatten())

# 6. Shuffle rows
np.random.seed(42)
np.random.shuffle(X)
print("Shuffled X:\n", X)

Row sums: [ 60 150 240]
Column means: [40. 50. 60.]
Values > 50: [60 70 80 90]
Add 10: [[ 20  30  40]
 [ 50  60  70]
 [ 80  90 100]]
Flattened: [10 20 30 40 50 60 70 80 90]
Shuffled X:
 [[10 20 30]
 [40 50 60]
 [70 80 90]]
