PART 1: Binary logistic regression (sigmoid + batch gradient descent)

In [1]:
import numpy as np
import matplotlib.pyplot as plt


In [4]:
# Logistic (sigmoid) activation
def sigmoid(z):
    """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s); sequences are accepted as well as NumPy arrays.

    Returns
    -------
    NumPy scalar or ndarray
        Values in the closed interval [0, 1], same shape as ``z``.

    Notes
    -----
    The direct formula evaluates ``exp(-z)``, which overflows (and emits a
    RuntimeWarning) for large negative ``z``. Here the identity
    ``sigmoid(z) = exp(-log(1 + exp(-z)))`` is used instead, with the inner
    logarithm computed by ``np.logaddexp(0, -z)``, which does not overflow.
    """
    neg_z = np.negative(z)
    return np.exp(-np.logaddexp(0, neg_z))

In [5]:
def compute_cost(X, y, w, b):
    """Mean binary cross-entropy of the model ``sigmoid(X @ w + b)``.

    Parameters
    ----------
    X : ndarray
        Inputs; ``X.shape[0]`` is taken as the number of samples.
    y : ndarray
        Targets, combined element-wise with the predicted probabilities
        (presumably 0/1 labels, one per sample -- confirm against callers).
    w, b :
        Weights and bias of the linear score ``np.dot(X, w) + b``.

    Returns
    -------
    The summed cross-entropy divided by the number of samples.
    """
    n_samples = X.shape[0]
    probs = sigmoid(np.dot(X, w) + b)

    # The 1e-9 offset keeps both logarithms finite when a probability
    # saturates at exactly 0 or 1.
    positive_term = y * np.log(probs + 1e-9)
    negative_term = (1 - y) * np.log(1 - probs + 1e-9)

    return -(1 / n_samples) * np.sum(positive_term + negative_term)


In [6]:
def gradient_descent(X, y, w, b, lr, epochs):
    """Fit logistic-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : ndarray
        Inputs; ``X.shape[0]`` is taken as the number of samples.
    y : ndarray
        Targets subtracted element-wise from the predicted probabilities
        (presumably 0/1 labels, one per sample -- confirm against callers).
    w, b :
        Initial weights and bias. They are NOT modified; updated copies are
        returned instead.
    lr : float
        Step size applied to each gradient.
    epochs : int
        Number of full-batch update steps.

    Returns
    -------
    (w, b, costs)
        Final weights, final bias, and a list with the cost recorded after
        every update step.
    """
    m = X.shape[0]
    costs = []

    for _ in range(epochs):
        y_hat = sigmoid(np.dot(X, w) + b)
        residual = y_hat - y

        dw = (1 / m) * np.dot(X.T, residual)
        db = (1 / m) * np.sum(residual)

        # Out-of-place updates on purpose: `w -= lr * dw` would overwrite the
        # array the caller passed in, and would raise a casting error when
        # that array has an integer dtype.
        w = w - lr * dw
        b = b - lr * db

        costs.append(compute_cost(X, y, w, b))

    return w, b, costs


In [7]:
# Toy binary data set: a single feature, labels switch from 0 to 1 between x=3 and x=4
X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
y = np.array([0, 0, 0, 1, 1])

# Zero-initialised parameters and training hyper-parameters
w, b = np.zeros(X.shape[1]), 0
lr, epochs = 0.1, 1000

w, b, costs = gradient_descent(X, y, w, b, lr=lr, epochs=epochs)

print("Final weight:", w)
print("Final bias:", b)


Final weight: [1.76904586]
Final bias: -5.956350539118638


In [None]:
def predict(X, w, b):
    """Return hard 0/1 class predictions for the logistic model.

    The score ``np.dot(X, w) + b`` is passed through ``sigmoid`` and each
    probability is thresholded at 0.5 (values >= 0.5 become 1).
    """
    scores = np.dot(X, w) + b
    is_positive = sigmoid(scores) >= 0.5
    return is_positive.astype(int)

# Classify the training inputs X with the current w and b and print the 0/1 labels.
predictions = predict(X, w, b)
print("Predictions:", predictions)

PART 2: Multiclass softmax regression

In [8]:
def softmax(z):
    """Row-wise softmax of a 2-D score array.

    Each row has its maximum subtracted before exponentiation so the largest
    exponent is exp(0) = 1; this shift leaves the result unchanged while
    avoiding overflow. Every row of the returned array sums to 1.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    unnormalised = np.exp(z - row_peak)
    row_total = unnormalised.sum(axis=1, keepdims=True)
    return unnormalised / row_total


In [9]:
def one_hot(y, num_classes):
    """Encode integer class labels as one-hot rows.

    Parameters
    ----------
    y : 1-D array-like of int
        Class labels, each in ``[0, num_classes)``.
    num_classes : int
        Number of columns in the encoding.

    Returns
    -------
    ndarray of shape ``(len(y), num_classes)``
        Float array of zeros with a single 1 per row at the label's column.

    Raises
    ------
    ValueError
        If ``y`` is not one-dimensional.
    IndexError
        If any label lies outside ``[0, num_classes)``.
    """
    labels = np.asarray(y)
    if labels.ndim != 1:
        # A column vector would broadcast against the row indices below and
        # silently set many entries per row.
        raise ValueError(
            f"y must be one-dimensional, got shape {labels.shape}"
        )

    # Negative labels would otherwise be accepted by NumPy indexing and wrap
    # around to the last columns, silently producing a wrong encoding.
    if np.any((labels < 0) | (labels >= num_classes)):
        raise IndexError(
            f"labels must lie in [0, {num_classes}); got values outside that range"
        )

    m = labels.shape[0]
    one_hot_y = np.zeros((m, num_classes))
    if m:
        one_hot_y[np.arange(m), labels] = 1
    return one_hot_y


In [10]:
def compute_cost_softmax(X, y, W, b):
    """Mean categorical cross-entropy of the model ``softmax(X @ W + b)``.

    ``y`` is multiplied element-wise with the log-probabilities, so it is
    expected to have the same shape as the softmax output (presumably a
    one-hot label matrix -- confirm against callers). The sum is divided by
    ``X.shape[0]``, the number of samples.
    """
    probs = softmax(np.dot(X, W) + b)
    # 1e-9 keeps the logarithm finite when a probability underflows to 0.
    log_likelihood = np.sum(y * np.log(probs + 1e-9))
    return -(1 / X.shape[0]) * log_likelihood


In [11]:
def gradient_descent_softmax(X, y, W, b, lr, epochs):
    """Fit softmax-regression parameters with batch gradient descent.

    Parameters
    ----------
    X : ndarray
        Inputs; ``X.shape[0]`` is taken as the number of samples.
    y : ndarray
        Targets subtracted element-wise from the softmax output (presumably a
        one-hot label matrix -- confirm against callers).
    W, b :
        Initial weights and bias. They are NOT modified; updated copies are
        returned instead.
    lr : float
        Step size applied to each gradient.
    epochs : int
        Number of full-batch update steps.

    Returns
    -------
    (W, b, costs)
        Final weights, final bias, and a list with the cost recorded after
        every update step.
    """
    m = X.shape[0]
    costs = []

    for _ in range(epochs):
        y_hat = softmax(np.dot(X, W) + b)
        residual = y_hat - y

        dW = (1 / m) * np.dot(X.T, residual)
        db = (1 / m) * np.sum(residual, axis=0)

        # Out-of-place updates on purpose: `W -= ...` / `b -= ...` would
        # overwrite the arrays the caller passed in, and would raise a casting
        # error when those arrays have an integer dtype.
        W = W - lr * dW
        b = b - lr * db

        costs.append(compute_cost_softmax(X, y, W, b))

    return W, b, costs


In [12]:
# Toy three-class data set: five 2-D points with integer labels 0, 1, 2
X = np.array([
    [1, 2],
    [1, 3],
    [2, 1],
    [3, 1],
    [3, 3],
])
y = np.array([0, 1, 2, 2, 1])

num_classes = 3
y_onehot = one_hot(y, num_classes)

# Zero-initialised parameters: one weight column and one bias entry per class
W = np.zeros((X.shape[1], num_classes))
b = np.zeros(num_classes)

lr, epochs = 0.1, 1000

W, b, costs = gradient_descent_softmax(
    X, y_onehot, W, b, lr=lr, epochs=epochs
)

print("Final weights:\n", W)
print("Final bias:\n", b)


Final weights:
 [[-1.65027183 -0.53882342  2.18909525]
 [ 0.44650664  2.48233898 -2.92884562]]
Final bias:
 [ 2.3807574  -3.43799431  1.05723692]


In [13]:
def predict_softmax(X, W, b):
    """Return, for every row of ``X``, the index of the most probable class.

    Class probabilities are ``softmax(np.dot(X, W) + b)``; the arg-max is
    taken along axis 1 (one result per row).
    """
    class_probs = softmax(np.dot(X, W) + b)
    return class_probs.argmax(axis=1)

# Classify the training inputs X with the current W and b and print the class indices.
predictions = predict_softmax(X, W, b)
print("Predicted classes:", predictions)


Predicted classes: [0 1 2 2 1]
