# No Regularization

In [1]:
import numpy as np
import matplotlib.pyplot as plt

plt.style.use('ggplot')

In [2]:
# Load the comma-separated dataset as float64. Every column except the last
# is treated as a feature; the last column is the label, reshaped into an
# (m, 1) column vector.
# Author's note: the first two columns are point coordinates and the label is
# 0 or 1; the points are said to lie in a roughly circular arrangement with the
# inner circle labelled 0 and the outer circle labelled 1 — confirm against the
# scatter plot.
data = np.loadtxt('data/ex2data2.txt', delimiter=',', dtype=np.float64)
X = data[:, :-1]
y = data[:, -1].reshape((-1, 1))

FileNotFoundError: data/ex2data2.txt not found.

In [None]:
# Scatter the two feature columns against each other, colouring by label.
plt.scatter(
    X[:, 0],
    X[:, 1],
    c=y,
    cmap='bwr',
    edgecolors='k',
)

In [4]:
def sigmoid(z):
    """Logistic function 1 / (1 + exp(-z)), applied element-wise.

    Parameters
    ----------
    z : scalar or array-like supported by ``np.exp``

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``z``.
    """
    exp_neg_z = np.exp(-z)
    return 1 / (1 + exp_neg_z)

In [5]:
def loss(theta, X, y):
    """Mean binary cross-entropy of a logistic model (no regularization).

    Parameters
    ----------
    theta : parameter vector; the notebook passes a column of shape (n, 1).
    X     : design matrix of shape (m, n).
    y     : 0/1 labels; the notebook passes a column of shape (m, 1), and
            ``theta`` should have a matching layout so ``h`` and ``y`` align.

    Returns
    -------
    Scalar cost: -(1/m) * sum(y*log(h) + (1-y)*log(1-h)).
    """
    # Use the parameter ``X`` (the original referenced an undefined ``x``).
    h = sigmoid(np.dot(X, theta))
    # Keep probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise produce log(0) = -inf and 0 * -inf = nan in the cost.
    eps = 1e-15
    h = np.clip(h, eps, 1 - eps)
    cost = -(np.sum(y * np.log(h)) + np.sum((1 - y) * np.log(1 - h))) / len(y)
    return cost

In [6]:
def gradient(theta, X, y):
    """Gradient of the unregularized logistic loss with respect to ``theta``.

    Computes X^T (sigmoid(X theta) - y) / m, where m = len(y).
    """
    n_samples = len(y)
    errors = sigmoid(np.dot(X, theta)) - y
    return np.dot(X.T, errors) / n_samples

#### The Featurization Trick

In [8]:
def expand_feature(x1, x2, power = 2):
    """Polynomial expansion of two variables up to total degree ``power``.

    The output starts with a column of ones (bias term). Then, for each total
    degree d = 1..power, it appends the monomials x1**(d-k) * x2**k for
    k = 0..d, each flattened into a single column.

    Returns
    -------
    Array with ``x1.shape[0]`` rows and (power + 1) * (power + 2) / 2 columns.
    """
    columns = [np.ones((x1.shape[0], 1))]   # bias column
    for total_degree in range(1, power + 1):
        for x2_degree in range(total_degree + 1):
            monomial = (x1 ** (total_degree - x2_degree)) * (x2 ** x2_degree)
            columns.append(monomial.reshape((-1, 1)))
    # Stack once at the end instead of growing the matrix column by column.
    return np.hstack(columns)

In [9]:
# This is a more general version of the above function that can handle any number of features and expand them to a given degree.

def expand_features(X, degree):
    """Polynomial expansion of an (n_samples, n_features) matrix up to ``degree``.

    Generalizes ``expand_feature`` to any number of input columns. The output
    contains every monomial of total degree 0..degree: the degree-0 term is the
    all-ones bias column, followed by degree 1, degree 2, and so on. Within a
    degree, monomials follow ``itertools.combinations_with_replacement`` order,
    which for two features reproduces the column order of ``expand_feature``.

    Previously only monomials of exactly ``degree`` were produced, so the bias
    and all lower-order terms were missing.

    Parameters
    ----------
    X      : 2-D array of shape (n_samples, n_features).
    degree : non-negative integer, the maximum total degree.

    Returns
    -------
    Float array of shape (n_samples, n_terms).
    """
    from itertools import combinations_with_replacement
    n_samples, n_features = X.shape
    columns = []
    for current_degree in range(degree + 1):
        for combo in combinations_with_replacement(range(n_features), current_degree):
            # An empty combo (degree 0) leaves the column as all ones: the bias.
            column = np.ones(n_samples)
            for index in combo:
                column = column * X[:, index]
            columns.append(column)
    return np.column_stack(columns)

In [10]:
def predict(theta, X):
    """Predict 0/1 class labels by thresholding the sigmoid output at 0.5.

    Returns a flat 1-D integer array with one label per row of ``X``.
    """
    is_positive = sigmoid(np.dot(X, theta)) >= 0.5
    labels = is_positive.astype(int)
    return labels.flatten()

In [11]:
def gradient_descent(X, y, theta, alpha, num_iters):
    """Batch gradient descent for unregularized logistic regression.

    Parameters
    ----------
    X         : design matrix of shape (m, n).
    y         : labels; the notebook passes a column of shape (m, 1).
    theta     : initial parameters; the notebook passes a column of shape (n, 1).
                The caller's array is left untouched.
    alpha     : learning rate.
    num_iters : number of update steps.

    Returns
    -------
    (theta, costs) : the final parameters and a list holding the cost after
    each update.
    """
    # Work on a float64 copy: the in-place update below must not modify the
    # caller's array, and an integer-typed initial theta would make ``-=`` fail.
    theta = np.array(theta, dtype=np.float64)
    costs = []
    for _ in range(num_iters):
        # ``gradient`` already divides by the number of samples.
        theta -= alpha * gradient(theta, X, y)
        costs.append(loss(theta, X, y))
    return theta, costs

In [12]:
# Implement Logistic Regression without Regularization
def logistic_regression_no_reg(X, y, power=2, alpha=0.01, num_iters=100):
    """Fit logistic regression without regularization on polynomial features.

    Three hyperparameters: the learning rate ``alpha``, the number of
    iterations ``num_iters`` and the polynomial ``power`` of the features.

    The first two columns of ``X`` are expanded with ``expand_feature``, theta
    starts at zero, and training runs through ``gradient_descent``. The
    training accuracy is printed as a percentage.

    Returns
    -------
    (predicted, theta, costs) : predicted labels for the training data, the
    fitted parameters and the per-iteration cost history.
    """
    design_matrix = expand_feature(X[:, 0], X[:, 1], power = power)
    n_params = design_matrix.shape[1]
    start_theta = np.zeros((n_params, 1))
    theta, costs = gradient_descent(design_matrix, y, start_theta, alpha, num_iters)
    predicted = predict(theta, design_matrix)
    matches = predicted == y.flatten()
    accuracy = np.mean(matches) * 100
    print(f'Accuracy without regularization: {accuracy:.2f}%')
    return predicted, theta, costs
    

#### Decide the polynomial power in the features, and the number of iterations

In [13]:
# Hyperparameters for the unregularized run: polynomial power, number of
# gradient-descent iterations and learning rate.
power = 20
num_iters = 20000
alpha = 0.6
predicted, theta, costs = logistic_regression_no_reg(
    X, y, power=power, alpha=alpha, num_iters=num_iters
)

NameError: name 'X' is not defined

In [14]:
# Training accuracy as a percentage. The format field needs a ':' before the
# spec; '{.2f}' is parsed as an attribute lookup and raises.
'The accuracy is {:.2f}%'.format(np.sum(predicted == y.flatten()) / len(y) * 100)

NameError: name 'predicted' is not defined

#### Visualize the classifier

In [15]:
# Evaluate the linear score  features(u, v) . theta  on a 50 x 50 grid that
# spans the range of the two raw features.
u = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 50)
v = np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 50)

# meshgrid(u, v) yields arrays of shape (len(v), len(u)): rows follow v and
# columns follow u, which is the layout plt.contour(u, v, z) expects, so no
# transpose is needed. The whole grid is expanded in one call instead of one
# expand_feature call per grid point.
uu, vv = np.meshgrid(u, v)
grid_features = expand_feature(uu.ravel(), vv.ravel(), power=power)
z = np.dot(grid_features, theta).reshape(uu.shape)

# The decision boundary is where the score crosses zero.
plt.contour(u, v, z, [0, 0.01], cmap = 'Reds')
# Plain matplotlib scatter (as in the first plot): seaborn is never imported
# in this notebook, so the previous sns.scatterplot call raised NameError.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', edgecolors='k')
plt.title('Logistic Regression without Regularization Decision Boundary')
plt.xlabel('Feature 1 aka X')
plt.ylabel('Feature 2 aka Y')

NameError: name 'X' is not defined

This is an example of an overfitted model. Try lowering the polynomial-order hyperparameter (`power`) and observe that the model no longer overfits. A lower order also reaches comparable accuracy in much less time, whereas an unnecessarily high order multiplies the compute time many times over and still produces an overfitted model.

# With Regularization

In [17]:
def cost_with_reg(theta, X, y, lambda_ = 0):
    """Binary cross-entropy cost with an L2 penalty on the non-bias weights.

    cost = -(1/m) * sum(y*log(h) + (1-y)*log(1-h)) + (lambda_/m) * sum(theta[1:]**2)

    The first entry of ``theta`` (the bias) is excluded from the penalty.
    The penalty is scaled by lambda_/m rather than the also-common
    lambda_/(2m), so its derivative carries a factor of 2; ``grad_reg``
    accounts for that factor.

    Parameters
    ----------
    theta   : parameter vector; the notebook passes a column of shape (n, 1).
    X       : design matrix of shape (m, n).
    y       : 0/1 labels; the notebook passes a column of shape (m, 1).
    lambda_ : regularization strength (0 disables the penalty).

    Returns
    -------
    Scalar regularized cost.
    """
    m = len(y)
    h = sigmoid(np.dot(X, theta))
    # Keep probabilities strictly inside (0, 1): a saturated sigmoid would
    # otherwise produce log(0) = -inf and 0 * -inf = nan in the cost.
    eps = 1e-15
    h = np.clip(h, eps, 1 - eps)
    # Slice off the bias instead of copying theta and zeroing its first entry.
    reg_term = (lambda_ / m) * np.sum(theta[1:] ** 2)
    cost = -(np.sum(y * np.log(h)) + np.sum((1 - y) * np.log(1 - h))) / m + reg_term
    return cost

#### Regularization: Gradient

In [18]:
def grad_reg(theta, X, y, lambda_ = 0):
    """Gradient of the L2-regularized logistic cost with respect to ``theta``.

    Data term: X^T (sigmoid(X theta) - y) / m.
    Penalty term: 2 * (lambda_ / m) * theta, with the first (bias) entry
    zeroed so that the bias is not regularized.
    """
    m = len(y)
    penalized = theta.copy()
    penalized[0] = 0  # the bias takes no part in the penalty
    residual = sigmoid(np.dot(X, theta)) - y
    data_grad = np.dot(X.T, residual) / m
    penalty_grad = 2 * ((lambda_ / m) * penalized)
    return data_grad + penalty_grad

#### Regularization: Gradient Descent

In [20]:
def gradient_descent_reg(X, y, theta, alpha, num_iters = 100, lambda_ = 0):
    """Batch gradient descent for L2-regularized logistic regression.

    Parameters
    ----------
    X         : design matrix of shape (m, n).
    y         : labels; the notebook passes a column of shape (m, 1).
    theta     : initial parameters; the notebook passes a column of shape (n, 1).
                The caller's array is left untouched.
    alpha     : learning rate.
    num_iters : number of update steps.
    lambda_   : regularization strength passed to ``grad_reg`` and
                ``cost_with_reg``.

    Returns
    -------
    (theta, costs) : the final parameters and a list holding the regularized
    cost after each update.
    """
    # Work on a float64 copy so the caller's initial theta is not modified.
    theta = np.array(theta, dtype=np.float64)
    costs = []
    for _ in range(num_iters):
        # Pass the full theta. grad_reg and cost_with_reg exclude the bias
        # from the penalty themselves; zeroing the bias beforehand (as the
        # previous version did) also removed it from the hypothesis, so the
        # gradient and cost were computed for a model with no intercept.
        theta -= alpha * grad_reg(theta, X, y, lambda_)
        costs.append(cost_with_reg(theta, X, y, lambda_))
    return theta, costs

#### Regularization: Logistic Regression

In [None]:
def logistic_regression_with_reg(X, y, power=2, alpha=0.01, num_iters=100, lambda_ = 0):
    """Fit L2-regularized logistic regression on polynomial features.

    Four hyperparameters: the learning rate ``alpha``, the number of
    iterations ``num_iters``, the polynomial ``power`` of the features and the
    regularization parameter ``lambda_``.

    The first two columns of ``X`` are expanded with ``expand_feature``, theta
    starts at zero, and training runs through ``gradient_descent_reg``. The
    training accuracy is printed as a percentage.

    Returns
    -------
    (predicted, theta, costs) : predicted labels for the training data, the
    fitted parameters and the per-iteration cost history.
    """
    design_matrix = expand_feature(X[:, 0], X[:, 1], power = power)
    n_params = design_matrix.shape[1]
    start_theta = np.zeros((n_params, 1), dtype = np.float64)
    theta, costs = gradient_descent_reg(design_matrix, y, start_theta, alpha, num_iters, lambda_)
    predicted = predict(theta, design_matrix)
    matches = predicted == y.flatten()
    accuracy = np.mean(matches) * 100
    print(f'Accuracy with regularization: {accuracy:.2f}%')
    return predicted, theta, costs

#### Regularization: Decide the polynomial feature, number of iterations, lambda

In [None]:
# Hyperparameters for the regularized run: polynomial power, number of
# gradient-descent iterations, learning rate and regularization strength.
power = 20
num_iters = 20000
alpha = 0.6
lambda_ = 1
predicted, theta, costs = logistic_regression_with_reg(
    X, y, power=power, alpha=alpha, num_iters=num_iters, lambda_=lambda_
)

In [None]:
# Training accuracy as a percentage. The format field needs a ':' before the
# spec; '{.2f}' is parsed as an attribute lookup and raises.
'The accuracy is {:.2f}%'.format(np.sum(predicted == y.flatten()) / len(y) * 100)

NameError: name 'predicted' is not defined

: 

#### Regularization: Visualize Results

In [None]:
# Evaluate the linear score  features(u, v) . theta  on a 50 x 50 grid that
# spans the range of the two raw features.
u = np.linspace(np.min(X[:, 0]), np.max(X[:, 0]), 50)
v = np.linspace(np.min(X[:, 1]), np.max(X[:, 1]), 50)

# meshgrid(u, v) yields arrays of shape (len(v), len(u)): rows follow v and
# columns follow u, which is the layout plt.contour(u, v, z) expects, so no
# transpose is needed. The whole grid is expanded in one call instead of one
# expand_feature call per grid point.
uu, vv = np.meshgrid(u, v)
grid_features = expand_feature(uu.ravel(), vv.ravel(), power=power)
z = np.dot(grid_features, theta).reshape(uu.shape)

# The decision boundary is where the score crosses zero.
plt.contour(u, v, z, [0, 0.01], cmap = 'Reds')
# Plain matplotlib scatter (as in the first plot): seaborn is never imported
# in this notebook, so the previous sns.scatterplot call raised NameError.
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='bwr', edgecolors='k')
plt.title('Logistic Regression with Regularization Decision Boundary')
plt.xlabel('Feature 1 aka X')
plt.ylabel('Feature 2 aka Y')

Experiment with different hyperparameters to observe their impact on accuracy and precision, and on whether the resulting model is over-fitted or under-fitted.