In [2]:
# Imports and Consts
SEED = 42 # For deterministic testing
import numpy as np
from util import get_data

  from .autonotebook import tqdm as notebook_tqdm


In [73]:
# Load the table, discard its first row (presumably a header row -- confirm
# against util.get_data), and cast everything that is left to float.
data = get_data()[1:].astype(float)

# Every column except the last is a feature; the last column is the label.
xs, ys = data[:, :-1], data[:, -1]
print(xs.shape)

(768, 8)


In [74]:
# Normalize Data
# Start again from `data` rather than overwriting `xs` in place, so that
# re-running this cell never stacks a second bias column onto a matrix that
# was already augmented by a previous run.
features = data[:, :-1]
labels = data[:, -1]

# Standardize every column to zero mean and unit variance. Z-scoring is
# invariant to a positive per-column rescale, so dividing by the column L2
# norm beforehand would not change the result and is omitted.
# NOTE(review): the mean/std are computed over all rows, including the rows
# that later land in the dev and test splits -- confirm this is acceptable.
features = (features - np.mean(features, axis=0)) / np.std(features, axis=0)

# Add column to account for const B value in training
features = np.hstack([features, np.ones((len(features), 1))])

# Shuffle (seeded with the notebook-wide SEED so the split is reproducible)
np.random.seed(SEED)
idxs = np.random.permutation(len(features))

xs = features[idxs]
ys = labels[idxs]

# Split Data: first 80% train, next 10% dev, remainder test.
# For the 768-row data set these boundaries are 614 and 691.
n_train = int(0.8 * len(xs))
n_dev_end = int(0.9 * len(xs))

x_train = xs[:n_train]
y_train = ys[:n_train]
x_dev = xs[n_train:n_dev_end]
y_dev = ys[n_train:n_dev_end]
x_test = xs[n_dev_end:]
y_test = ys[n_dev_end:]


In [84]:
# Helpers

def Pr(arr: np.ndarray, param: np.ndarray) -> np.ndarray:
    """Apply the logistic sigmoid to the linear scores ``arr @ param``.

    The scores are clamped to [-500, 500] before exponentiation so that
    ``np.exp`` stays within float64 range.
    """
    scores = np.clip(arr @ param, -500, +500)
    return 1.0 / (np.exp(-scores) + 1.0)

def construct_w(p: np.ndarray) -> np.ndarray:
    """Build ``np.diag`` of the element-wise products ``p * (1 - p)``.

    For a 1-D ``p`` this is the square matrix with those products on the
    diagonal and zeros elsewhere.
    """
    weights = p * (1 - p)
    return np.diag(weights)

def compute_delta(X: np.ndarray, diag: np.ndarray, Y: np.ndarray, p: np.ndarray) -> np.ndarray:
    """Solve ``H @ delta = B`` for the update direction ``delta``.

    ``H`` is ``X.T @ diag @ X`` plus ``1e-2`` times the identity, and ``B`` is
    ``X.T @ (Y - p)``. The system is solved through the Cholesky factor of
    ``H``, which raises ``np.linalg.LinAlgError`` if ``H`` is not positive
    definite.
    """
    n_params = X.shape[1]
    hessian = X.T @ diag @ X + 1e-2 * np.eye(n_params)
    gradient = X.T @ (Y - p)

    # H = L @ L.T, so solve L @ u = B first and then L.T @ delta = u.
    lower = np.linalg.cholesky(hessian)
    forward = np.linalg.solve(lower, gradient)
    return np.linalg.solve(lower.T, forward)

def loglik(p, y):
    """Return the Bernoulli log-likelihood of labels ``y`` under ``p``.

    Computes ``sum(y * log(p) + (1 - y) * log(1 - p))``.

    Args:
        p: Predicted probabilities of the positive class.
        y: Labels (0 or 1), broadcast-compatible with ``p``.

    Returns:
        The summed log-likelihood as a scalar; it is never positive.
    """
    # Keep probabilities strictly inside (0, 1) so that neither logarithm
    # receives an exact zero.
    eps = 1e-15
    p_safe = np.clip(p, eps, 1.0 - eps)

    # The negative-class term is log(1 - p), not (1 - log(p)).
    return np.sum(y * np.log(p_safe) + (1 - y) * np.log(1.0 - p_safe))

def scale_const(X, Y, p_old, theta, delta, alpha):
    """Backtracking search for a step size that does not lower the log-likelihood.

    Starting from ``alpha``, the trial point ``theta + alpha * delta`` is
    evaluated; while its log-likelihood is below that of ``p_old`` the step
    is halved.

    Args:
        X: Design matrix passed to ``Pr``.
        Y: Labels passed to ``loglik``.
        p_old: Probabilities at the current ``theta``.
        theta: Current parameter vector (not modified).
        delta: Proposed update direction.
        alpha: Initial step size to try.

    Returns:
        The first step size tried whose log-likelihood is finite and at least
        the current one, or ``None`` once the step size drops below ``1e-8``.
    """
    ll_old = loglik(p_old, Y)

    while alpha >= 1e-8:
        p_new = Pr(X, theta + alpha * delta)
        ll_new = loglik(p_new, Y)

        # Accept the step as soon as it stops making things worse; without
        # this return the loop could never hand a usable alpha back.
        # A non-finite value is treated as a failed trial, not as acceptance.
        if np.isfinite(ll_new) and ll_new >= ll_old:
            return alpha

        alpha *= 0.5

    # Step size underflowed: no acceptable step along delta was found.
    return None

In [None]:
# Model Training

def newton_method(X, Y, iters=1000):
    """Fit logistic-regression parameters with damped Newton steps.

    Args:
        X: Design matrix; ``theta`` gets one entry per column of ``X``.
        Y: Labels, one per row of ``X``.
        iters: Maximum number of Newton iterations.

    Returns:
        The parameter vector after ``iters`` iterations, or earlier if
        ``scale_const`` finds no acceptable step size.

    Note:
        Reseeds NumPy's global random generator with ``SEED`` so the initial
        ``theta`` is reproducible.
    """
    # Initialize Theta. Size it from the argument X rather than the global
    # `xs`, so the function works for any design matrix.
    np.random.seed(SEED)
    theta = np.random.randn(X.shape[1])

    # The step size is carried over between iterations: once it has been
    # halved it is not reset to 1.0.
    alpha = 1.0
    for _ in range(iters):
        # Calc Values
        p = Pr(X, theta)
        W = construct_w(p)
        delta = compute_delta(X, W, Y, p)

        # Prevent exploding step
        alpha = scale_const(X, Y, p, theta, delta, alpha)

        # No step size along delta keeps the log-likelihood from dropping.
        if alpha is None:
            return theta

        # Apply Newton method
        theta += alpha * delta
    return theta


np.float64(2.5333546236961806)

In [None]:
# Predictor

def log_regression(X, theta):
    """Predict hard 0/1 labels from a fitted logistic-regression model.

    Args:
        X: Design matrix with one row per sample.
        theta: Parameter vector with one entry per column of ``X``.

    Returns:
        Float array of 0.0 / 1.0 predictions, one per row of ``X``.
    """
    # Clamp the scores so np.exp cannot overflow for extreme inputs.
    logits = np.clip(X @ theta, -500, 500)

    # Calculate probability of a 1: sigmoid(z) = 1 / (1 + exp(-z)).
    # The minus sign matters; without it this is the probability of a 0.
    probs = 1.0 / (1.0 + np.exp(-logits))

    # Round to get preds
    return np.round(probs)