In [49]:
import random
import sys
from typing import Callable, List

import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import time

In [36]:
class LogLinearModel:
    def __init__(
        self,
        feature_function: Callable,
        learning_rate: float,
        iterations: int,
        loss: Callable,
        gradient_loss: Callable,
        verbose: bool = False,
    ):
        """
        Parameters
        ---
        feature_function : Callable
            Feature function mapping from X x Y -> R^m
        learning_rate : float
            Learning rate parameter eta for gradient descent
        iterations : int
            Number of iterations to run gradient descent for during `fit`
        loss : Callable
            Loss function to be used by this LogLinearModel instance as
            a function of the parameters and the data X and y
        gradient_loss : Callable
            Closed form gradient of the `loss` function used for gradient descent as
            a function of the parameters and the data X and y
        verbose : bool
            Verbosity level of the class. If verbose == True,
            the class will print updates about the gradient
            descent steps during `fit`

        """
        self.feature_function = feature_function
        self.theta = None
        self.alpha = learning_rate
        self.iterations = iterations
        self.loss = loss
        self.gradient_loss = gradient_loss
        self.verbose = verbose

    def gradient_descent(self, X: np.ndarray, y: np.ndarray):
        """Performs one gradient descent step, and update parameters inplace.

        Parameters
        ---
        X : np.ndarray
            Data matrix
        y : np.ndarray
            Binary target values

        Returns
        ---
        None

        """
        n = X.shape[0]
        loss = 0
        for i in range(n):
            grad = self.gradient_loss(X[i], y[i], self.feature_function, self.theta)
            grad_max = max(max(grad), grad_max) if i else max(grad)
            self.theta -= self.alpha * grad
            loss += self.loss(X[i], y[i], self.feature_function, self.theta)
        return loss / n, grad_max

    def fit(self, X: np.ndarray, y: np.ndarray):
        """Fits LogLinearModel class using gradient descent.

        Parameters
        ---
        X : np.ndarray
            Input data matrix
        y : np.ndarray
            Binary target values

        Returns
        ---
        None

        """
        self.theta = np.zeros(2 * X.shape[1])
        for epoch in range(self.iterations):
            if self.verbose:
                print('step:', epoch+1, end=' ')
            loss, grad_max = self.gradient_descent(X, y)
            if self.verbose:
                print('loss:', loss, 'max theta step:', -self.alpha * grad_max)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predicts binary target labels for input data `X`.

        Parameters
        ---
        X : np.ndarray
            Input data matrix

        Returns
        ---
        np.ndarray
            Predicted binary target labels

        """
        if self.theta is None:
            print('The model is not trained!')
            return;
        
        n = X.shape[0]
        result = np.zeros(n)
        for i in range(n):
            x_pos = self.feature_function(X[i], 1)
            x_neg = self.feature_function(X[i], 0)
            z_pos = np.dot(self.theta, x_pos)
            z_neg = np.dot(self.theta, x_neg)
            m = max(z_pos, z_neg)
            z_pos -= m
            z_neg -= m 
            y_pred = np.exp(z_pos) / (np.exp(z_pos) + np.exp(z_neg))
            result[i] = int(round(y_pred))
        
        return result
        

In [37]:
# Set seeds to ensure reproducibility
np.random.seed(42)
random.seed(42)

In [38]:
learning_rate = 0.001

def feature_function(x, y):
    features = x.shape[0] 
    feature_vector = np.zeros(2 * features)
    if y:
        feature_vector[:features] = x
    else:
        feature_vector[features:] = x 
    return feature_vector

def negative_log_likelihood(x, y, f, theta):
    x_pos = f(x, y)
    x_neg = f(x, 1-y)
    z_pos = np.dot(theta, x_pos)
    z_neg = np.dot(theta, x_neg)
    m = max(z_pos, z_neg)
    z_pos -= m
    z_neg -= m 
    loss = -z_pos + np.log(np.exp(z_pos) + np.exp(z_neg)) 
    return loss

def gradient_negative_log_likelihood(x, y, f, theta):
    x_pos = f(x, y)
    x_neg = f(x, 1-y)
    z_pos = np.dot(theta, x_pos)
    z_neg = np.dot(theta, x_neg)
    m = max(z_pos, z_neg)
    z_pos -= m
    z_neg -= m 
    y_pred = np.exp(z_pos) / (np.exp(z_pos) + np.exp(z_neg))
    return -x_pos + y_pred * x_pos

In [39]:
lr = LogisticRegression()

llm = LogLinearModel(
    feature_function=feature_function,
    learning_rate=learning_rate,                     
    iterations=100,
    loss=negative_log_likelihood,
    gradient_loss=gradient_negative_log_likelihood
)

In [58]:
# First dataset
# Fit both `lr` and your `llm` on this dataset and compare
# the aspects described in the assignment PDF
X, y = make_classification(
    n_samples=100, random_state=42, n_informative=20, n_features=20, n_redundant=0
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
custom_start = time.time()
llm.fit(X_train, y_train)
custom_end = time.time()
lr.fit(X_train, y_train)
sklearn_end = time.time()
print('Training time:')
print('custom:', custom_end-custom_start, 'sklearn:', sklearn_end - custom_end)

print('Train accuracy:')
print('custom:', accuracy_score(llm.predict(X_train), y_train), 'sklearn:', accuracy_score(lr.predict(X_train), y_train))

print('Test accuracy:')
print('custom:', accuracy_score(llm.predict(X_test), y_test), 'sklearn:', accuracy_score(lr.predict(X_test), y_test))

print('Coefficients:')
print('custom mean:', np.mean(llm.theta), 'custom std-dev:', np.std(llm.theta))
print('sklearn mean:', np.mean(lr.coef_), 'sklearn std-dev:', np.std(lr.coef_))

Training time:
custom: 0.2932131290435791 sklearn: 0.0060117244720458984
Train accuracy:
custom: 0.9125 sklearn: 0.925
Test accuracy:
custom: 0.6 sklearn: 0.6
Coefficients:
custom mean: 0.1089400096826735 custom std-dev: 0.5487270484780616
sklearn mean: 0.08479289560242767 sklearn std-dev: 0.29402006543764897


In [61]:
# Second dataset
# Fit both `lr` and your `llm` on this dataset and compare
# the aspects described in the assignment PDF
X, y = make_classification(
    n_samples=1000,
    random_state=42,
    n_informative=20,
    n_redundant=10,
    n_features=35,
    n_repeated=5,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
custom_start = time.time()
llm.fit(X_train, y_train)
custom_end = time.time()
lr.fit(X_train, y_train)
sklearn_end = time.time()
print('Training time:')
print('custom:', custom_end-custom_start, 'sklearn:', sklearn_end - custom_end)

print('Train accuracy:')
print('custom:', accuracy_score(llm.predict(X_train), y_train), 'sklearn:', accuracy_score(lr.predict(X_train), y_train))

print('Test accuracy:')
print('custom:', accuracy_score(llm.predict(X_test), y_test), 'sklearn:', accuracy_score(lr.predict(X_test), y_test))

print('Coefficients:')
print('custom mean:', np.mean(llm.theta), 'custom std-dev:', np.std(llm.theta))
print('sklearn mean:', np.mean(lr.coef_), 'sklearn std-dev:', np.std(lr.coef_))

Training time:
custom: 2.9820547103881836 sklearn: 0.012933731079101562
Train accuracy:
custom: 0.80375 sklearn: 0.8225
Test accuracy:
custom: 0.79 sklearn: 0.805
Coefficients:
custom mean: -0.9367362157037722 custom std-dev: 6.553943921618134
sklearn mean: 0.010623715299812767 sklearn std-dev: 0.1393127608154251


In [62]:
# Third dataset
# Fit both `lr` and your `llm` on this dataset and compare
# the aspects described in the assignment PDF
X, y = make_classification(
    n_samples=10000, random_state=42, n_informative=2, n_repeated=5
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
custom_start = time.time()
llm.fit(X_train, y_train)
custom_end = time.time()
lr.fit(X_train, y_train)
sklearn_end = time.time()
print('Training time:')
print('custom:', custom_end-custom_start, 'sklearn:', sklearn_end - custom_end)

print('Train accuracy:')
print('custom:', accuracy_score(llm.predict(X_train), y_train), 'sklearn:', accuracy_score(lr.predict(X_train), y_train))

print('Test accuracy:')
print('custom:', accuracy_score(llm.predict(X_test), y_test), 'sklearn:', accuracy_score(lr.predict(X_test), y_test))

print('Coefficients:')
print('custom mean:', np.mean(llm.theta), 'custom std-dev:', np.std(llm.theta))
print('sklearn mean:', np.mean(lr.coef_), 'sklearn std-dev:', np.std(lr.coef_))

Training time:
custom: 27.37222409248352 sklearn: 0.00794672966003418
Train accuracy:
custom: 0.889875 sklearn: 0.89125
Test accuracy:
custom: 0.891 sklearn: 0.8895
Coefficients:
custom mean: -0.4860381821977381 custom std-dev: 2.00379168759306
sklearn mean: 0.12850040103483315 sklearn std-dev: 0.28418480428248827
