In [29]:
import numpy as np

def hinge_loss_single(feature_vector, label, theta, theta_0):
    """
    Finds the hinge loss on a single data point given specific classification
    parameters.

    Args:
        feature_vector - A numpy array describing the given data point.
        label - A real valued number, the correct classification of the data
            point.
        theta - A numpy array describing the linear classifier.
        theta_0 - A real valued number representing the offset parameter.


    Returns: A real number representing the hinge loss associated with the
    given data point and parameters.
    """
    if label*(np.dot(feature_vector, theta)+theta_0)>= 1:
        return 0
    else:
        return round(1 - label*(np.dot(feature_vector, theta)+theta_0), 7)
    
    raise NotImplementedError

In [22]:
def txt_to_array(txt):
    return np.array(list(map(float, txt.strip().split())))

In [23]:
feature_vector = txt_to_array("0.69801826 0.37150353 0.4152274  0.35499628 0.33309134 0.2658746 0.76601011 0.10270042 0.79255585 0.5769108")
theta = txt_to_array("0.07163136 0.13458822 0.12041594 0.14084655 0.15010898 0.18805858 0.06527329 0.48685293 0.06308704 0.08666851")
label =  1.0
theta_0 = 0.5

print(f'Hinge loss: {hinge_loss_single(feature_vector, label, theta, theta_0)}')

Hinge loss: 0.0


In [24]:
feature_vector = txt_to_array("7.56950358 7.63408997 8.02775173 4.37155027 5.40344107 9.57257264 3.73204157 7.20681076 8.72562079 5.50699167")
theta = txt_to_array("5.39244905 6.61170434 6.09357489 4.16775849 8.62866233 1.44394045 2.94864302 9.22016042 7.70625316 7.18008263")
label =  1.0
theta_0 = 1.0

print(f'Hinge loss: {hinge_loss_single(feature_vector, label, theta, theta_0)}')

Hinge loss: 0


In [25]:
feature_vector = txt_to_array("-0.68989342 -0.04841088 -0.94076226 -0.06567371 -0.75799446 -0.75960207 -0.56833799 -0.11542343 -0.0039711  -0.85949969")
theta = txt_to_array("0.56970978 0.75223352 0.38915047 0.40603102 0.00331865 0.65408066 0.95955458 0.9043673  0.24347193 0.75571762")
label =  1.0
theta_0 = 0.660691618633547

print(f'Hinge loss: {hinge_loss_single(feature_vector, label, theta, theta_0)}')

Hinge loss: 2.9611263


In [26]:
feature_vector = txt_to_array("0.68575596 0.26223156 0.3392823  0.5551106  0.04645536 0.10512147 0.49919781 0.32670615 0.01588852 0.77075027")
theta = txt_to_array("0.99738146  0.70095016 -0.42562229 -0.93624678 -0.86947 0.27811091 -0.20921857 -0.17086613  0.99412545  0.63446338")
label =  1.0
theta_0 = 0

print(f'Hinge loss: {hinge_loss_single(feature_vector, label, theta, theta_0)}')

Hinge loss: 0.4629677


$Loss_H = \frac{1}{n}\sum Loss_h(y^{(i)}\cdot(\theta\cdot x^{(i)} + \theta_0))$

In [30]:
def hinge_loss_full(feature_matrix, labels, theta, theta_0):
    """
    Finds the total hinge loss on a set of data given specific classification
    parameters.

    Args:
        feature_matrix - A numpy matrix describing the given data. Each row
            represents a single data point.
        labels - A numpy array where the kth element of the array is the
            correct classification of the kth row of the feature matrix.
        theta - A numpy array describing the linear classifier.
        theta_0 - A real valued number representing the offset parameter.


    Returns: A real number representing the hinge loss associated with the
    given dataset and parameters. This number should be the average hinge
    loss across all of the points in the feature matrix.
    """
    errors = 0
    for i, j in zip(feature_matrix, labels):
        if j*(np.dot(i, theta)+theta_0)>= 1:
            errors += 0
        else:
            errors += round(1 - j*(np.dot(i, theta)+theta_0), 7)
    
    return errors/np.shape(feature_matrix)[0]
    
    raise NotImplementedError