In [38]:
"""
Name - Matrikelnummer 
1) Pham, Ngoc Anh Trung - 7176267
2) Viktor Vironski - 4330455
3) Andy Disser - 5984875

Exercise Sheet 2
"""

from random import random
from sklearn import cluster, datasets
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import copy
import matplotlib.pyplot as plt
import matplotlib as mpl


def softmargin(data, labels, supportvec):
    """
    Compute the soft margin score of a one-dimensional threshold classifier.

    The decision threshold is the midpoint between the two support vectors.
    Samples with label 0 are expected strictly left of the threshold, samples
    with label 1 or 2 on the threshold or right of it.

    Inputs:
    - data: 1-D array-like with the feature value of every sample
    - labels: array-like with the class label of every sample
    - supportvec: pair of indices into data; supportvec[0] selects the left
      support vector, supportvec[1] the right one

    Output:
    - the soft margin score ( # misclassification * (-margin) ); it has the
      same shape as data[supportvec[0]] (a scalar for scalar indices)
    """

    data = np.asarray(data)
    labels = np.asarray(labels)

    # Left and right support vectors
    left = data[supportvec[0]]
    right = data[supportvec[1]]

    # The threshold is the plain midpoint. Taking its absolute value would
    # move it to the wrong side of the data whenever the midpoint is negative.
    th = (left + right) / 2

    # Distance from a support vector to the threshold
    margin = np.abs(left - th)

    # Label 0 is misclassified when it is not strictly left of the threshold.
    # A point exactly on the threshold counts as "right", so that the score
    # agrees with a decision rule that assigns such points to the right class.
    wrong_left_class = (data >= th) & (labels == 0)

    # Labels 1 and 2 are misclassified when they are left of the threshold
    wrong_right_class = (data < th) & ((labels == 1) | (labels == 2))

    # The two masks are mutually exclusive, so their union is the full set
    # of misclassified samples
    num_of_missclass = np.count_nonzero(wrong_left_class | wrong_right_class)

    # Weight each misclassified sample with the negative margin and
    # return the sum as soft margin score
    return num_of_missclass * (-margin)

In [57]:
# Import and access the data
iris = datasets.load_iris()
iris_data = iris['data']

n = iris_data.shape[0]

# Tunable constants of the experiment
SEED = 0        # fixed seed so that Restart & Run All reproduces the result
N_TRAIN = 100   # number of training samples; the remainder is used for testing
N_TRIALS = 20   # number of random support vector pairs to try

rng = np.random.default_rng(SEED)

# Extract the petal widths
data = iris_data[:, 3]

# Random boolean masks: exactly N_TRAIN positions of the permutation of
# 0,1,..,n-1 hold a value below N_TRAIN, the remaining n - N_TRAIN do not
permutation = rng.permutation(n)
train_filter = permutation < N_TRAIN
test_filter = permutation >= N_TRAIN

# Split the data into training and testing samples
train_data = data[train_filter]
test_data = data[test_filter]

# The respective labels
train_target = iris.target[train_filter]
test_target = iris.target[test_filter]

# ============= 1b =================================

best_margin_score = -np.inf
best_supp_vec_indices = None

# The indices of the train data
indices = np.arange(train_data.shape[0])

# Candidate support vectors per side; these do not change between trials
left_candidates = indices[train_target == 0]
right_candidates = indices[np.logical_or(train_target == 1, train_target == 2)]

for trial in range(N_TRIALS):

    # Pick 2 random points, one from class 0 and one from the other classes.
    # size=1 keeps the picks as length-1 arrays, so the score and the
    # threshold below are length-1 arrays as well.
    supp_vec_left = rng.choice(left_candidates, size=1, replace=False)
    supp_vec_right = rng.choice(right_candidates, size=1, replace=False)

    supp_vec_indices = np.array([supp_vec_left, supp_vec_right])

    # Compute score
    score = softmargin(train_data, train_target, supp_vec_indices)

    # Keep the pair with the highest (least negative) score
    if score > best_margin_score:
        best_margin_score = score
        best_supp_vec_indices = supp_vec_indices

# ============ 1b Test the data ========================================

# The threshold is the midpoint between the two best support vectors
th = np.divide(train_data[best_supp_vec_indices[0]] + train_data[best_supp_vec_indices[1]], 2)

# Points strictly left of the threshold are predicted as class/label 1
predicted_label1 = test_data < th

# Points on or right of the threshold are predicted as class/label 2
predicted_label2 = test_data >= th

truth_label1 = test_target == 0
truth_label2 = np.logical_or(test_target == 1, test_target == 2)

correctly_classified_1 = np.logical_and(predicted_label1, truth_label1)
correctly_classified_2 = np.logical_and(predicted_label2, truth_label2)

correct_classified_test_data = test_data[np.logical_or(correctly_classified_1, correctly_classified_2)]

print("Accuracy:", np.divide(np.size(correct_classified_test_data), np.size(test_data)))

Accuracy: 1.0
