# Import dependencies

In [2]:
import numpy as np

# Row vector

In [3]:
def rv(values_list):
    """Build a row vector from a list of values.

    The input is wrapped in one extra outer list before conversion, so a
    flat list of n values becomes a 2-D array with a single row (1 x n).
    """
    single_row = [values_list]
    return np.array(single_row)


In [4]:
rv([1, 2, 3, 4])

array([[1, 2, 3, 4]])

# Column vector

In [5]:
def cv(values_list):
    """Build a column vector from a list of values.

    The values are first turned into a row vector with rv() and then
    transposed, so a flat list of n values becomes an n x 1 array.
    """
    as_row = rv(values_list)
    return as_row.T


In [6]:
cv([1, 2, 3, 4])

array([[1],
       [2],
       [3],
       [4]])

# Length

In [7]:
def length(col_v):
    """Return the Euclidean length of col_v.

    Computed as the square root of the sum of the element-wise squares
    of every entry in col_v.
    """
    squares = col_v * col_v
    total = np.sum(squares)
    return np.sqrt(total)


In [8]:
length(cv([1, 2, 3, 4]))

5.477225575051661

# Normalize a column vector

In [9]:
def normalize(col_v):
    """Scale col_v by the reciprocal of its length.

    Every entry is divided by length(col_v); the result has the same shape
    as col_v.
    """
    norm = length(col_v)
    return col_v / norm

In [10]:
normalize(cv([1, 2, 3, 4]))

array([[0.18257419],
       [0.36514837],
       [0.54772256],
       [0.73029674]])

# Code for signed distance

In [118]:
def signed_distance(x, th, th0):
    """Signed distance from x to the hyperplane described by (th, th0).

    Evaluates (th.T . x + th0) / length(th). In this notebook th is passed
    as a d x 1 column and th0 as a 1 x 1 array; when x holds one point per
    column, the result holds one distance per column.
    """
    offset_projection = np.dot(th.T, x) + th0
    normal_length = length(th)
    return offset_projection / normal_length
    

# Code for side of hyperplane

In [119]:
def positive(x, th, th0):
    """Report which side of the hyperplane (th, th0) each point in x lies on.

    Returns the sign of the signed distance: +1 on the positive side,
    -1 on the negative side, and 0 for a point exactly on the hyperplane.
    """
    distance = signed_distance(x, th, th0)
    return np.sign(distance)
    

# Test of linear classifier on a single point (d = 2, n = 1)

In [122]:
# One point x and one separator (th, th0), all stored as 2-D arrays:
# x and th are 2 x 1 columns, th0 is a 1 x 1 array.
x = cv([2, -3])
th = cv([1, -1])
th0 = rv([-2])

In [123]:
positive(x, th, th0)

array([[1.]])

# Classification of a dataset - expressions operating on data (d = 2)

In [15]:
# Each inner list is one point; transposing stores one point per column (2 x 5).
data = np.transpose(np.array([[1, 2], [1, 3], [2, 1], [1, -1], [2, -1]]))
# One label (+1 or -1) per column of data, as a 1 x 5 row.
labels = rv([-1, -1, +1, +1, +1])
data

array([[ 1,  1,  2,  1,  2],
       [ 2,  3,  1, -1, -1]])

In [124]:
# NOTE: rebinds th and th0, replacing the values used in the single-point test.
th = np.array([[1], [1]])
th0 = np.array([[-2]])
# positive() handles every column of data in one call: one sign per point.
A = positive(data, th, th0)
A

array([[ 1.,  1.,  1., -1., -1.]])

In [125]:
# Two equivalent spellings of the element-wise comparison with labels.
# The second assignment overwrites the first, so only its result is shown.
A = (positive(data, th, th0) == labels)
A = np.equal(positive(data, th, th0), labels)
A

array([[False, False,  True, False, False]])

# Score
## data is dimension 2 by 5
## labels is dimension 1 by 5
## th is dimension 2 by 1
## th0 is dimension 1 by 1
## return an integer: the number of data points classified correctly by
## the separator (th, th0).

In [128]:
def score(data, labels, th, th0):
    """Count the points in data that the separator (th, th0) labels correctly.

    The sign predicted by positive() for each point is compared element-wise
    with labels, and the number of matches is returned as a single value.
    """
    predictions = positive(data, th, th0)
    matches = np.equal(predictions, labels)
    return np.sum(matches)

In [129]:
score(data, labels, th, th0)

1

In [72]:
# Quick check: np.argmax returns the index of the largest entry (index 2 here).
np.argmax(np.array([1,2,6]))

2

# Best separator in a set of m separators
## data is dimension d by n
## labels is dimension 1 by n
## ths is dimension d by m
## th0s is dimension 1 by m
## return a tuple of a d by 1 array and an offset in the form of a 1 by 1 array

# Method 1 - Not efficient

In [130]:
def best_separator(data, labels, ths, thos):
    """Pick the candidate separator that classifies the most points correctly.

    Parameters
    ----------
    data : array with one point per column.
    labels : 1 x n array of expected signs, one per column of data.
    ths : array with one candidate normal vector per column.
    thos : array with one row holding the offset of each candidate;
        column j of thos pairs with column j of ths.

    Returns
    -------
    (best_th, best_th0) where best_th is a tuple holding the entries of the
    winning column of ths and best_th0 is the matching 1-element slice of
    thos. When several candidates tie, the first one wins, because
    np.argmax returns the first maximum.
    """
    nbr_of_separator = ths.shape[1]
    # Offsets are read from the `thos` argument. The previous body referred to
    # `th0s`, which is not a parameter, so it silently used a notebook global
    # (or raised NameError when that global did not exist).
    scores = [
        score(data, labels, ths[:, index_sep], thos[:, index_sep])
        for index_sep in range(nbr_of_separator)
    ]
    argmax_score = np.argmax(np.array(scores))
    best_th = tuple(ths[:, argmax_score])
    best_th0 = thos[:, argmax_score]
    return best_th, best_th0


In [141]:
# Four candidate separators: column j of ths pairs with column j of th0s.
ths = np.array([[1, -1], [-1, 3], [4, -2], [5, -3]]).T
th0s = rv([-2, 3, -5, -1])
# Same labels as defined with the dataset; repeated here.
labels = rv([-1, -1, +1, +1, +1])
# Only the last expression of a cell is displayed, so this bare `ths` shows nothing.
ths
th0s

array([[-2,  3, -5, -1]])

In [132]:
best_separator(data, labels, ths, th0s)

((4, -2), array([-5]))

In [109]:
# Small arrays for a broadcasting demo: x is 3 x 2, b is a 3 x 1 column.
# NOTE: rebinds x, replacing the point used in the single-point test.
x = np.array([[1, 6], [2, 6], [3, 6]])
b = np.array([[1], [2], [3]])

# Method 2 - more efficient

In [114]:
x

array([[1, 6],
       [2, 6],
       [3, 6]])

In [111]:
b

array([[1],
       [2],
       [3]])

In [112]:
# Broadcasting: the 3 x 1 column b is added to each column of the 3 x 2 array x.
x + b 

array([[2, 7],
       [4, 8],
       [6, 9]])

In [142]:
def score_mat(data, labels, ths, th0s):
    """Score every candidate separator against the data in one matrix pass.

    Row j of the intermediate array holds th_j . x + th0_j for each column x
    of data; the transposed offsets broadcast across each row. The signs are
    compared with labels, and the matches are summed along each row.

    Returns an array with one row per separator and a single column holding
    that separator's count of correctly classified points.
    """
    activations = np.dot(np.transpose(ths), data) + np.transpose(th0s)
    agreement = np.sign(activations) == labels
    return np.sum(agreement, axis=1, keepdims=True)
    

In [143]:
score_mat(data, labels, ths, th0s)

array([[3],
       [1],
       [5],
       [5]])

In [146]:
def best_score(data, labels, ths, th0s):
    """Return the highest-scoring separator among the columns of ths / th0s.

    Scores come from score_mat(); np.argmax picks the index of the largest
    one. The winning column of ths is returned as a column vector built with
    cv(), together with the matching 1-element slice of th0s.
    """
    all_scores = score_mat(data, labels, ths, th0s)
    best_index = np.argmax(all_scores)
    best_th = cv(ths[:, best_index])
    best_th0 = th0s[:, best_index]
    return best_th, best_th0


In [147]:
best_score(data, labels, ths, th0s)

(array([[ 4],
        [-2]]),
 array([-5]))