In [66]:
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt

# Loading the data

In [67]:
data = pd.read_csv('dataset/iris.data', header=None)
data.columns = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
data.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [68]:
data['class'] = data['class'].map({'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2})
data.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,class
145,6.7,3.0,5.2,2.3,2
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,2
148,6.2,3.4,5.4,2.3,2
149,5.9,3.0,5.1,1.8,2


In [69]:
data_np = data.to_numpy()
training, test = data_np[:30, :], data_np[30:50, :]
training, test = np.append(training, data_np[50:80, :], axis=0), np.append(test, data_np[80:100, :], axis=0)
training, test = np.append(training, data_np[100:130, :], axis=0), np.append(test, data_np[130:, :], axis=0)
training.shape, test.shape

((90, 5), (60, 5))

In [70]:
X_train, y_train = training[:, :-1], training[:, -1]
X_test, y_test = test[:, :-1], test[:, -1]
X_train[:3], y_train[:3], X_test[:3], y_test[:3]

(array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2]]),
 array([0., 0., 0.]),
 array([[4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1]]),
 array([0., 0., 0.]))

Add a column of one to the dataset

In [71]:
X_train = np.c_[np.ones(X_train.shape[0]), X_train]
X_test = np.c_[np.ones(X_test.shape[0]), X_test]
X_train[:3], X_test[:3]

(array([[1. , 5.1, 3.5, 1.4, 0.2],
        [1. , 4.9, 3. , 1.4, 0.2],
        [1. , 4.7, 3.2, 1.3, 0.2]]),
 array([[1. , 4.8, 3.1, 1.6, 0.2],
        [1. , 5.4, 3.4, 1.5, 0.4],
        [1. , 5.2, 4.1, 1.5, 0.1]]))

# MSE
Mean square function for (19) in compendium
$$
    MSE = \frac{1}{2} \sum_{k = 1}^N (g_k - t_k)^T (g_k - t_k)
$$

In [72]:
def mse(g_k, t_k):
    """
    Mean squared error
    :param g_k: predicted value
    :param t_k: target value
    :return: mean squared error
    """
    return np.matmul((g_k - t_k).T, (g_k - t_k)) / 2

The gradient is given as
$$
\nabla_{g_k} MSE = g_k - t_k
$$

In equation (22) from the compendium, following gradient for MSE is given with respect to weights
$$
\nabla_W MSE = \sum_{k=1}^N  [(g_k - t_k) \cdot g_k \cdot (1 - g_k)] x_k^T
$$

where $\cdot$ is elementwise multiplication 

In [95]:
def mse_grad_w(g_k, t_k, x_k):
    """
    Calculate the gradient of MSE
    :param g_k: predicted value
    :param t_k: target value
    :param x_k: input data
    :return: gradient of MSE
    """
    elementwise = np.multiply(
            g_k - t_k, 
            np.multiply(g_k, (1 - g_k))
        )
    return np.multiply(
        elementwise,
        x_k.T
    )

# Sigmoid function
The sigmoid function is used as an activation function
$$
    g_k = \frac{1}{1+e^{-Wx_k}}
$$

In [74]:
def sigmoid(weights, x_k):
    """
    sigmoid function
    :param weights: weights
    :param x_k: input data
    :return: sigmoid value
    """
    return 1 / (1 + np.exp(-np.matmul( weights, x_k)))

For training the weights can be updated using
$$
W(m) = W(m - 1) - \alpha \nabla_W MSE
$$

In [91]:
class LinearClassifier:
    def __init__(self, weight_dimention, learning_rate=0.01, n_iters=1000):
        self.learning_rate = learning_rate
        self.n_iters = n_iters
        self.weights = np.random.rand(1, weight_dimention)
        self.historic_mse = []

    def train(self, X, y):
        for _ in range(self.n_iters):
            grad_mse_value = 0
            mse_value = 0

            for i in range(X.shape[0]):
                g_k = sigmoid(self.weights, X[i].T)
                print(g_k-y[i], X[i].T)
                mse_value += mse(g_k, y[i])
                grad_mse_value += mse_grad_w(g_k, y[i], X[i].T)
            
            self.historic_mse.append(mse_value)
            self.weights -= self.learning_rate * grad_mse_value

In [96]:
classifier = LinearClassifier(X_train.shape[1])
classifier.train(X_train, y_train)

[0.85560428] [1.  5.1 3.5 1.4 0.2]
grad [0.10570614] [1.  5.1 3.5 1.4 0.2]


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 1)