In [1]:
import numpy as np
from numpy.linalg import inv
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
# Load the Iris data and pull out the feature matrix and the label vector.
data_set = datasets.load_iris()
X_data, Y_data = data_set.data, data_set.target

# Keep only the samples whose label is 1 or 2, giving a two-class problem.
is_class_1_or_2 = (Y_data == 1) | (Y_data == 2)
sub_data = np.where(is_class_1_or_2)[0]
X, Y = X_data[sub_data], Y_data[sub_data]

In [3]:
class AdelineClassifier:
    """
    Linear least-squares classifier with a single real-valued output.

    ``fit`` computes the closed-form least-squares weights for ``X @ w ~= y``.
    No bias/intercept term is added; append a constant column to ``X`` if one
    is needed. The class name keeps its original spelling so existing callers
    continue to work.

    Attributes set by ``fit``:
        w (numpy.ndarray): Weight vector, one entry per input feature.
        classes_ (numpy.ndarray): Sorted unique labels seen in ``y_train``.
    """

    def __init__(self):
        """
        Initialize the AdelineClassifier (unfitted).
        """
        self.w = None
        self.classes_ = None

    def fit(self, X_train, y_train):
        """
        Fit the model to the training data.

        Args:
        X_train (numpy.ndarray): Input features for training, shape (n_samples, n_features).
        y_train (numpy.ndarray): Numeric output labels for training, one per sample.

        Raises:
        ValueError: If the inputs are empty, X_train is not 2-D, or the
            number of samples and labels differ.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)

        if X_train.ndim != 2:
            raise ValueError("X_train must be a 2-D array of shape (n_samples, n_features).")
        if X_train.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample.")
        if X_train.shape[0] != len(y_train):
            raise ValueError("X_train and y_train must contain the same number of samples.")

        # Remember the label values so evaluate() does not have to infer them
        # from the test labels.
        self.classes_ = np.unique(y_train)

        # Same solution as w = (X.T X)^-1 X.T y when X.T X is invertible, but
        # lstsq avoids forming the inverse explicitly and still returns a
        # (minimum-norm) solution when the features are collinear.
        self.w = np.linalg.lstsq(X_train, y_train.astype(float), rcond=None)[0]

    def _scores(self, X_test):
        """Return the raw, unrounded linear outputs ``X_test @ w``."""
        if self.w is None:
            raise RuntimeError("fit must be called before predict or evaluate.")
        return np.matmul(np.asarray(X_test, dtype=float), self.w)

    def predict(self, X_test):
        """
        Compute the model output for test data.

        The returned values are the linear outputs ``X_test @ w`` rounded to
        one decimal place; they are NOT snapped to class labels. ``evaluate``
        performs the conversion to labels.

        Args:
        X_test (numpy.ndarray): Input features for testing.

        Returns:
        numpy.ndarray: Model outputs for the test data, rounded to 1 decimal.
        """
        return np.round(self._scores(X_test), 1)

    def evaluate(self, X_test, y_test):
        """
        Evaluate the performance of the model on the test data.

        Each raw model output is assigned to the nearest class value learned
        during ``fit`` (ties go to the smaller label). For two classes this is
        a threshold at their midpoint, e.g. 1.5 for labels 1 and 2.

        Args:
        X_test (numpy.ndarray): Input features for testing.
        y_test (numpy.ndarray): True output labels for testing.

        Returns:
        float: Accuracy of the model on the test data, in percent.

        Raises:
        ValueError: If y_test is empty.
        """
        y_test = np.asarray(y_test)
        if y_test.size == 0:
            raise ValueError("y_test must contain at least one sample.")

        # Use the unrounded outputs: rounding first would move values such as
        # 1.52 onto the decision boundary and flip their label.
        scores = self._scores(X_test)

        # Fall back to the test labels only when the weights were set by hand
        # and no training labels were ever seen.
        classes = self.classes_ if self.classes_ is not None else np.unique(y_test)

        # classes is sorted, and argmin returns the first minimum, so a score
        # exactly halfway between two classes maps to the smaller one.
        nearest = np.abs(scores[..., np.newaxis] - classes).argmin(axis=-1)
        y_pred = classes[nearest]

        accuracy = np.count_nonzero(y_pred == y_test) / len(y_test) * 100

        return accuracy


In [4]:
# Hold out 30% of the samples for testing. The fixed seed makes the split
# reproducible across kernel restarts, and stratifying on Y keeps the class
# proportions the same in the training and test partitions.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=42, stratify=Y
)

In [None]:
# Fit the linear model on the training split and show the learned weights.
model = AdelineClassifier()
model.fit(X_train=X_train, y_train=Y_train)
print(model.w)

# Show the model outputs for the test split next to the true labels.
pred = model.predict(X_test=X_test)
print('pred', pred)
print('true', Y_test)


In [None]:
# Accuracy (in percent) of the fitted linear model on the held-out split.
accuracy = model.evaluate(X_test=X_test, y_test=Y_test)
print('accuracy', accuracy)

### K-Nearest Neighbors (KNN)

In [7]:
class KNearestNeighbor:
    """
    k-nearest-neighbour classifier using Euclidean distance and majority vote.

    ``fit`` only stores the training data; all distance computation happens
    at prediction time.
    """

    def __init__(self, k):
        """
        Initialize the KNearestNeighbor classifier.

        Args:
        k (int): Number of nearest neighbors to consider (must be >= 1).

        Raises:
        ValueError: If k is not a positive integer.
        """
        # A non-positive k would make the neighbour slice empty (k == 0) or
        # silently drop the wrong end of the ranking (k < 0).
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError("k must be a positive integer, got %r" % (k,))
        self.k = k

    def fit(self, X_train, y_train):
        """
        Fit the model to the training data.

        Args:
        X_train (numpy.ndarray): Input features for training, shape (n_samples, n_features).
        y_train (numpy.ndarray): Output labels for training, one per sample.

        Raises:
        ValueError: If the inputs are empty or their lengths differ.
        """
        # Cast features to float so that differences of unsigned-integer data
        # cannot wrap around; accept plain lists as well as arrays.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)

        if len(X_train) == 0:
            raise ValueError("X_train must contain at least one sample.")
        if len(X_train) != len(y_train):
            raise ValueError("X_train and y_train must contain the same number of samples.")

        self.X_train = X_train
        self.y_train = y_train
        self.number_classes = len(np.unique(y_train))

    def nearNeighbors(self, x_test):
        """
        Find the indices of the nearest neighbors for a given test instance.

        Args:
        x_test (numpy.ndarray): Input features for a single test instance.

        Returns:
        numpy.ndarray: Indices of the (at most k) nearest training samples,
        closest first. Equidistant samples are ordered by training index.
        """
        offsets = np.asarray(x_test, dtype=float) - self.X_train
        distance = np.sqrt(np.sum(offsets ** 2, axis=1))
        # A stable sort makes tie-breaking between equidistant samples
        # deterministic (lowest training index wins).
        near_neighbors = np.argsort(distance, kind="stable")[:self.k]

        return near_neighbors

    def predict(self, x_test):
        """
        Predict the label for a single test instance.

        Args:
        x_test (numpy.ndarray): Input features for a single test instance.

        Returns:
        The most frequent label among the nearest neighbors, with the same
        type as the entries of ``y_train``. Ties go to the smallest label.
        """
        near_neighbors = self.nearNeighbors(x_test)
        # np.unique works for any label type (negative ints, floats, strings),
        # unlike np.bincount which only accepts non-negative integers. The
        # labels come back sorted, so argmax keeps the smallest label on ties.
        labels, votes = np.unique(self.y_train[near_neighbors], return_counts=True)
        predict_label = labels[np.argmax(votes)]

        return predict_label

    def evaluate(self, X_test, y_test):
        """
        Evaluate the performance of the model on the test data.

        Args:
        X_test (numpy.ndarray): Input features for testing.
        y_test (numpy.ndarray): True output labels for testing.

        Returns:
        float: Accuracy of the model on the test data, in percent.

        Raises:
        ValueError: If the test set is empty or the lengths differ.
        """
        X_test = np.asarray(X_test, dtype=float)
        # Converting to an array guarantees an element-wise comparison below;
        # comparing two Python lists would yield a single boolean.
        y_test = np.asarray(y_test)

        if len(X_test) == 0:
            raise ValueError("X_test must contain at least one sample.")
        if len(X_test) != len(y_test):
            raise ValueError("X_test and y_test must contain the same number of samples.")

        y_pred = np.array([self.predict(sample) for sample in X_test])

        true_label = np.count_nonzero(y_pred == y_test)
        accuracy = (true_label / len(y_pred)) * 100

        return accuracy


In [None]:
# Classify each test sample by majority vote among its 5 nearest training
# samples and report the accuracy in percent.
knn = KNearestNeighbor(5)
knn.fit(X_train, Y_train)

accuracy = knn.evaluate(X_test, Y_test)

print('accuracy', accuracy)