In [1]:
import numpy as np

In [2]:
# Location of the data file, relative to the working directory.
# Written as a raw string: "\c" is not a recognised escape sequence, so the
# plain literal only worked by accident and triggers an invalid-escape warning
# (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
# The resulting value is unchanged.
data_path = r"vertebral+column\column_3C.dat"

In [31]:
# Read every column as text; delimiter=None splits on runs of whitespace.
data = np.genfromtxt(data_path, dtype=str, delimiter=None, encoding="utf-8")
data

# Every column except the last is converted to float and used as features;
# the last column is kept as strings in a column vector of shape (n, 1).
X = data[:, :-1].astype(np.float64)
y = data[:, -1:]


array(['DH'], dtype='<U6')

In [49]:
# Scratch check: sum of |a - b| raised to the power 0.2, element by element.
a = np.array([3.0, 1.2, 5.0, 5.1, 9.0])
b = np.array([4.0, 1.0, 2.0, 2.2, 3.0])
(np.abs(a - b) ** 0.2).sum()

np.float64(5.638792751791597)

In [121]:
# Scratch check: per-column standard deviation of a small integer matrix.
# Only the fourth column varies, so it is the only one with a non-zero value.
A = np.array([
    [1, 2, 3, 4, 5],
    [1, 2, 3, 5, 5],
    [1, 2, 3, 4, 5],
])
A.std(axis=0)

array([0.        , 0.        , 0.        , 0.47140452, 0.        ])

In [68]:
# Scratch check: after sorting, the minimum always sits at index 0.
np.sort(np.array([1, 2, 3, 0, 4, 5])).argmin()

np.int64(0)

In [None]:
class KNN:
    """k-nearest-neighbours classifier with a Minkowski-style distance.

    The distance between two samples is ``sum(|x - x_train| ** m)``. The m-th
    root is not taken.

    Parameters
    ----------
    k : int, default 1
        Number of nearest training samples that vote on each prediction.
    m : float, default 2
        Exponent applied to the absolute coordinate differences.
    """

    def __init__(self, k=1, m=2):
        self.X_train = None
        self.y_train = None
        # Legacy attribute names, kept as aliases so code that reads them
        # keeps working.
        self.inst = None
        self.y_tain = None
        self.k = k
        self.m = m

    def fit(self, X, y):
        """Store the training samples ``X`` (n, d) and their labels ``y``.

        ``y`` may be a flat array of length n or a column vector (n, 1).
        """
        self.X_train = np.asarray(X)
        self.y_train = np.asarray(y)
        self.inst = self.X_train
        self.y_tain = self.y_train

    def predict(self, X):
        """Return the majority label among the ``k`` nearest training samples.

        Parameters
        ----------
        X : array-like of shape (n_test, d)

        Returns
        -------
        numpy.ndarray of shape (n_test, 1)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is non-empty and not two-dimensional.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNN.predict() called before fit()")

        X = np.asarray(X)
        if X.size == 0:
            return np.empty((0, 1), dtype=self.y_train.dtype)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")

        y_pred = []
        for sample in X:
            # Distance from this sample to every training row at once.
            dist = np.sum(np.abs(self.X_train - sample) ** self.m, axis=1)
            # Stable sort: equidistant neighbours are taken in training order.
            idx = np.argsort(dist, kind="stable")[: self.k]
            # np.unique flattens, so (n,) and (n, 1) labels both work; on a
            # vote tie argmax picks the smallest label in sorted order.
            neighbors, count = np.unique(self.y_train[idx], return_counts=True)
            y_pred.append(neighbors[np.argmax(count)])

        return np.array(y_pred).reshape(-1, 1)
        

In [108]:
class DMC:
    """Minimum-distance-to-centroid classifier.

    Each class is represented by the mean of its training samples. A sample
    is assigned to the class whose centroid has the smallest squared
    Euclidean distance to it.
    """

    def __init__(self):
        self.centroides = None
        self.classes = None

    def fit(self, X, y):
        """Compute one mean centroid per distinct label.

        Parameters
        ----------
        X : array-like of shape (n, d)
        y : array-like of shape (n,) or (n, 1)
        """
        X = np.asarray(X)
        # Flatten the labels: a column vector (n, 1) would give an (n, 1)
        # boolean mask, which cannot index the rows of an (n, d) matrix.
        y = np.asarray(y).ravel()

        # np.unique already returns the labels sorted.
        self.classes = np.unique(y)
        self.centroides = np.array(
            [X[y == label].mean(axis=0) for label in self.classes]
        )

    def predict(self, X):
        """Return the label of the nearest centroid for each row of ``X``.

        Returns
        -------
        numpy.ndarray of shape (n_test, 1)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is non-empty and not two-dimensional.
        """
        if self.centroides is None or self.classes is None:
            raise RuntimeError("DMC.predict() called before fit()")

        X = np.asarray(X)
        if X.size == 0:
            return np.empty((0, 1), dtype=self.classes.dtype)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")

        # Broadcast to (n_test, n_classes, d), then reduce over the features.
        diff = X[:, np.newaxis, :] - self.centroides[np.newaxis, :, :]
        dist = np.sum(diff ** 2, axis=2)

        # argmin returns the first minimum, so ties go to the smallest label.
        return self.classes[np.argmin(dist, axis=1)].reshape(-1, 1)

In [None]:
class DMCR:
    """Robust minimum-distance-to-centroid classifier.

    Each class is represented by the per-feature median of its training
    samples. A sample is assigned to the class whose centroid has the
    smallest L1 (sum of absolute differences) distance to it.
    """

    def __init__(self):
        self.centroides = None
        self.classes = None

    def fit(self, X, y):
        """Compute one median centroid per distinct label.

        Parameters
        ----------
        X : array-like of shape (n, d)
        y : array-like of shape (n,) or (n, 1)
        """
        X = np.asarray(X)
        # Flatten the labels: a column vector (n, 1) would give an (n, 1)
        # boolean mask, which cannot index the rows of an (n, d) matrix.
        y = np.asarray(y).ravel()

        # np.unique already returns the labels sorted.
        self.classes = np.unique(y)
        self.centroides = np.array(
            [np.median(X[y == label], axis=0) for label in self.classes]
        )

    def predict(self, X):
        """Return the label of the L1-nearest centroid for each row of ``X``.

        Returns
        -------
        numpy.ndarray of shape (n_test, 1)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is non-empty and not two-dimensional.
        """
        if self.centroides is None or self.classes is None:
            raise RuntimeError("DMCR.predict() called before fit()")

        X = np.asarray(X)
        if X.size == 0:
            return np.empty((0, 1), dtype=self.classes.dtype)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")

        # Broadcast to (n_test, n_classes, d), then reduce over the features.
        diff = X[:, np.newaxis, :] - self.centroides[np.newaxis, :, :]
        dist = np.sum(np.abs(diff), axis=2)

        # argmin returns the first minimum, so ties go to the smallest label.
        return self.classes[np.argmin(dist, axis=1)].reshape(-1, 1)

In [None]:
class MaxCorr:
    """Maximum-correlation classifier.

    Each class is represented by the mean of its training samples. A sample
    is assigned to the class whose centroid gives the largest inner product
    with it. The inner product is not normalised, so feature scale and
    centroid magnitude affect the result.
    """

    def __init__(self):
        self.centroides = None
        self.classes = None

    def fit(self, X, y):
        """Compute one mean centroid per distinct label.

        Parameters
        ----------
        X : array-like of shape (n, d)
        y : array-like of shape (n,) or (n, 1)
        """
        X = np.asarray(X)
        # Flatten the labels: a column vector (n, 1) would give an (n, 1)
        # boolean mask, which cannot index the rows of an (n, d) matrix.
        y = np.asarray(y).ravel()

        # np.unique already returns the labels sorted.
        self.classes = np.unique(y)
        self.centroides = np.array(
            [X[y == label].mean(axis=0) for label in self.classes]
        )

    def predict(self, X):
        """Return the label of the best-matching centroid for each row of ``X``.

        Returns
        -------
        numpy.ndarray of shape (n_test, 1)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``X`` is non-empty and not two-dimensional.
        """
        if self.centroides is None or self.classes is None:
            raise RuntimeError("MaxCorr.predict() called before fit()")

        X = np.asarray(X)
        if X.size == 0:
            return np.empty((0, 1), dtype=self.classes.dtype)
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")

        # (n_test, d) @ (d, n_classes): entry [i, j] is <X[i], centroid[j]>.
        scores = X @ self.centroides.T

        # argmax returns the first maximum, so ties go to the smallest label.
        return self.classes[np.argmax(scores, axis=1)].reshape(-1, 1)

(array([[1],
        [2],
        [3],
        [1],
        [2],
        [4]]),
 array([1.06718737]),
 np.float64(1.0671873729054748))

In [134]:
class StandardScaler:
    """Standardise features (and optionally targets) to zero mean, unit variance.

    The statistics are computed column-wise in ``fit`` and applied in
    ``transform``. Columns with zero standard deviation are divided by 1
    instead of 0, so constant columns map to 0 rather than NaN.
    """

    def __init__(self):
        self.X_mu = None
        self.X_sigma = None
        self.y_mu = None
        self.y_sigma = None

    @staticmethod
    def _safe_std(values):
        """Column-wise standard deviation with zeros replaced by 1.0."""
        sigma = np.std(values, axis=0)
        return np.where(sigma == 0, 1.0, sigma)

    def fit(self, X, y=None):
        """Compute the mean and standard deviation of ``X`` and, if given, ``y``."""
        self.X_mu = np.mean(X, axis=0)
        self.X_sigma = self._safe_std(X)

        if y is not None:
            self.y_mu = np.mean(y, axis=0)
            # Same zero guard as for X: a constant target must not divide by 0.
            self.y_sigma = self._safe_std(y)

    def transform(self, X, y=None):
        """Apply the fitted scaling.

        Returns
        -------
        ``X_scaled`` when ``y`` is None, otherwise ``(X_scaled, y_scaled)``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``y`` is given but no target statistics were fitted.
        """
        if self.X_mu is None or self.X_sigma is None:
            raise RuntimeError("StandardScaler.transform() called before fit()")

        X_scaled = (X - self.X_mu) / self.X_sigma
        if y is None:
            return X_scaled

        if self.y_mu is None or self.y_sigma is None:
            raise ValueError(
                "y was passed to transform() but fit() was called without y"
            )
        y_scaled = (y - self.y_mu) / self.y_sigma
        return X_scaled, y_scaled