# Reconhecimento de Padrões (TIP8311) - Trabalho 2


**Professor:** Guilherme de Alencar Barreto  

<img src="https://loop.frontiersin.org/images/profile/243428/203" alt="Foto do Professor" width="150"/>


**Aluno:** Luis Felipe Carneiro de Souza    **Matrícula:** 535049

In [12]:
import numpy as np
from tqdm import tqdm

In [2]:
data_path = "vertebral+column\column_3C.dat"

In [3]:
data = np.genfromtxt(fname=data_path, delimiter=None, dtype=str, encoding='utf-8')
data

X = data[:, :-1].astype(float) 
y = data[:, -1].reshape(-1, 1)


In [17]:
class KNN:
    def __init__(self, k=1, m=2):
        self.X_train = None
        self.y_train = None
        self.k = k
        self.m = m

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):

        n_test = len(X)
        n_train = len(self.X_train)
        y_pred = []

        for i in range(n_test):

            dist = []

            for j in range(n_train):

                diff = np.abs(X[i, :] - self.X_train[j , :])
                dist.append(np.sum(diff ** (self.m)))

            dist = np.array(dist)
            idx = np.argsort(dist)[:self.k]
            neighbors, count = np.unique(self.y_train[idx], return_counts=True)
            y_pred.append(neighbors[np.argmax(count)])

        return np.array(y_pred).reshape(-1, 1)
        

In [5]:
class DMC:
    def __init__(self):
        self.centroides = None
        self.classes = None

    def fit(self, X, y):

        self.classes = np.sort(np.unique(y))
        self.centroides = []

        for c in self.classes:

            mask = (y == c)
            self.centroides.append(X[mask].mean(axis=0))

        self.centroides = np.array(self.centroides)

    def predict(self, X):

        n_test = len(X)
        y_pred = []

        for i in range(n_test):

            dist = []

            for c in self.centroides:

                diff = np.abs(c - X[i, :])
                dist.append(np.sum(diff ** 2))

            dist = np.array(dist)
            y_pred.append(self.classes[np.argmin(dist)])

        return np.array(y_pred).reshape(-1, 1)

In [6]:
class DMCR:
    def __init__(self):
        self.centroides = None
        self.classes = None

    def fit(self, X, y):

        self.classes = np.sort(np.unique(y))
        self.centroides = []

        for c in self.classes:

            mask = (y == c)
            self.centroides.append(np.median(X[mask], axis=0))

        self.centroides = np.array(self.centroides)

    def predict(self, X):

        n_test = len(X)
        y_pred = []

        for i in range(n_test):

            dist = []

            for c in self.centroides:

                diff = np.abs(c - X[i, :])
                dist.append(np.sum(diff))

            dist = np.array(dist)
            y_pred.append(self.classes[np.argmin(dist)])

        return np.array(y_pred).reshape(-1, 1)

In [7]:
class MaxCorr:
    def __init__(self):
        self.centroides = None
        self.classes = None

    def fit(self, X, y):

        self.classes = np.sort(np.unique(y))
        self.centroides = []

        for c in self.classes:

            mask = (y == c)
            self.centroides.append(X[mask].mean(axis=0))

        self.centroides = np.array(self.centroides)

    def predict(self, X):

        n_test = len(X)
        y_pred = []

        for i in range(n_test):

            corr_array = []

            for c in self.centroides:

                corr = np.dot(c, X[i, :])
                corr_array.append(corr)

            corr_array = np.array(corr_array)
            y_pred.append(self.classes[np.argmax(corr_array)])

        return np.array(y_pred).reshape(-1, 1)

In [8]:
class StandardScaler:
    def __init__(self):
        self.X_mu = None
        self.X_sigma = None
        self.y_mu = None
        self.y_sigma = None

    def fit(self, X, y=None):
        self.X_mu = np.mean(X, axis=0)
        self.X_sigma = np.std(X, axis=0)
        self.X_sigma = np.where(self.X_sigma == 0, 1.0, self.X_sigma)

        if y is not None:
            self.y_mu = np.mean(y, axis=0)
            self.y_sigma = np.std(y, axis=0)

    def transform(self, X, y=None):
        
        X_scaled = (X - self.X_mu) / self.X_sigma

        if y is not None:
            y_scaled = (y - self.y_mu) / self.y_sigma

        if y is None:
            return X_scaled
        else:
            return X_scaled, y_scaled

### Classificador Vizinho Mais Próximo (distância de Minkowski de ordens m ∈ {0,5; 2/3; 1; 3/2; 2; 5/2})

In [9]:
sep = {'20/80': {'train': 0.2, 'test': 0.8},
       '30/70': {'train': 0.3, 'test': 0.7},
       '50/50': {'train': 0.5, 'test': 0.5},
       '70/30': {'train': 0.7, 'test': 0.3},
       '80/20': {'train': 0.8, 'test': 0.2}}

In [20]:
resultados_knn = {}

for s in sep.keys():
    for m in [0.5, 2/3, 1, 3/2, 2, 5/2]:
        for _ in tqdm(range(100), desc=f"Rodada sep: {s}, m: {m:.2f}"):

            idx = np.random.permutation(X.shape[0])
            X, y = X[idx], y[idx]

            split = int(0.7 * len(X))
            X_train, X_test = X[:split], X[split:]
            y_train, y_test = y[:split], y[split:]

            scaler = StandardScaler()
            scaler.fit(X_train)
            X_train_scaled = scaler.transform(X_train)

            model = KNN(k=1, m=m)
            model.fit(X=X_train_scaled, y=y_train)

            scaler_test = StandardScaler()
            scaler_test.fit(X_test)
            X_test_scaled = scaler_test.transform(X_test)

            y_pred = model.predict(X_test_scaled)            

Rodada sep: 20/80, m: 0.50: 100%|██████████| 100/100 [00:19<00:00,  5.13it/s]
Rodada sep: 20/80, m: 0.67: 100%|██████████| 100/100 [00:22<00:00,  4.38it/s]
Rodada sep: 20/80, m: 1.00: 100%|██████████| 100/100 [00:27<00:00,  3.65it/s]
Rodada sep: 20/80, m: 1.50: 100%|██████████| 100/100 [00:20<00:00,  4.86it/s]
Rodada sep: 20/80, m: 2.00: 100%|██████████| 100/100 [00:19<00:00,  5.10it/s]
Rodada sep: 20/80, m: 2.50: 100%|██████████| 100/100 [00:27<00:00,  3.61it/s]
Rodada sep: 30/70, m: 0.50: 100%|██████████| 100/100 [01:01<00:00,  1.63it/s]
Rodada sep: 30/70, m: 0.67: 100%|██████████| 100/100 [00:40<00:00,  2.44it/s]
Rodada sep: 30/70, m: 1.00: 100%|██████████| 100/100 [00:47<00:00,  2.12it/s]
Rodada sep: 30/70, m: 1.50:  57%|█████▋    | 57/100 [00:41<00:31,  1.37it/s]


KeyboardInterrupt: 