In [79]:
from dataset import Dataset

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

from typing import Union

In [80]:
# Wine dataset ==========================================================================================================================
# Load the raw file (the class label is the first column of the file), then call
# normalize() and move_label_to_end() so the label becomes the last column.
# The path uses a forward slash so that it resolves on Windows, Linux and macOS alike.
wine_dataset = Dataset.from_file(
    filepath = "datasets/wine.data",
    label_column = 0,
    column_names = ["class", "Alcohol", "Malicacid", "Ash", "Alcalinity of ash", "Magnesium", "Total phenols", "Flavanoids", "Nonflavanoid phenols","Proanthocyanins", "Color intensity", "Hue", "0D280 0D315 of diluted wines", "Proline"]
).normalize().move_label_to_end()

# Show the dataset summary as the cell output
wine_dataset

Dataset(instâncias=178, features=13, classes=3)

In [81]:
# Display the data table held by the dataset
wine_dataset.data

Unnamed: 0,Alcohol,Malicacid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,0D280 0D315 of diluted wines,Proline,class
0,0.684211,-0.616601,0.144385,-0.484536,0.239130,0.255172,0.147679,-0.433962,0.186120,-0.255973,-0.089431,0.941392,0.122682,1.0
1,0.142105,-0.588933,-0.165775,-0.938144,-0.347826,0.151724,0.021097,-0.509434,-0.451104,-0.470990,-0.073171,0.560440,0.101284,1.0
2,0.121053,-0.359684,0.401070,-0.175258,-0.326087,0.255172,0.223629,-0.358491,0.514196,-0.249147,-0.105691,0.391941,0.293866,1.0
3,0.757895,-0.521739,0.219251,-0.360825,-0.065217,0.979310,0.329114,-0.584906,0.116719,0.112628,-0.382114,0.597070,0.714693,1.0
4,0.163158,-0.268775,0.614973,0.072165,0.043478,0.255172,-0.008439,-0.018868,-0.110410,-0.481229,-0.089431,0.216117,-0.348074,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,0.410526,0.940711,0.165775,0.020619,-0.456522,-0.517241,-0.886076,0.471698,-0.589905,0.095563,-0.739837,-0.655678,-0.340942,3.0
174,0.247368,0.252964,0.197861,0.278351,-0.304348,-0.434483,-0.827004,0.132075,-0.369085,0.027304,-0.642276,-0.787546,-0.326676,3.0
175,0.178947,0.399209,-0.037433,-0.030928,0.086957,-0.579310,-0.852321,0.132075,-0.406940,0.522184,-0.821138,-0.787546,-0.205421,3.0
176,0.126316,-0.268775,0.080214,-0.030928,0.086957,-0.537931,-0.856540,0.509434,-0.337539,0.368601,-0.804878,-0.743590,-0.198288,3.0


In [82]:
# Display the pairwise determination matrix of the dataset columns
# NOTE(review): presumably the squared correlation (R²) between columns — confirm in dataset.py
wine_dataset.determination_matrix()

Unnamed: 0,Alcohol,Malicacid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,0D280 0D315 of diluted wines,Proline,class
Alcohol,1.0,0.008911,0.044751,0.096246,0.073332,0.083579,0.056081,0.024314,0.018686,0.298514,0.005148,0.005234,0.414375,0.10773
Malicacid,0.008911,1.0,0.026911,0.083232,0.002978,0.112337,0.168926,0.085836,0.048729,0.061994,0.315053,0.135947,0.036868,0.191648
Ash,0.044751,0.026911,1.0,0.196574,0.082132,0.016636,0.013243,0.034682,9.3e-05,0.067023,0.005575,1.5e-05,0.050009,0.002464
Alcalinity of ash,0.096246,0.083232,0.196574,1.0,0.006944,0.103114,0.123461,0.130987,0.038938,0.000351,0.075051,0.076601,0.194126,0.268178
Magnesium,0.073332,0.002978,0.082132,0.006944,1.0,0.045968,0.038331,0.065687,0.055904,0.03998,0.003069,0.004357,0.154725,0.043756
Total phenols,0.083579,0.112337,0.016636,0.103114,0.045968,1.0,0.74747,0.202442,0.37505,0.00304,0.18808,0.489929,0.248118,0.517196
Flavanoids,0.056081,0.168926,0.013243,0.123461,0.038331,0.74747,1.0,0.289336,0.426007,0.029715,0.295369,0.619674,0.244227,0.718252
Nonflavanoid phenols,0.024314,0.085836,0.034682,0.130987,0.065687,0.202442,0.289336,1.0,0.133843,0.019337,0.06898,0.25328,0.096961,0.239228
Proanthocyanins,0.018686,0.048729,9.3e-05,0.038938,0.055904,0.37505,0.426007,0.133843,1.0,0.000638,0.087346,0.269431,0.109175,0.249131
Color intensity,0.298514,0.061994,0.067023,0.000351,0.03998,0.00304,0.029715,0.019337,0.000638,1.0,0.272289,0.183882,0.099919,0.070579


In [83]:
# NOTE(review): presumably switches the labels to the vector encoding that
# encode_label()/decode_vector() rely on later — confirm in dataset.py
wine_dataset.vectorize_labels()

In [84]:
# Split the dataset into training and test subsets (split() is called with its default arguments)
train_dataset, test_dataset = wine_dataset.split()

In [85]:
def dist_euclidiana( P1 : Union[ np.ndarray, list[float] ], P2 : Union[ np.ndarray, list[float] ] ) -> float:
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    P1, P2 : array-like of numbers
        Coordinates of the two points; their shapes must be broadcast-compatible.

    Returns
    -------
    float
        Square root of the sum of the squared coordinate differences.
    """
    # Element-wise difference between the two coordinate vectors
    delta = np.asarray( P1 ) - np.asarray( P2 )

    # Sum of squares, then the square root
    return np.sqrt( ( delta * delta ).sum() )

In [86]:
def KNN( train_dataset : Dataset, P : np.ndarray, k : int = 3 ) -> float:
    """Classify point ``P`` by majority vote among its ``k`` nearest training instances.

    Parameters
    ----------
    train_dataset : Dataset
        Training data. Iterating it must yield rows that unpack as
        (index, *features, label), and ``train_dataset.y`` must be indexable by a
        list of those indices and expose ``to_numpy()``.
    P : np.ndarray
        Feature vector of the point to classify.
    k : int, default 3
        Number of nearest neighbours taking part in the vote. Must be an integer >= 1.

    Returns
    -------
    The most frequent class among the ``k`` nearest neighbours. When several
    classes tie, the smallest one wins, because ``np.unique`` returns its values
    sorted and ``np.argmax`` picks the first maximum.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError( f"k must be a positive integer, got {k}" )

    # Pair each training index with its distance to P (the row's label is not needed here)
    distances = [
        (index, dist_euclidiana( features, P ))
        for index, *features, _ in train_dataset
    ]

    # Stable sort by distance: equidistant instances keep their training order
    distances.sort( key = lambda el: el[1] )

    # Indices of the k closest training instances
    positions = [index for index, _ in distances[:k]]

    # Classes of those k neighbours
    classes = train_dataset.y[ positions ].to_numpy()

    # Majority vote
    labels, counts = np.unique( classes, return_counts = True )
    return labels[ np.argmax( counts ) ]

In [87]:
def DMC( train_dataset : Dataset, P : np.ndarray ) -> float:
    """Classify point ``P`` with the class of its nearest centroid.

    Each item of ``train_dataset.centroids`` is unpacked as
    (ignored first field, *features, class). The class of the centroid with the
    smallest Euclidean distance to ``P`` is returned; on a tie the first centroid
    found wins. ``None`` is returned when no centroid has a distance below infinity.
    """
    best_class = None
    best_distance = float('inf')

    for _, *centroid, label in train_dataset.centroids:
        candidate = dist_euclidiana( centroid, P )

        # Skip centroids that do not strictly improve on the best distance so far
        if not candidate < best_distance:
            continue

        best_distance, best_class = candidate, label

    return best_class

In [88]:
class PerceptronSimples:
    """Single-layer perceptron with one sign-activated neuron per class.

    The network keeps a weight matrix ``W`` of shape (q, p), where ``q`` is the
    number of classes and ``p`` is the number of features plus one bias input
    that is fixed at -1.
    """

    def __init__( self, train_dataset : "Dataset" ):
        """Store the training set and draw the initial weights.

        Parameters
        ----------
        train_dataset : Dataset
            Training data. It must expose ``shape`` as a 3-tuple
            (instances, features, classes), ``shuffle()``, ``encode_label()``,
            ``decode_vector()``, and iterate as (index, *features, label) rows.
        """
        self.train_dataset = train_dataset

        # Dataset dimensions: instances, features, classes
        self.m, self.n, self.k = self.train_dataset.shape

        # One neuron per class
        self.q = self.k

        # One input per feature plus the bias input
        self.p = self.n + 1

        # Initial weights drawn from a standard normal distribution
        self.W = np.random.normal( size = (self.q, self.p) )

    @staticmethod
    def _add_bias( features ) -> np.ndarray:
        """Return ``features`` as an array with the constant -1 bias input appended."""
        return np.r_[features, -1]

    def train( self, max_epocas : int = 1000, *, eta : float = 0.1, reset_weights = False, verbose = True ):
        """Train the network with the perceptron rule until an epoch has no errors.

        Parameters
        ----------
        max_epocas : int, default 1000
            Maximum number of passes over the training set.
        eta : float, default 0.1
            Learning rate that scales each weight update.
        reset_weights : bool, default False
            When true, the weights are redrawn before training starts.
        verbose : bool, default True
            When true, a progress line is printed every fifth of ``max_epocas``
            (every epoch when ``max_epocas`` is below 5). The two end-of-training
            messages are printed regardless of this flag.
        """
        # Redraw the weights if requested
        if reset_weights:
            self.W = np.random.normal( size = (self.q, self.p) )

        # Progress is logged every fifth of the epoch budget; clamping to 1 keeps
        # the modulo below from dividing by zero when max_epocas < 5.
        log_interval = max( 1, max_epocas // 5 )

        # Defined before the loop so the final report also works when the loop never runs
        total_erros = 0

        for epoca in range( max_epocas ):
            total_erros = 0

            # Each epoch visits the training examples in a freshly shuffled order
            shuffled_dataset = self.train_dataset.shuffle()

            for index, *features, y in shuffled_dataset:
                # Target output vector for the true class
                real_output = self.train_dataset.encode_label( y )

                # Input vector with the bias input appended
                X_bias = self._add_bias( features )

                # Neuron activations, one per class
                activations = self.W @ X_bias

                # Sign function: +1 for non-negative activations, -1 otherwise
                output = np.where( activations >= 0, 1, -1 )

                # Error vector
                err = real_output - output

                if np.any(err != 0):
                    total_erros += 1

                    # Perceptron rule, applied to all neurons at once via the outer product
                    self.W += eta * np.outer(err, X_bias)

            # A full pass without errors ends the training. Here `epoca` equals the
            # number of earlier epochs, all of which still had errors.
            if total_erros == 0:
                print(f"A rede acertou todo o conjunto treinamento em {epoca} épocas")
                break

            # Progress log at every log_interval epochs
            elif verbose and ( epoca % log_interval == 0 ):
                print(f"Época {epoca}: erros: {total_erros}")

        # Reached only when the loop ended without break, i.e. the last epoch still had
        # errors (or no epoch ran at all). Report the number of epochs actually run,
        # not the last zero-based index.
        else:
            print(f"Treinamento encerrado com {total_erros} erros após {max( max_epocas, 0 )} épocas.")

    def predict( self, features : np.ndarray ) -> Union[float, int]:
        """Return the class predicted for one feature vector.

        The neuron with the highest activation is set to +1 and all others to -1,
        so exactly one class is selected even when several (or no) neurons fire.
        The resulting vector is translated back to a class by
        ``train_dataset.decode_vector``.
        """
        # Input vector with the bias input appended
        X_bias = self._add_bias( features )

        # Neuron activations
        activations = self.W @ X_bias

        # Start with every output at -1
        predicted_output = np.full_like(activations, -1)

        # Winner takes all: only the strongest neuron is set to +1
        predicted_output[ np.argmax(activations) ] = +1

        # Class corresponding to the predicted output vector
        return self.train_dataset.decode_vector( predicted_output )

In [89]:
# Build the perceptron on the training split and train it for at most 10000 epochs
# NOTE(review): the initial weights come from np.random and no seed is set in the
# visible cells, so the training log below will differ between runs
PS = PerceptronSimples( train_dataset )
PS.train( 10000 )

Época 0: erros: 39
A rede acertou todo o conjunto treinamento em 11 épocas


In [90]:
# Classify every test instance and print the prediction next to the true class
for index, *point_test, classe in test_dataset:
    classe_prevista = PS.predict( point_test )
    print(f"{index}] Previu {classe_prevista} e era {classe} [{classe_prevista == classe}]")

# NOTE(review): this displays a private attribute and is unrelated to the perceptron
# check above — looks like leftover debugging; confirm whether the public `centroids`
# accessor was intended
train_dataset._centroids

0] Previu 1.0 e era 1.0 [True]
1] Previu 2.0 e era 2.0 [True]
2] Previu 2.0 e era 2.0 [True]
3] Previu 3.0 e era 3.0 [True]
4] Previu 2.0 e era 2.0 [True]
5] Previu 2.0 e era 2.0 [True]
6] Previu 1.0 e era 1.0 [True]
7] Previu 2.0 e era 2.0 [True]
8] Previu 1.0 e era 1.0 [True]
9] Previu 2.0 e era 2.0 [True]
10] Previu 2.0 e era 2.0 [True]
11] Previu 2.0 e era 2.0 [True]
12] Previu 3.0 e era 3.0 [True]
13] Previu 1.0 e era 1.0 [True]
14] Previu 3.0 e era 3.0 [True]
15] Previu 2.0 e era 2.0 [True]
16] Previu 2.0 e era 2.0 [True]
17] Previu 2.0 e era 2.0 [True]
18] Previu 1.0 e era 1.0 [True]
19] Previu 1.0 e era 1.0 [True]
20] Previu 1.0 e era 1.0 [True]
21] Previu 1.0 e era 1.0 [True]
22] Previu 2.0 e era 2.0 [True]
23] Previu 3.0 e era 3.0 [True]
24] Previu 1.0 e era 1.0 [True]
25] Previu 3.0 e era 3.0 [True]
26] Previu 3.0 e era 3.0 [True]
27] Previu 3.0 e era 3.0 [True]
28] Previu 1.0 e era 1.0 [True]
29] Previu 3.0 e era 3.0 [True]
30] Previu 2.0 e era 2.0 [True]
31] Previu 2.0 e e