# Código Learning Vector Quantization

## Imports

In [1]:
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
import random

## Funções de Suporte

In [2]:
def sample_prototypes(dataset, n_prototypes = 2):
    """Pick ``n_prototypes`` rows of ``dataset`` at random, at least one per class.

    The last element of every row is treated as its class label.  One row is
    drawn from each class first; any remaining slots are filled with rows
    drawn without replacement from the rows not picked yet.

    Selection is done by row position rather than by row value, so rows may
    be NumPy arrays (whose ``==`` is element-wise and cannot be used with
    ``in``) and the dataset may contain duplicate rows.

    Args:
        dataset: Sequence of rows (lists or 1-D arrays); ``row[-1]`` is the label.
        n_prototypes: Number of rows to return.

    Returns:
        A list with ``n_prototypes`` rows taken from ``dataset``, or ``None``
        (after printing a message) when ``n_prototypes`` is smaller than the
        number of classes or larger than the number of rows.
    """
    rows = list(dataset)

    # Group row positions by label in a single pass; keys keep first-seen order.
    indices_by_class = {}
    for position, row in enumerate(rows):
        indices_by_class.setdefault(row[-1], []).append(position)

    classes = {label: [rows[i] for i in positions]
               for label, positions in indices_by_class.items()}
    print(classes)

    if n_prototypes < len(indices_by_class):
        print("There aren't prototypes enough for all classes")
        return None
    if n_prototypes > len(rows):
        print("There aren't samples enough for this amount of prototypes")
        return None

    # One guaranteed representative per class.
    chosen = [random.choice(positions) for positions in indices_by_class.values()]

    missing = n_prototypes - len(chosen)
    if missing > 0:
        # Exclude by position so equal-valued rows remain eligible.
        taken = set(chosen)
        still_not_chosen = [i for i in range(len(rows)) if i not in taken]
        chosen.extend(random.sample(still_not_chosen, missing))

    return [rows[i] for i in chosen]

# Smoke check: requesting as many prototypes as there are rows returns every row.
sample_prototypes([[1, 0],[1, 1],[2, 2],[2, 0]], 4)

{0: [[1, 0], [2, 0]], 1: [[1, 1]], 2: [[2, 2]]}


[[1, 0], [1, 1], [2, 2], [2, 0]]

In [19]:
def calculate_test_results(real, predicted):
    """Compare predicted labels against the real ones.

    Works with any iterable of hashable labels (lists, NumPy arrays, ...) and
    runs in a single pass over the paired labels.

    Args:
        real: True labels.
        predicted: Predicted labels, paired positionally with ``real``.

    Returns:
        A dict with two entries:
          * ``"precision"``: number of positions where both labels are equal,
            divided by ``len(real)`` (the key name is kept for existing callers).
          * ``"recalls"``: for each label in ``real`` (first-seen order), the
            fraction of its occurrences that were predicted correctly.

    Raises:
        ZeroDivisionError: If ``real`` is empty.
    """
    from collections import Counter

    real = list(real)
    totals = Counter(real)      # occurrences of each true label
    hits = Counter()            # correctly predicted occurrences per true label
    n_matched = 0
    for truth, guess in zip(real, predicted):
        if truth == guess:
            hits[truth] += 1
            n_matched += 1

    return {
        "precision": n_matched / len(real),
        "recalls": {label: hits[label] / total for label, total in totals.items()}
    }

## Código do LVQ1

In [26]:
class LVQ1:
    """LVQ1 prototype learner backed by a k-nearest-neighbours classifier.

    Rows handed to ``train`` and ``test`` carry their class label in the last
    position; everything before it is the feature vector.
    """

    def __init__(self, k = 1, n_prototypes = 2, alpha_0 = 0.8):
        """Store the hyper-parameters.

        Args:
            k: Number of neighbours used by the underlying classifier; during
                training, the ``k`` prototypes closest to each sample are adjusted.
            n_prototypes: Number of prototypes drawn from the training set.
            alpha_0: Initial learning rate; it is also the factor the rate is
                multiplied by after every training sample.
        """
        self.knn = KNeighborsClassifier(n_neighbors = k)
        self.n_prototypes = n_prototypes
        self.alpha_0 = alpha_0

    def adjust_prototype(self, prototype_index, alpha, sample):
        """Move one prototype towards or away from ``sample``.

        The prototype moves towards the sample when their labels agree and
        away from it otherwise; the step is ``alpha`` times their difference.

        Args:
            prototype_index: Position of the prototype in ``self.samples``.
            alpha: Learning rate for this step.
            sample: Training row (features followed by the label).
        """
        # asarray lets plain-list rows work the same way as ndarray rows.
        prototype = np.asarray(self.samples[prototype_index])
        step = (np.asarray(sample[0:-1]) - prototype) * alpha
        if self.labels[prototype_index] == sample[-1]:
            self.samples[prototype_index] = prototype + step
        else:
            self.samples[prototype_index] = prototype - step
        print(prototype_index)
        print(self.samples[prototype_index])

    def train(self, training):
        """Draw prototypes from ``training`` and refine them with one pass over it.

        Args:
            training: Iterable of rows (features followed by the label).

        Raises:
            ValueError: If ``sample_prototypes`` could not provide prototypes
                (it returns ``None`` in that case).
        """
        prototypes = sample_prototypes(training, self.n_prototypes)
        if prototypes is None:
            raise ValueError(
                "could not sample prototypes: n_prototypes must be at least the "
                "number of classes and at most the number of training samples"
            )
        print(prototypes)
        self.samples = [sample[0:-1] for sample in prototypes]
        self.labels = [sample[-1] for sample in prototypes]

        alpha_t = self.alpha_0
        for sample in training:
            # Prototypes moved on the previous step, so the index is refitted.
            self.knn.fit(self.samples, self.labels)
            (_, closest) = self.knn.kneighbors([sample[0:-1]])
            for prototype_index in closest[0]:
                self.adjust_prototype(prototype_index, alpha_t, sample)
            alpha_t *= self.alpha_0

        # Final fit so predictions use the fully adjusted prototypes.
        self.knn.fit(self.samples, self.labels)

    def predict(self, sample):
        """Return the classifier's prediction for ``sample``.

        ``sample`` is forwarded unchanged to ``self.knn.predict``; ``test``
        calls this with a list of feature rows.
        """
        return self.knn.predict(sample)

    def test(self, testing):
        """Predict every row of ``testing`` and score the predictions.

        Args:
            testing: Iterable of rows (features followed by the label).

        Returns:
            The dict produced by ``calculate_test_results``.
        """
        samples = [sample[0:-1] for sample in testing]
        labels = [sample[-1] for sample in testing]
        print(labels)
        predicted_labels = self.predict(samples)
        print(predicted_labels)
        result = calculate_test_results(labels, predicted_labels)
        return result

In [27]:
# Toy run: rows are two features followed by the class label (0, 1 or 2).
# Three prototypes are requested, one for each of the three classes present.
lvq1 = LVQ1(n_prototypes=3)
lvq1.train(np.array([[1,0,0], [1,1,1], [2,0,2], [2,1,0]]))
# Score two labelled rows; the returned dict is the cell's displayed value.
lvq1.test([[0, 1, 1], [1, 0.2, 0]])

{0: [array([1, 0, 0]), array([2, 1, 0])], 1: [array([1, 1, 1])], 2: [array([2, 0, 2])]}
[array([1, 0, 0]), array([1, 1, 1]), array([2, 0, 2])]
0
[1. 0.]
1
[1. 1.]
2
[2. 0.]
1
[0.5904 1.    ]
[1, 0]
[1 0]


{'precision': 1.0, 'recalls': {1: 1.0, 0: 1.0}}

## Código do LVQ2.1

In [4]:
class LVQ2:
    """Skeleton for the LVQ2.1 variant; only the constructor exists so far."""

    def __init__(self, k = 1, nPrototypes = 2):
        """Store the neighbour search object and the prototype count.

        Args:
            k: Number of neighbours for the ``NearestNeighbors`` search.
            nPrototypes: Number of prototypes, stored as ``self.nPrototypes``.
        """
        # The file only imports KNeighborsClassifier, so bring the name in here.
        from sklearn.neighbors import NearestNeighbors

        self.knn = NearestNeighbors(n_neighbors = k)
        self.nPrototypes = nPrototypes

## Código do LVQ3

In [5]:
class LVQ3:
    """Skeleton for the LVQ3 variant; only the constructor exists so far."""

    def __init__(self, k = 1, nPrototypes = 2):
        """Store the neighbour search object and the prototype count.

        Args:
            k: Number of neighbours for the ``NearestNeighbors`` search.
            nPrototypes: Number of prototypes, stored as ``self.nPrototypes``.
        """
        # The file only imports KNeighborsClassifier, so bring the name in here.
        from sklearn.neighbors import NearestNeighbors

        self.knn = NearestNeighbors(n_neighbors = k)
        self.nPrototypes = nPrototypes

# Avaliações

## Imports

In [28]:
%matplotlib inline

from scipy.io import arff
import pandas as pd
from time import process_time

import matplotlib
import matplotlib.pyplot as plt

## Base de Dados 1 - CM1

### Preparação

In [None]:
# Read the ARFF file; loadarff returns a pair whose first item holds the records.
data = arff.loadarff('Datasets/cm1.arff')
dataFrame = pd.DataFrame(data[0])
# The `defects` values are compared as bytes; recode them to 1 (b'true') / 0 (anything else).
dataFrame['defects'] = [int(flag == b'true') for flag in dataFrame['defects']]
# Plain array view of the table, displayed as the cell output.
rawData = dataFrame.values
rawData

## Base de Dados 2 - 