In [1]:
import sys
import os
import random
import csv
from typing import List

# Append the directory two levels above the current working directory to the
# import search path before the local imports below run (presumably that is
# where `lvq` and `data_util` live -- confirm against the repository layout).
module_path = os.path.abspath(os.path.join("..", ".."))
sys.path.append(module_path)

import lvq
import data_util


In [2]:
def long_list_print(long_list: List, force_padding: int = None):
    """Print a list on one line, abbreviated and right-aligned.

    Floats are rounded to 3 decimal places. A list with more than 6 elements
    is shortened to its first three and last three elements with a literal
    "..." between them. Every element is rendered with ``str`` and
    right-aligned to a common width.

    Args:
        long_list: Elements to print.
        force_padding: Column width applied to every element. When omitted
            (or otherwise falsy), the width of the widest rendered element
            is used instead.

    An empty list prints ``[]``.
    """
    shown = [
        round(element, 3) if isinstance(element, float) else element
        for element in long_list
    ]
    if len(shown) > 6:
        shown = [*shown[:3], "...", *shown[-3:]]

    cells = [str(element) for element in shown]

    # `default=0` keeps max() from raising ValueError when there is nothing
    # to measure (empty input and no forced width).
    padding = force_padding or max((len(cell) for cell in cells), default=0)

    print("[" + ", ".join(f"{cell:>{padding}}" for cell in cells) + "]")


def print_codebook(codebook: List[List[float]]):
    """Print every codebook row using one column width shared by all rows.

    The width is the length of the longest ``str`` form of any element in any
    row. It is measured on the raw values, before ``long_list_print`` rounds
    floats, so columns may be wider than the printed values strictly need.
    An empty codebook prints nothing.

    Args:
        codebook: Rows to print; each row is passed to ``long_list_print``.
    """
    # Measure across the whole codebook, not just the final row, so that a
    # wide element in an earlier row cannot push that row out of alignment.
    padding = max(
        (len(str(element)) for row in codebook for element in row),
        default=None,
    )

    for row in codebook:
        long_list_print(row, force_padding=padding)


# Load data

In [3]:
# Read the CSV into rows of float features followed by the raw label string.
# Blank rows are skipped. `newline=""` is what the csv module documentation
# asks for on file objects handed to csv.reader, so the reader does its own
# line-ending handling.
with open("ionosphere.csv", "rt", newline="") as f:
    dataset = [
        [float(value) for value in features] + [label]
        # Filter lazily; the rows are consumed while the file is still open.
        for *features, label in (line for line in csv.reader(f) if line)
    ]

# `encode_labels` returns a (mapping, encoded) pair; judging by the printout
# below, `mapping` goes from original label to its integer code and `encoded`
# is the dataset with labels replaced -- confirm against data_util.
mapping, encoded = data_util.encode_labels(dataset)

print("Labels mapping:")
for label in mapping:
    print(label, mapping[label])


Labels mapping:
b 0
g 1


# Initialize model

In [4]:
model_config = dict(
    codebook_size=10,
    features_count=34,  # Unused if codebook_init_method == "sample"
    labels_count=2,
    codebook_init_method="sample",
    codebook_init_dataset=encoded,  # Needed only in case codebook_init_method == "sample"
)

# Seed before constructing the model so that the "sample" codebook
# initialisation is reproducible on a fresh kernel (assumes lvq draws from the
# standard `random` module -- confirm against lvq).
random.seed(0)
model = lvq.LVQ(**model_config)

# Reseed so the demo sample picked below does not depend on how many random
# draws the constructor consumed.
random.seed(0)

sample = random.choice(encoded)
*features, label = sample

print("Random sample:")
long_list_print(sample)
print("Prediction:", model.predict(features))

print("\nInitialized codebook:")
print_codebook(model.codebook)


Random sample:
[   1.0,    0.0,    1.0,    ...,  0.965, -0.117,      1]
Prediction: 1

Initialized codebook:
[     1.0,      0.0,      0.0,      ...,      0.0,      0.0,        0]
[     1.0,      0.0,    0.847,      ...,    0.858,   -0.062,        1]
[     1.0,      0.0,      0.0,      ...,      1.0,      1.0,        0]
[     1.0,      0.0,    0.906,      ...,    0.874,   -0.162,        1]
[     0.0,      0.0,      0.0,      ...,      0.0,      0.0,        0]
[     1.0,      0.0,    0.947,      ...,    0.927,   -0.006,        1]
[     1.0,      0.0,      0.0,      ...,      0.0,      0.0,        0]
[     1.0,      0.0,    0.951,      ...,    0.915,    0.047,        1]
[     1.0,      0.0,      0.0,      ...,    0.907,   -0.096,        0]
[     1.0,      0.0,    0.835,      ...,    0.905,   -0.043,        1]


# Train model

In [5]:
# Training hyper-parameters, kept in a dict so they can be unpacked into
# other calls as keyword arguments.
train_config = {
    "base_learning_rate": 0.1,
    "learning_rate_decay": "linear",
    "epochs": 10,
}

# Fixed seed ahead of training.
random.seed(0)

model.train_codebook(train_vectors=encoded, **train_config)

# Show the same sample as before training together with the new prediction.
print("Random sample:")
long_list_print(sample)
print("Prediction:", model.predict(features))


Training: 100% |████████████████████████████████████████████████████████| 10/10, acc=0.895, sse=54.6

Random sample:
[   1.0,    0.0,    1.0,    ...,  0.965, -0.117,      1]
Prediction: 1





# Cross validation

In [6]:
# Fixed seed ahead of cross-validation.
random.seed(0)

# Same model and training settings as above, evaluated over 3 folds.
scores = lvq.cross_validate(encoded, fold_count=3, **model_config, **train_config)

# One accuracy per line, rounded to 3 decimal places.
print("Validation accuracies:")
for score in scores:
    print(round(score, 3))


Training: 100% |████████████████████████████████████████████████████████| 10/10, acc=0.859, sse=43.2
Training: 100% |████████████████████████████████████████████████████████| 10/10, acc=0.893, sse=39.3
Training: 100% |████████████████████████████████████████████████████████| 10/10, acc=0.906, sse=39.3

Validation accuracies:
0.915
0.855
0.838



