# Лабораторная работа №2 "Многослойный персептрон"

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from torch import float64, Tensor, tensor
from torch.optim.adam import Optimizer, Adam
from typing import Iterable, Sequence
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn


class ConfusionMatrix:
    __true_positive: int
    __true_negative: int
    __false_positive: int
    __false_negative: int

    def __init__(self) -> None:
        self.__true_positive = 0
        self.__true_negative = 0
        self.__false_positive = 0
        self.__false_negative = 0

    @property
    def data(self) -> Sequence[Sequence[int]]:
        return [
            [self.__true_positive, self.__false_positive],
            [self.__false_negative, self.__true_negative],
        ]

    @property
    def accuracy(self) -> float:
        trues = self.__true_positive + self.__true_negative
        return (trues) / (trues + self.__false_positive + self.__false_negative)

    @property
    def precision(self) -> float:
        return self.__true_positive / (self.__true_positive + self.__false_positive)

    @property
    def recall(self) -> float:
        return self.__true_positive / (self.__true_positive + self.__false_negative)

    def accept[T](self, expected: T, actual: T) -> None:
        if expected == actual:
            if actual:
                self.__true_positive += 1
            else:
                self.__true_negative += 1
        else:
            if actual:
                self.__false_positive += 1
            else:
                self.__false_negative += 1

    def __format__(self, format_spec: str) -> str:
        if "n" in format_spec:
            format_spec = format_spec.replace("n", "")
            separator = "\n"
        else:
            separator = ", "

        accuracy = self.accuracy.__format__(format_spec)
        precision = self.precision.__format__(format_spec)
        recall = self.recall.__format__(format_spec)

        return f"Accuracy: {accuracy}{separator}Precision: {precision}{separator}Recall: {recall}"


def GradeClasifier() -> nn.Sequential:
    """Build the perceptron: Linear(8 -> 30), ReLU, Linear(30 -> 1), Sigmoid.

    Both linear layers are created with ``float64`` parameters, and the
    final sigmoid keeps the single output within [0, 1].
    """
    hidden = nn.Linear(in_features=8, out_features=30, dtype=float64)
    output = nn.Linear(in_features=30, out_features=1, dtype=float64)
    return nn.Sequential(hidden, nn.ReLU(), output, nn.Sigmoid())


def test(
    model: nn.Module,
    input: Sequence[Tensor],
    expected_output: Sequence[Tensor],
    *,
    loss_fn: nn.Module | None = None,
    optimizer: Optimizer | None = None,
) -> ConfusionMatrix:
    """Run the model over every sample once and tally the predictions.

    Samples are processed one at a time. A prediction counts as positive
    when the model output is at least 0.5.

    Args:
        model: Module called on each element of ``input``; it must produce a
            single-element output.
        input: Samples to feed to the model.
        expected_output: Single-element label tensors, one per sample.
        loss_fn: When given, the loss is computed for each sample and
            back-propagated. When omitted, the pass is evaluation-only and
            gradient tracking is switched off.
        optimizer: When given, gradients are zeroed before the backward pass
            and a step is taken after it. Without ``loss_fn`` no gradients
            are produced, so the step has nothing to apply.

    Returns:
        A ``ConfusionMatrix`` filled with the outcome of every sample.

    Raises:
        ValueError: If ``input`` and ``expected_output`` differ in length.
    """
    # Imported locally because the file only pulls selected names from torch.
    from torch import is_grad_enabled, set_grad_enabled

    if len(input) != len(expected_output):
        raise ValueError("Input length must be equals to expected_output.")

    matrix = ConfusionMatrix()

    # A graph is only needed when a loss will be back-propagated; never force
    # gradients on if the caller has already disabled them.
    track_gradients = loss_fn is not None and is_grad_enabled()
    with set_grad_enabled(track_gradients):
        for x, y in zip(input, expected_output):
            label = y.item()
            y_pred = model(x)

            # Plain Python floats keep the comparison inside accept() scalar.
            matrix.accept(label, float(y_pred.item() >= 0.5))

            if loss_fn is None:
                if optimizer is not None:
                    optimizer.zero_grad()
                    optimizer.step()
                continue

            loss = loss_fn(y_pred, tensor([label], dtype=float64))

            if optimizer is not None:
                optimizer.zero_grad()

            loss.backward()

            if optimizer is not None:
                optimizer.step()

    return matrix


def train(
    model: nn.Module,
    input: Sequence[Tensor],
    expected_output: Sequence[Tensor],
    epochs: int,
    *,
    learning_rate: float = 0.001,
) -> Iterable[ConfusionMatrix]:
    """Train ``model`` with BCE loss and Adam, yielding one matrix per epoch.

    This is a generator: nothing runs, including the length check, until the
    first item is requested. The model is put into training mode before the
    first epoch and returned to evaluation mode when the generator finishes
    or is closed.

    Args:
        model: Module to optimise in place.
        input: Training samples.
        expected_output: Labels, one per sample.
        epochs: Number of full passes over the data.
        learning_rate: Learning rate handed to Adam.

    Yields:
        The ``ConfusionMatrix`` collected by ``test`` during each epoch.

    Raises:
        ValueError: If ``input`` and ``expected_output`` differ in length.
    """
    if len(input) != len(expected_output):
        raise ValueError("Input length must be equals to expected_output.")

    criterion = nn.BCELoss()
    adam = Adam(model.parameters(), lr=learning_rate)

    model.train()
    try:
        for _epoch in range(epochs):
            epoch_matrix = test(
                model,
                input,
                expected_output,
                loss_fn=criterion,
                optimizer=adam,
            )
            yield epoch_matrix
    finally:
        # Runs on exhaustion and on early close alike.
        model.eval()

## Подготовка данных

In [None]:
# Load the mushroom dataset from the archive under ./data.
df = pd.read_csv("./data/mushroom_cleaned.csv.tar.gz")
# Keep this expression last so the notebook cell displays the first rows.
df.head()

In [None]:
RANDOM_SEED = 42
# NOTE: despite its name, this value is passed as ``test_size`` below, i.e. it
# is the fraction of rows held out into X_test / y_test.
TRAIN_PROPORTION = 0.2

# Features are every column except "class"; "class" itself is the target.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns="class"),
    df["class"],
    test_size=TRAIN_PROPORTION,
    random_state=RANDOM_SEED,
)

In [None]:
columns = ["cap-diameter", "stem-height"]
# Fit on the training rows only, then apply the same scaling to both splits.
scaler = preprocessing.MinMaxScaler().fit(X_train[columns])

X_train[columns] = scaler.transform(X_train[columns])
X_test[columns] = scaler.transform(X_test[columns])

# Replace the pandas objects with float64 tensors, matching the model's dtype.
X_train, X_test, y_train, y_test = (
    tensor(frame.values, dtype=float64)
    for frame in (X_train, X_test, y_train, y_test)
)

In [None]:
EPOCHS = 80

model = GradeClasifier()
# train() is a generator; list() drives it through every epoch and keeps the
# per-epoch confusion matrices.
matrixes = list(train(model, X_train, y_train, EPOCHS))

# Epochs are numbered from 1 on the x-axis.
epochs = list(range(1, len(matrixes) + 1))
accuracies = [epoch_matrix.accuracy for epoch_matrix in matrixes]

plt.figure(figsize=(10, 6))
plt.plot(epochs, accuracies, color="b", label="Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.grid(True)
plt.show()

In [None]:
# Evaluation pass: without loss_fn/optimizer, test() does not update the model.
matrix = test(model, X_test, y_test)

# matrix.data is laid out as [[TP, FP], [FN, TN]]: each row is a predicted
# class and each column an actual class, so the axes are labelled to match.
cell_names = [
    ["True Positive", "False Positive"],
    ["False Negative", "True Negative"],
]
# Show the count next to the name of every cell.
labels = np.array(
    [
        [f"{name}\n{count}" for name, count in zip(name_row, count_row)]
        for name_row, count_row in zip(cell_names, matrix.data)
    ]
)

plt.figure(figsize=(8, 6))
sns.heatmap(
    matrix.data,
    annot=labels,
    fmt="",  # annotations are ready-made strings
    cmap="Blues",
    cbar=True,
    xticklabels=["Actual Positive", "Actual Negative"],
    yticklabels=["Predicted Positive", "Predicted Negative"],
)
plt.title("Confusion Matrix Heatmap")
plt.xlabel("Actual")
plt.ylabel("Predicted")

# "n" selects newline-separated metrics; ".2f" formats each one.
metrics_text = f"{matrix:n.2f}"
plt.text(
    1.5,
    -0.5,
    metrics_text,
    fontsize=12,
    bbox=dict(facecolor="white", alpha=0.5),
    ha="center",
)

plt.show()