<a href="https://colab.research.google.com/github/DilemmaFixer3/AI_pr_5-6-7/blob/main/pr8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm # –î–ª—è –≤—ñ–¥–æ–±—Ä–∞–∂–µ–Ω–Ω—è –ø—Ä–æ–≥—Ä–µ—Å—É

# Seed both random number generators with the same fixed value for reproducibility.
np.random.seed(42)
torch.manual_seed(42)

# --- 0. Data loading and preprocessing ---

def load_and_preprocess_data(file_path, sample_frac=0.2):
    """
    Read the symptoms CSV, subsample it and build a stratified train/test split.

    Steps, in order: read ``file_path`` with pandas, keep a ``sample_frac``
    fraction of the rows (random_state=42), strip every character outside
    ``[A-Za-z0-9_]`` from the column names, use the first column whose name
    contains "disease" (case-insensitive) as the label, drop classes that have
    fewer than two samples, label-encode the classes, and split 80/20 with
    stratification on the encoded labels (random_state=42).

    Returns ``(X_train, X_test, y_train, y_test, num_classes)``. When the file
    is missing, no label column is found, or nothing is left after removing
    rare classes, a message is printed and all five elements are ``None``.
    """
    failure = (None, None, None, None, None)

    try:
        frame = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"–ü–æ–º–∏–ª–∫–∞: –§–∞–π–ª '{file_path}' –Ω–µ –∑–Ω–∞–π–¥–µ–Ω–æ.")
        return failure
    else:
        # Work on a reproducible subsample of the full dataset.
        frame = frame.sample(frac=sample_frac, random_state=42).reset_index(drop=True)

    frame.columns = frame.columns.str.replace('[^A-Za-z0-9_]+', '', regex=True)

    # The label column is the first one whose cleaned name mentions "disease".
    disease_col = None
    for name in frame.columns:
        if 'disease' in name.lower():
            disease_col = name
            break
    if not disease_col:
        print("–ü–æ–º–∏–ª–∫–∞: –ù–µ –≤–¥–∞–ª–æ—Å—è –∑–Ω–∞–π—Ç–∏ —Å—Ç–æ–≤–ø–µ—Ü—å –∑ –Ω–∞–∑–≤–æ—é —Ö–≤–æ—Ä–æ–±–∏ (–Ω–∞–ø—Ä–∏–∫–ª–∞–¥, 'Disease').")
        return failure

    # Stratified splitting needs at least two samples per class, so
    # single-sample classes are removed before the split.
    labels = frame[disease_col]
    counts = labels.value_counts()
    rare_classes = counts[counts < 2].index.tolist()
    removed_rare = bool(rare_classes)
    if removed_rare:
        print(f" –í–∏–¥–∞–ª—è—î–º–æ {len(rare_classes)} —Ä—ñ–¥–∫—ñ—Å–Ω–∏—Ö –∫–ª–∞—Å—ñ–≤ (—Ö–≤–æ—Ä–æ–±), —â–æ –º–∞—é—Ç—å < 2 –∑—Ä–∞–∑–∫—ñ–≤.")
        frame = frame[~labels.isin(rare_classes)]

    X = frame.drop(columns=[disease_col])
    y_labels = frame[disease_col]

    # The emptiness check only applies when rows were actually filtered out.
    if removed_rare and X.empty:
        print("–ü–æ–º–∏–ª–∫–∞: –ü—ñ—Å–ª—è —Ñ—ñ–ª—å—Ç—Ä–∞—Ü—ñ—ó –Ω–µ –∑–∞–ª–∏—à–∏–ª–æ—Å—è –¥–∞–Ω–∏—Ö.")
        return failure

    # Map disease names to integer class ids.
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y_labels)
    num_classes = len(label_encoder.classes_)

    # float32 features and int64 labels, as tensors.
    X_tensor = torch.tensor(X.values, dtype=torch.float32)
    y_tensor = torch.tensor(y, dtype=torch.long)

    split = train_test_split(
        X_tensor, y_tensor, test_size=0.2, random_state=42, stratify=y_tensor
    )
    X_train, X_test, y_train, y_test = split

    print(f" –£—Å–ø—ñ—à–Ω–æ –∑–∞–≤–∞–Ω—Ç–∞–∂–µ–Ω–æ. –ö—ñ–ª—å–∫—ñ—Å—Ç—å –∫–ª–∞—Å—ñ–≤: {num_classes}")
    return X_train, X_test, y_train, y_test, num_classes

# Path to the CSV with symptoms and diseases; replace it with the name of your own file.
FILE_PATH = 'Final_Augmented_dataset_Diseases_and_Symptoms.csv'
X_train, X_test, y_train, y_test, NUM_CLASSES = load_and_preprocess_data(FILE_PATH, sample_frac=0.2)

# load_and_preprocess_data prints the reason and returns None values on failure.
# Stop with a non-zero status; `raise SystemExit` does not depend on the
# `exit` helper that the `site` module provides for interactive sessions.
if X_train is None:
    raise SystemExit(1)

INPUT_SIZE = X_train.shape[1]  # number of feature columns
BATCH_SIZE = 64
N_EPOCHS = 20  # kept small so the experiments run faster

# Dataset and DataLoader objects
class SymptomDataset(Dataset):
    """Index-aligned pairing of feature rows ``X`` with labels ``y``."""

    def __init__(self, X, y):
        # Both containers are stored as given; no copy or validation is done.
        self.X, self.y = X, y

    def __len__(self):
        # The number of samples is defined by the label container.
        n_samples = len(self.y)
        return n_samples

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

# Training data is reshuffled by its loader; test data keeps a fixed order.
train_dataset = SymptomDataset(X_train, y_train)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
test_dataset = SymptomDataset(X_test, y_test)
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# --- 1. Neural network model definition ---

class SimpleClassifier(nn.Module):
    """Fully connected classifier: input -> 128 -> 64 -> num_classes.

    Each hidden layer is followed by ReLU and Dropout(0.3); the final layer
    returns raw scores with no activation applied.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        widths = (input_size, 128, 64)
        stages = []
        # Hidden stages are created in order so parameter initialisation
        # consumes the random generator in a fixed sequence.
        for fan_in, fan_out in zip(widths, widths[1:]):
            stages.extend([nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(0.3)])
        stages.append(nn.Linear(64, num_classes))
        self.layer_stack = nn.Sequential(*stages)

    def forward(self, x):
        scores = self.layer_stack(x)
        return scores

# --- 2. Optimizer adaptation: a custom version of Adam ---

class CustomAdam(optim.Optimizer):
    """
    Hand-written Adam optimizer with configurable hyper-parameters.

    Subclasses torch.optim.Optimizer. Weight decay is applied as a classic
    (coupled) L2 penalty: ``weight_decay * p`` is added to the gradient before
    the moment estimates are updated. It is NOT the decoupled AdamW variant.

    Args:
        params: iterable of parameters or parameter-group dicts.
        lr: learning rate (must be >= 0).
        betas: (beta1, beta2) decay rates for the first and second moment
            estimates, each in [0, 1).
        eps: value added to the denominator for numerical stability (>= 0).
        weight_decay: L2 penalty coefficient (>= 0).

    Raises:
        ValueError: if any hyper-parameter is outside its valid range.
    """
    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8, weight_decay=0):
        if not 0.0 <= lr:
            raise ValueError(f"–ù–µ–ø—Ä–∏–ø—É—Å—Ç–∏–º–µ –∑–Ω–∞—á–µ–Ω–Ω—è —à–≤–∏–¥–∫–æ—Å—Ç—ñ –Ω–∞–≤—á–∞–Ω–Ω—è: {lr}")
        if not 0.0 <= eps:
            raise ValueError(f"–ù–µ–ø—Ä–∏–ø—É—Å—Ç–∏–º–µ –∑–Ω–∞—á–µ–Ω–Ω—è epsilon: {eps}")
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(f"–ù–µ–ø—Ä–∏–ø—É—Å—Ç–∏–º–µ –∑–Ω–∞—á–µ–Ω–Ω—è beta1: {betas[0]}")
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(f"–ù–µ–ø—Ä–∏–ø—É—Å—Ç–∏–º–µ –∑–Ω–∞—á–µ–Ω–Ω—è beta2: {betas[1]}")
        if not 0.0 <= weight_decay:
            raise ValueError(f"–ù–µ–ø—Ä–∏–ø—É—Å—Ç–∏–º–µ –∑–Ω–∞—á–µ–Ω–Ω—è weight_decay: {weight_decay}")

        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: optional callable that re-evaluates the model and returns
                the loss; it is run with gradients enabled.

        Returns:
            The value returned by ``closure``, or None when no closure is given.

        Raises:
            RuntimeError: if a parameter has a sparse gradient.
        """
        loss = None
        if closure is not None:
            # The update itself runs under no_grad, but the closure must be
            # able to build a graph and call backward().
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad
                if grad.is_sparse:
                    raise RuntimeError('Adam –Ω–µ –ø—ñ–¥—Ç—Ä–∏–º—É—î —Ä–æ–∑—Ä—ñ–¥–∂–µ–Ω—ñ –≥—Ä–∞–¥—ñ—î–Ω—Ç–∏')

                state = self.state[p]

                # Lazy per-parameter state initialisation.
                if not state:
                    state['step'] = 0
                    # Exponential moving average of the gradients.
                    state['exp_avg'] = torch.zeros_like(p)
                    # Exponential moving average of the squared gradients.
                    state['exp_avg_sq'] = torch.zeros_like(p)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                # 1. Coupled L2 weight decay: folded into the gradient, so it
                #    also flows through the moment estimates. `add` is
                #    out-of-place, leaving p.grad itself untouched.
                if group['weight_decay'] != 0:
                    grad = grad.add(p, alpha=group['weight_decay'])

                # 2. First moment update.
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)

                # 3. Second moment update.
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                # 4. Bias correction, applied to the denominator and step size.
                denom = (exp_avg_sq.sqrt() / np.sqrt(bias_correction2)).add_(group['eps'])
                step_size = group['lr'] / bias_correction1

                # 5. Parameter update: p -= step_size * exp_avg / denom.
                p.addcdiv_(exp_avg, denom, value=-step_size)

        return loss


# --- 3. Functions for training and evaluating the model ---

def train_model(model, optimizer, criterion, train_loader, n_epochs):
    """Train ``model`` for ``n_epochs`` passes over ``train_loader``.

    Args:
        model: network to optimise; it is switched to training mode.
        optimizer: optimizer bound to the model's parameters.
        criterion: loss function called as ``criterion(outputs, labels)``.
        train_loader: iterable of ``(inputs, labels)`` batches that also
            supports ``len()``.
        n_epochs: number of passes to run. Values below 1 are treated as 1,
            which matches the earlier behaviour of always running one pass.

    Returns:
        ``(avg_loss, accuracy)`` for the final pass: the mean of the per-batch
        losses and the percentage (0-100) of correctly predicted samples,
        both measured while the weights were being updated.
    """
    model.train()
    avg_loss = 0.0
    accuracy = 0.0

    for _ in range(max(1, n_epochs)):
        total_loss = 0.0
        correct_predictions = 0
        total_samples = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            # The predicted class is the index of the highest score per row.
            predicted = outputs.detach().argmax(dim=1)
            total_samples += labels.size(0)
            correct_predictions += (predicted == labels).sum().item()

        avg_loss = total_loss / len(train_loader)
        accuracy = 100 * correct_predictions / total_samples

    return avg_loss, accuracy

def evaluate_model(model, test_loader, criterion):
    """Score ``model`` on ``test_loader`` without updating any weights.

    The model is switched to evaluation mode and left in it. Returns
    ``(avg_loss, accuracy)``: the mean of the per-batch losses and the
    percentage (0-100) of samples whose highest-scoring class equals the label.
    """
    model.eval()
    batch_losses = []
    hits = 0
    seen = 0

    with torch.no_grad():
        for inputs, labels in test_loader:
            scores = model(inputs)
            batch_losses.append(criterion(scores, labels).item())
            # Index of the largest score in each row is the predicted class.
            predicted = torch.max(scores.data, 1).indices
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)

    avg_loss = sum(batch_losses, 0.0) / len(test_loader)
    accuracy = 100 * hits / seen
    return avg_loss, accuracy

# --- 4. Experiment loop used by the lab tasks ---

def run_experiment(model_class, input_size, num_classes, train_loader, test_loader,
                   optimizer_config, n_epochs):
    """Train a fresh model with the configured optimizer, then evaluate it.

    ``optimizer_config`` must provide ``'name'`` (one of 'SGD', 'Adam',
    'CustomAdam', 'NAdam', 'AdamW') and ``'lr'``. SGD is created with
    momentum 0.9; all others use their defaults apart from the learning rate.

    Returns ``(train_acc, test_acc, test_loss)``, where ``train_acc`` comes
    from the last training epoch. Raises ValueError for an unknown name.
    """
    # A new model and loss are created for every experiment.
    network = model_class(input_size, num_classes)
    loss_fn = nn.CrossEntropyLoss()

    optim_name = optimizer_config['name']
    lr = optimizer_config['lr']

    # Factories are lazy so that only the requested optimizer is constructed.
    factories = {
        'SGD': lambda params: optim.SGD(params, lr=lr, momentum=0.9),
        'Adam': lambda params: optim.Adam(params, lr=lr),
        'CustomAdam': lambda params: CustomAdam(params, lr=lr),
        'NAdam': lambda params: optim.NAdam(params, lr=lr),
        'AdamW': lambda params: optim.AdamW(params, lr=lr),
    }
    if optim_name not in factories:
        raise ValueError(f"–ù–µ–≤—ñ–¥–æ–º–∏–π –æ–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: {optim_name}")
    optimizer = factories[optim_name](network.parameters())

    # One train_model call per epoch; only the last epoch's accuracy is kept.
    for _ in range(n_epochs):
        _, train_acc = train_model(network, optimizer, loss_fn, train_loader, 1)

    test_loss, test_acc = evaluate_model(network, test_loader, loss_fn)

    return train_acc, test_acc, test_loss


# --- Main block that runs the lab tasks ---
def main_lab_work():
    """Run the four lab stages in order and print their result tables.

    Stages: (1) learning-rate sweep for SGD and Adam, (2) CustomAdam versus
    the built-in Adam at lr=1e-3, (3) NAdam and AdamW at lr=1e-3 next to the
    Adam result from stage 2, (4) a final AdamW training run with a progress
    bar followed by evaluation on the test set.

    Reads the module-level INPUT_SIZE, NUM_CLASSES, N_EPOCHS, train_loader,
    test_loader, X_train and X_test. Returns nothing; results are printed.
    """

    def run(opt_name, lr):
        """Train a fresh SimpleClassifier; return (train_acc, test_acc)."""
        train_acc, test_acc, _ = run_experiment(
            SimpleClassifier, INPUT_SIZE, NUM_CLASSES,
            train_loader, test_loader, {'name': opt_name, 'lr': lr}, N_EPOCHS
        )
        return train_acc, test_acc

    def announce_and_run(opt_name, lr):
        """Print the optimizer/learning-rate header line, then run it."""
        print(f"-> –û–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: {opt_name}, –®–≤–∏–¥–∫—ñ—Å—Ç—å –Ω–∞–≤—á–∞–Ω–Ω—è (LR): {lr}")
        return run(opt_name, lr)

    def show_table(rows):
        """Print a list of result dicts as a markdown table."""
        print(pd.DataFrame(rows).to_markdown(index=False))

    print(f"---  –†–æ–∑–º—ñ—Ä–Ω—ñ—Å—Ç—å –¥–∞–Ω–∏—Ö: {INPUT_SIZE} –æ–∑–Ω–∞–∫, {NUM_CLASSES} –∫–ª–∞—Å—ñ–≤ ---")
    print(f"---  –¢—Ä–µ–Ω—É–≤–∞–Ω–Ω—è –Ω–∞ {N_EPOCHS} –µ–ø–æ—Ö–∞—Ö ---")

    # 1. Effect of the learning rate.
    print("\n## 1. –î–æ—Å–ª—ñ–¥–∂–µ–Ω–Ω—è –≤–ø–ª–∏–≤—É —à–≤–∏–¥–∫–æ—Å—Ç—ñ –Ω–∞–≤—á–∞–Ω–Ω—è üß™")

    results_lr = []
    for opt_name in ('SGD', 'Adam'):
        for lr in (1e-4, 1e-3, 1e-2):
            train_acc, test_acc = announce_and_run(opt_name, lr)
            results_lr.append({
                'Optimizer': opt_name,
                'LR': lr,
                'Train Accuracy': f"{train_acc:.2f}%",
                'Test Accuracy': f"{test_acc:.2f}%"
            })

    show_table(results_lr)

    # 2. CustomAdam against the built-in Adam, both at the standard 1e-3 rate.
    print("\n## 2. –ê–¥–∞–ø—Ç–∞—Ü—ñ—è –æ–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä—ñ–≤: –í–ª–∞—Å–Ω–∞ –≤–µ—Ä—Å—ñ—è Adam (CustomAdam) ")

    best_lr_adam = 1e-3

    print(f"-> CustomAdam (LR: {best_lr_adam})")
    _, custom_test_acc = run('CustomAdam', best_lr_adam)

    print(f"-> –í–±—É–¥–æ–≤–∞–Ω–∏–π Adam (LR: {best_lr_adam})")
    _, adam_test_acc = run('Adam', best_lr_adam)

    show_table([{
        'Optimizer': 'CustomAdam',
        'LR': best_lr_adam,
        'Test Accuracy': f"{custom_test_acc:.2f}%"
    }, {
        'Optimizer': 'Built-in Adam',
        'LR': best_lr_adam,
        'Test Accuracy': f"{adam_test_acc:.2f}%"
    }])

    # 3. Newer optimizers (NAdam, AdamW) at the standard learning rate.
    print("\n## 3. –ü–æ—Ä—ñ–≤–Ω—è–Ω–Ω—è –∑ –Ω–æ–≤–∏–º–∏ –æ–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä–∞–º–∏ (Nadam, AdamW)")

    default_lr = 1e-3
    results_new_opt = []
    for opt_name in ('NAdam', 'AdamW'):
        _, test_acc = announce_and_run(opt_name, default_lr)
        results_new_opt.append({
            'Optimizer': opt_name,
            'LR': default_lr,
            'Test Accuracy': f"{test_acc:.2f}%"
        })

    # The Adam accuracy from stage 2 is reused as the reference row.
    results_new_opt.append({
        'Optimizer': 'Built-in Adam',
        'LR': default_lr,
        'Test Accuracy': f"{adam_test_acc:.2f}%"
    })

    show_table(results_new_opt)

    # 4. Final run on the medical data sample.
    print("\n## 4. –ê–Ω–∞–ª—ñ–∑ –Ω–∞ —Ä–µ–∞–ª—å–Ω–∏—Ö –º–µ–¥–∏—á–Ω–∏—Ö –¥–∞–Ω–∏—Ö ")
    print("> –¢—Ä–µ–Ω—É–≤–∞–Ω–Ω—è —Ç–∞ –æ—Ü—ñ–Ω–∫–∞ –ø—Ä–æ–≤–æ–¥–∏–ª–∏—Å—è –Ω–∞ 20% —Ä–µ–∞–ª—å–Ω–∏—Ö –º–µ–¥–∏—á–Ω–∏—Ö –¥–∞–Ω–∏—Ö,")
    print("> —è–∫ –∑–∞–∑–Ω–∞—á–µ–Ω–æ —É –≤—Ö—ñ–¥–Ω–∏—Ö –¥–∞–Ω–∏—Ö (X_train, X_test, y_train, y_test).")
    print(f"> –ó–∞–≥–∞–ª—å–Ω–∞ –∫—ñ–ª—å–∫—ñ—Å—Ç—å –∑–∞–ø–∏—Å—ñ–≤ —É 20% –≤–∏–±—ñ—Ä—Ü—ñ: {X_train.shape[0] + X_test.shape[0]}.")

    # The final model is trained with AdamW at lr=1e-3.
    final_model = SimpleClassifier(INPUT_SIZE, NUM_CLASSES)
    final_optimizer = optim.AdamW(final_model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss()

    print("\n-> –§—ñ–Ω–∞–ª—å–Ω–µ —Ç—Ä–µ–Ω—É–≤–∞–Ω–Ω—è –º–æ–¥–µ–ª—ñ –∑ AdamW:")

    # One epoch per train_model call so tqdm can show per-epoch progress.
    for _ in tqdm(range(N_EPOCHS), desc="–§—ñ–Ω–∞–ª—å–Ω–µ —Ç—Ä–µ–Ω—É–≤–∞–Ω–Ω—è"):
        train_model(final_model, final_optimizer, criterion, train_loader, 1)

    final_test_loss, final_test_acc = evaluate_model(final_model, test_loader, criterion)

    print(f"\n** –§—ñ–Ω–∞–ª—å–Ω–∞ —Ç–æ—á–Ω—ñ—Å—Ç—å –Ω–∞ —Ç–µ—Å—Ç–æ–≤–æ–º—É –Ω–∞–±–æ—Ä—ñ (AdamW, LR=1e-3): {final_test_acc:.2f}%**")
    print(f"** –§—ñ–Ω–∞–ª—å–Ω—ñ –≤—Ç—Ä–∞—Ç–∏ –Ω–∞ —Ç–µ—Å—Ç–æ–≤–æ–º—É –Ω–∞–±–æ—Ä—ñ: {final_test_loss:.4f}**")


# Run the lab experiments only when this file is executed as the main module.
if __name__ == "__main__":
    main_lab_work()

 –í–∏–¥–∞–ª—è—î–º–æ 42 —Ä—ñ–¥–∫—ñ—Å–Ω–∏—Ö –∫–ª–∞—Å—ñ–≤ (—Ö–≤–æ—Ä–æ–±), —â–æ –º–∞—é—Ç—å < 2 –∑—Ä–∞–∑–∫—ñ–≤.
 –£—Å–ø—ñ—à–Ω–æ –∑–∞–≤–∞–Ω—Ç–∞–∂–µ–Ω–æ. –ö—ñ–ª—å–∫—ñ—Å—Ç—å –∫–ª–∞—Å—ñ–≤: 682
---  –†–æ–∑–º—ñ—Ä–Ω—ñ—Å—Ç—å –¥–∞–Ω–∏—Ö: 377 –æ–∑–Ω–∞–∫, 682 –∫–ª–∞—Å—ñ–≤ ---
---  –¢—Ä–µ–Ω—É–≤–∞–Ω–Ω—è –Ω–∞ 20 –µ–ø–æ—Ö–∞—Ö ---

## 1. –î–æ—Å–ª—ñ–¥–∂–µ–Ω–Ω—è –≤–ø–ª–∏–≤—É —à–≤–∏–¥–∫–æ—Å—Ç—ñ –Ω–∞–≤—á–∞–Ω–Ω—è üß™
-> –û–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: SGD, –®–≤–∏–¥–∫—ñ—Å—Ç—å –Ω–∞–≤—á–∞–Ω–Ω—è (LR): 0.0001
-> –û–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: SGD, –®–≤–∏–¥–∫—ñ—Å—Ç—å –Ω–∞–≤—á–∞–Ω–Ω—è (LR): 0.001
-> –û–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: SGD, –®–≤–∏–¥–∫—ñ—Å—Ç—å –Ω–∞–≤—á–∞–Ω–Ω—è (LR): 0.01
-> –û–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: Adam, –®–≤–∏–¥–∫—ñ—Å—Ç—å –Ω–∞–≤—á–∞–Ω–Ω—è (LR): 0.0001
-> –û–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: Adam, –®–≤–∏–¥–∫—ñ—Å—Ç—å –Ω–∞–≤—á–∞–Ω–Ω—è (LR): 0.001
-> –û–ø—Ç–∏–º—ñ–∑–∞—Ç–æ—Ä: Adam, –®–≤–∏–¥–∫—ñ—Å—Ç—å –Ω–∞–≤—á–∞–Ω–Ω—è (LR): 0.01
| Optimizer   |     LR | Train Accuracy   | Test Accuracy   |
|:------------|-------:|:-----------------|:---------------

–§—ñ–Ω–∞–ª—å–Ω–µ —Ç—Ä–µ–Ω—É–≤–∞–Ω–Ω—è: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 20/20 [00:54<00:00,  2.73s/it]



** –§—ñ–Ω–∞–ª—å–Ω–∞ —Ç–æ—á–Ω—ñ—Å—Ç—å –Ω–∞ —Ç–µ—Å—Ç–æ–≤–æ–º—É –Ω–∞–±–æ—Ä—ñ (AdamW, LR=1e-3): 83.82%**
** –§—ñ–Ω–∞–ª—å–Ω—ñ –≤—Ç—Ä–∞—Ç–∏ –Ω–∞ —Ç–µ—Å—Ç–æ–≤–æ–º—É –Ω–∞–±–æ—Ä—ñ: 0.4520**
