In [None]:
import numpy as np
from torch.utils.data import SubsetRandomSampler

def get_train_valid_samplers(train_dataset, validation_split=0.3, shuffle_dataset=True, random_seed=42):
    """Split the positions of ``train_dataset`` into train and validation samplers.

    Args:
        train_dataset: any object supporting ``len()``; only its length is used.
        validation_split: fraction of the positions reserved for validation
            (the count is rounded down).
        shuffle_dataset: when true, the positions are shuffled before the split.
        random_seed: seed passed to ``np.random.seed`` before shuffling. Note that
            this reseeds NumPy's global random state.

    Returns:
        A ``(train_sampler, valid_sampler)`` tuple of ``SubsetRandomSampler``
        objects; the validation sampler gets the first ``split`` positions of the
        (possibly shuffled) order and the train sampler gets the remainder.
    """
    n_samples = len(train_dataset)
    n_valid = int(np.floor(validation_split * n_samples))
    order = list(range(n_samples))

    if shuffle_dataset:
        np.random.seed(random_seed)
        np.random.shuffle(order)

    valid_part = order[:n_valid]
    train_part = order[n_valid:]
    return SubsetRandomSampler(train_part), SubsetRandomSampler(valid_part)

# Build the module-level samplers with the function's defaults
# (validation_split=0.3, shuffling enabled, random_seed=42).
train_sampler, valid_sampler = get_train_valid_samplers(train_dataset)

### train

In [None]:
# Log in to Kaggle with an API token (the token is created in the Kaggle account settings)

import kagglehub
kagglehub.login()

In [None]:
# Download the competition dataset and keep the path returned by the download call

ml_intensive_yandex_academy_spring_2025_path = kagglehub.competition_download('ml-intensive-yandex-academy-spring-2025')

print('Data source import complete.')

In [None]:
# !pip uninstall -y tensorflow && pip install tensorflow-cpu  # left disabled; kept for reference

In [None]:
import torch_xla.core.xla_model as xm
import torch_xla.distributed.xla_multiprocessing as xmp
import multiprocessing as mp

In [None]:
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score

def train_fn(rank, model, train_loader, valid_loader, optimizer, criterion, num_epochs, device, path_of_model):
    """Train ``model`` for ``num_epochs`` epochs, validating after every epoch.

    Args:
        rank: index of the calling process; used in the log line and in the
            name of the per-process results file.
        model: network to train (already placed on ``device`` by the caller).
        train_loader: iterable of ``(images, labels)`` training batches.
        valid_loader: iterable of ``(images, labels)`` validation batches.
        optimizer: optimizer stepped through ``xm.optimizer_step``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        num_epochs: number of passes over ``train_loader``.
        device: device the batches are moved to.
        path_of_model: destination of the saved ``state_dict``.

    Side effects:
        Every process writes its metric history to ``results_{rank}.pth``;
        only the master ordinal writes ``model.state_dict()`` to
        ``path_of_model``.
    """
    # Per-epoch metric history; the keys are what the plotting code reads later.
    history = {
        'train_losses': [],
        'val_losses': [],
        'train_accuracies': [],
        'val_accuracies': [],
        'train_F1': [],
        'Val_F1': []
    }
    separator = '-'*55

    xm.master_print(separator)
    xm.master_print(f'- initialization | TPU cores = {xm.xrt_world_size()}\t\t')

    for epoch in range(num_epochs):
        xm.master_print(separator)
        xm.master_print('EPOCH {}/{}'.format(epoch + 1, num_epochs))

        # ---- training pass ----
        model.train()
        train_loss_sum = 0.0
        train_targets, train_guesses = [], []

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            xm.optimizer_step(optimizer, barrier=True)

            train_loss_sum += loss.item()
            # Index of the largest output along dim 1 is taken as the predicted class.
            predicted = torch.max(outputs.data, 1)[1]

            train_targets.extend(labels.cpu().numpy())
            train_guesses.extend(predicted.cpu().numpy())

        train_accuracy = accuracy_score(train_targets, train_guesses)
        tr_f1 = f1_score(train_targets, train_guesses, average='weighted')
        train_loss = train_loss_sum / len(train_loader)

        history['train_losses'].append(train_loss)
        history['train_accuracies'].append(train_accuracy)
        history['train_F1'].append(tr_f1)

        # ---- validation pass (no gradients) ----
        model.eval()
        val_loss_sum = 0.0
        val_targets, val_guesses = [], []

        with torch.no_grad():
            for images, labels in valid_loader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                val_loss_sum += criterion(outputs, labels).item()

                predicted = torch.max(outputs.data, 1)[1]
                val_targets.extend(labels.cpu().numpy())
                val_guesses.extend(predicted.cpu().numpy())

        val_loss_epoch = val_loss_sum / len(valid_loader)
        f1 = f1_score(val_targets, val_guesses, average='weighted')
        val_accuracy = accuracy_score(val_targets, val_guesses)

        history['val_losses'].append(val_loss_epoch)
        history['val_accuracies'].append(val_accuracy)
        history['Val_F1'].append(f1)

        xm.master_print(f'Core: {rank}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss_epoch:.4f}, Train Acc: {train_accuracy*100:.2f}%, Val Acc: {val_accuracy*100:.2f}%, Train F1: {tr_f1:.4f}, Val F1: {f1:.4f}')

    # Persist the metric history (used for the plots) and the trained model.
    torch.save(history, f"results_{rank}.pth")

    if xm.is_master_ordinal():
        torch.save(model.state_dict(), path_of_model)

def _mp_fn(rank, flags):
    """Per-process entry point: build model, optimizer, loss and loaders, then train.

    Args:
        rank: index of this process, forwarded to ``train_fn``.
        flags: object exposing ``lr``, ``batch_size``, ``epochs`` and
            ``path_of_model``.

    Reads the module-level ``train_dataset``, ``train_dataset_augm``,
    ``train_sampler`` and ``valid_sampler``.
    """
    xm.rendezvous('checking_out')

    device = xm.xla_device()
    # THE MODEL TO TRAIN IS CHOSEN HERE
    net = Model1().to(device)
    optimizer = torch.optim.Adamax(net.parameters(), lr=flags.lr, weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()

    # Options shared by both loaders.
    loader_options = dict(batch_size=flags.batch_size, drop_last=True, num_workers=0)

    # NOTE(review): `train_sampler` appears to be built from `train_dataset` alone,
    # so its indices would never reach the `train_dataset_augm` part of the
    # concatenated dataset — confirm whether the augmented samples are meant to be used.
    train_loader = torch.utils.data.DataLoader(
        train_dataset + train_dataset_augm,
        sampler=train_sampler,
        **loader_options
    )
    valid_loader = torch.utils.data.DataLoader(
        train_dataset,
        sampler=valid_sampler,
        **loader_options
    )

    train_fn(
        rank, net, train_loader, valid_loader, optimizer, criterion,
        flags.epochs, device, flags.path_of_model
    )

In [None]:
# Training parameters can be changed here
class Flags:
    """Plain container for the training hyper-parameters.

    Attributes:
        epochs: number of training epochs.
        batch_size: batch size used for both data loaders.
        lr: optimizer learning rate.
        path_of_model: file the trained ``state_dict`` is saved to.
    """

    def __init__(self, epochs=2, batch_size=128, lr=0.01, path_of_model='model1.pth'):
        self.epochs, self.batch_size = epochs, batch_size
        self.lr, self.path_of_model = lr, path_of_model

# Entry point: create the default training flags and launch `_mp_fn` through
# `xmp.spawn` with the 'fork' start method.
if __name__ == '__main__':
    FLAGS = Flags()

    # FLAGS is passed as the extra argument, matching the `flags` parameter of `_mp_fn`.
    xmp.spawn(_mp_fn, args=(FLAGS,), start_method='fork')

In [None]:
# Re-create the model and load the trained weights into it

device = xm.xla_device()
final_modelь = Model1().to(xm.xla_device())
# NOTE(review): the file name is hard-coded; it equals the default `path_of_model`
# of `Flags` — keep the two in sync if the flag is changed.
final_modelь.load_state_dict(torch.load('model1.pth', map_location=device))

In [None]:
# Build the metrics dictionary after training by averaging the per-core result files
import numpy as np

# NOTE(review): only results_0.pth .. results_6.pth are read (7 files), as before —
# confirm this matches the number of processes that actually wrote a results file.
NUM_RESULT_FILES = 7
METRIC_KEYS = (
    'train_losses',
    'val_losses',
    'train_accuracies',
    'val_accuracies',
    'train_F1',
    'Val_F1',
)

# Load every per-core file exactly once with a direct call (previously each file
# was re-loaded for every metric through eval() of a constructed string).
per_core_results = [
    torch.load(f'results_{i}.pth', map_location=device)
    for i in range(NUM_RESULT_FILES)
]

# For each metric: stack the per-core histories and average over the cores,
# leaving one value per epoch.
results = {
    key: np.mean(np.array([core[key] for core in per_core_results]), axis=0).tolist()
    for key in METRIC_KEYS
}

results

In [None]:
from IPython.display import clear_output

def show_loss_accuracy(loss, val_loss, acc, val_acc, tr_f1, val_f1):
    """Clear the cell output and draw loss, accuracy and F1 curves side by side.

    Args:
        loss, val_loss: train / validation loss series.
        acc, val_acc: train / validation accuracy series.
        tr_f1, val_f1: train / validation F1 series.
    """
    clear_output()
    plt.figure(figsize=(12, 5))

    # One entry per panel: title followed by the (series, legend label) pairs.
    panels = (
        ('Loss Curves', (loss, 'Train Loss'), (val_loss, 'Val Loss')),
        ('Accuracy', (acc, 'Train Accur'), (val_acc, 'Val Accur')),
        ('F1', (tr_f1, "Train F1"), (val_f1, 'Valid F1')),
    )

    for position, (title, train_curve, valid_curve) in enumerate(panels, start=1):
        plt.subplot(1, 3, position)
        for series, label in (train_curve, valid_curve):
            plt.plot(series, label=label)
        plt.legend()
        plt.title(title)

    plt.show()

In [None]:
# Plot the averaged training history collected in `results`.
show_loss_accuracy(results['train_losses'], results['val_losses'], results['train_accuracies'], results['val_accuracies'], results['train_F1'], results['Val_F1'])