# Фаза 2 • Неделя 8 • Четверг
## Нейронные сети
### 🔥 PyTorch: fine tuning

### Подготовка к работе

1. Загрузи этот ноутбук на Google Colab
2. Подгрузи архив [датасета](https://www.kaggle.com/datasets/ikobzev/architectural-heritage-elements-image64-dataset) в свое пространство и разархивируй его с помощью `unzip`.

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms as T
from torch.nn import functional as F

import numpy as np
import torchutils as tu
import matplotlib.pyplot as plt
import pandas as pd
import mlflow
import mlflow.pytorch
import multiprocessing as mp

# –£—Å—Ç–∞–Ω–æ–≤–∏—Ç–µ –º–µ—Ç–æ–¥ –∑–∞–ø—É—Å–∫–∞ –ø—Ä–æ—Ü–µ—Å—Å–æ–≤ –Ω–∞ 'spawn'
# if __name__ == "__main__":
#     mp.set_start_method("spawn", force=True)

# Resolve the device once and use it for the default device as well, so the
# notebook also runs on CPU-only machines instead of unconditionally selecting
# "cuda" while DEVICE falls back to "cpu".
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch.set_default_device(DEVICE)
# config = {
#     "batch_size": 2048,
#     "learning_rate": 0.002,
#     "epochs": 10,
#     "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
#     "data_dir": "data/",
#     "log_dir": "runs/experiment1",
# }
# The generator is handed to the DataLoaders below; it lives on the same
# device as the default device.
GENERATOR = torch.Generator(device=DEVICE)
BATCH_SIZE = 128
mlflow.set_tracking_uri("http://localhost:5000")

# Global matplotlib defaults for every figure in the notebook.
plt.rcParams["figure.dpi"] = 300
plt.rcParams["xtick.labelsize"] = 12
plt.rcParams["ytick.labelsize"] = 12

In [2]:
def _run_epoch(model: nn.Module, loader, criterion, optimizer=None) -> dict:
    """Run one full pass over ``loader`` and return batch-averaged metrics.

    When ``optimizer`` is given, the model is switched to train mode and its
    weights are updated after every batch. Without an optimizer the model is
    switched to eval mode and the whole pass (forward and loss) runs with
    gradient tracking disabled.

    Relies on the notebook-level names ``DEVICE``, ``n_classes`` and
    ``classification_metrics``.

    Returns:
        dict with keys "losses", "accs", "precs", "recals" and "f1s"; each
        value is ``np.mean`` of the per-batch values.
    """
    training = optimizer is not None
    model.train(training)

    per_batch = {"losses": [], "accs": [], "precs": [], "recals": [], "f1s": []}
    with torch.set_grad_enabled(training):
        for samples, labels in loader:
            samples = samples.to(DEVICE)
            labels = labels.to(DEVICE)

            y_pred = model(samples)
            loss = criterion(y_pred, labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            metrics = classification_metrics(labels, y_pred, n_classes)
            per_batch["losses"].append(loss.item())
            per_batch["accs"].append(metrics["accuracy"])
            per_batch["precs"].append(metrics["precision"])
            per_batch["recals"].append(metrics["recall"])
            per_batch["f1s"].append(metrics["f1"])

    return {name: np.mean(values) for name, values in per_batch.items()}


def fit_model(
    epochs: int,
    model: nn.Module,
    optimizer: torch.optim.Optimizer,
    criterion: torch.nn.modules.loss.CrossEntropyLoss,
    train_loader,
    valid_loader,
) -> dict:
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Every epoch runs one training pass and one validation pass, prints the
    averaged metrics and, when the notebook-level flag ``use_mlflow`` is
    truthy, logs them to the active MLflow run with ``step=epoch``.

    Args:
        epochs: number of epochs to run (epochs are numbered from 1).
        model: network to train; batches are moved to ``DEVICE``.
        optimizer: optimizer stepping the model's parameters.
        criterion: loss called as ``criterion(logits, labels)``.
        train_loader: iterable of ``(samples, labels)`` training batches.
        valid_loader: iterable of ``(samples, labels)`` validation batches.

    Returns:
        dict mapping ``"{train|valid}_{losses|accs|precs|recals|f1s}"`` to a
        list with one batch-averaged value per epoch.
    """
    # history key suffix -> metric name suffix used for MLflow
    metric_names = {
        "losses": "loss",
        "accs": "acc",
        "precs": "prec",
        "recals": "recal",
        "f1s": "f1",
    }
    history = {
        f"{split}_{name}": []
        for split in ("train", "valid")
        for name in metric_names
    }

    for epoch in range(1, epochs + 1):
        print(f'{"-"*13} Epoch {epoch} {"-"*13}')

        epoch_stats = {
            "train": _run_epoch(model, train_loader, criterion, optimizer),
            "valid": _run_epoch(model, valid_loader, criterion),
        }
        for split, stats in epoch_stats.items():
            for name, value in stats.items():
                history[f"{split}_{name}"].append(value)

        # [MLflow] log this epoch's metrics in a single call; the epoch
        # number is used as the step.
        if use_mlflow:
            mlflow.log_metrics(
                {
                    f"{split}_{metric_names[name]}": history[f"{split}_{name}"][-1]
                    for split in ("train", "valid")
                    for name in metric_names
                },
                step=epoch,
            )

        print(
            f'Train: loss {history["train_losses"][-1]:.4f}\n'
            f'accuracy {history["train_accs"][-1]:.4f}, F1-score {history["train_f1s"][-1]:.4f}\n'
            f'precision {history["train_precs"][-1]:.4f}, recal {history["train_recals"][-1]:.4f}\n'
            f'Valid: loss {history["valid_losses"][-1]:.4f}\n'
            f'accuracy {history["valid_accs"][-1]:.4f}, F1-score {history["valid_f1s"][-1]:.4f}\n'
            f'precision {history["valid_precs"][-1]:.4f}, recal {history["valid_recals"][-1]:.4f}\n'
        )
        print(f'{"-"*35}')
        print()

    return history

In [3]:
def classification_metrics(
    y_true: torch.Tensor, y_pred: torch.Tensor, num_classes: int
):
    """Compute accuracy and macro-averaged precision, recall and F1.

    Args:
        y_true: integer class labels, indexed along the first dimension
            (expected shape ``[N]``).
        y_pred: per-class scores of shape ``[N, num_classes]``; the predicted
            class is the ``argmax`` over dimension 1.
        num_classes: number of classes, i.e. the side of the confusion matrix.

    Returns:
        dict with float values under the keys ``"accuracy"``, ``"precision"``,
        ``"recall"`` and ``"f1"``. The last three are unweighted means over
        all ``num_classes`` classes; a class with no predictions or no samples
        contributes 0 thanks to the ``1e-8`` term in the denominators.
    """
    # Keep everything on the device of the labels.
    device = y_true.device
    predicted = y_pred.to(device).argmax(dim=1)

    accuracy = (y_true == predicted).float().mean().item()

    # Confusion matrix (rows: true class, columns: predicted class). Each
    # (true, predicted) pair is encoded as one flat index and counted in a
    # single vectorised call instead of a per-sample Python loop.
    flat_index = y_true * num_classes + predicted
    conf_matrix = (
        torch.bincount(flat_index, minlength=num_classes * num_classes)
        .reshape(num_classes, num_classes)
        .float()
    )

    # True positives, false positives, false negatives per class.
    TP = conf_matrix.diag()
    FP = conf_matrix.sum(0) - TP
    FN = conf_matrix.sum(1) - TP

    precision_per_class = TP / (TP + FP + 1e-8)
    recall_per_class = TP / (TP + FN + 1e-8)
    f1_per_class = (
        2
        * precision_per_class
        * recall_per_class
        / (precision_per_class + recall_per_class + 1e-8)
    )

    return {
        "accuracy": accuracy,
        "precision": precision_per_class.mean().item(),
        "recall": recall_per_class.mean().item(),
        "f1": f1_per_class.mean().item(),
    }

In [4]:
def plot_history(history, grid=True, suptitle="model 1"):
    """Plot train/valid curves for loss, accuracy, precision, recall and F1.

    Args:
        history: dict with per-epoch lists under the keys
            ``"{train|valid}_{losses|accs|precs|recals|f1s}"``.
        grid: passed to ``Axes.grid`` for every panel.
        suptitle: figure title.

    Returns:
        The matplotlib figure (a 3x2 grid; the sixth panel is left empty).
    """
    # (history key suffix, legend abbreviation, title prefix, y-axis label)
    panels = (
        ("losses", "loss", "Loss", "Loss"),
        ("accs", "acc", "Accuracy", "Accuracy"),
        ("precs", "prec", "Precision", "Precision"),
        ("recals", "recal", "Recal", "Recal"),
        ("f1s", "f1", "F1-score", "F1"),
    )
    n_epochs = len(history["train_losses"])

    fig, ax = plt.subplots(3, 2, figsize=(16, 20))
    fig.suptitle(suptitle, fontsize=24, fontweight="bold", y=0.85)

    # ax.flat walks the grid row by row, so panels land at
    # [0][0], [0][1], [1][0], [1][1], [2][0].
    for axis, (key, short, title, ylabel) in zip(ax.flat, panels):
        train_values = history[f"train_{key}"]
        valid_values = history[f"valid_{key}"]
        both = list(train_values) + list(valid_values)

        axis.plot(train_values, label=f"train {short}")
        axis.plot(valid_values, label=f"valid {short}")
        axis.set_title(f"{title} on epoch {n_epochs}", fontsize=16)
        axis.grid(grid)
        if key == "losses":
            axis.set_ylim((0, max(both) + 0.1))
        else:
            # Take the minimum over both curves so a validation curve that
            # dips below the training one is not cut off.
            axis.set_ylim((min(both) - 0.05, 1))
        axis.legend(fontsize=14)
        axis.set_xlabel("Epoch", fontsize=14)
        axis.set_ylabel(ylabel, fontsize=14)

    plt.subplots_adjust(top=0.8)
    plt.show()
    return fig

1. Убедись, что структура папок соответствует задаче классификации (либо приведи ее к формату, указанному ниже):

        `train`
        
            - class_1
            - class_2
            - ...
            - class_n
            
        `valid`

            - class_1
            - class_2
            - ...
            - class_n

In [5]:
# code

2. Создай `DataLoader` для обучающей и валидационной выборок. Примени аугментации к изображениям.

In [6]:
# The pretrained torchvision weights were trained on inputs normalised with
# the ImageNet channel statistics, so the same statistics are used here.
base_transforms = (
    T.ToTensor(),
    T.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
)
augmentation = (
    # Randomly changes brightness, contrast, saturation and hue. The ranges
    # must be given explicitly: with the default arguments (all 0) the
    # transform leaves the image unchanged.
    T.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    T.RandomRotation((0, 180)),  # random rotation by an angle in [0, 180] degrees
    T.RandomHorizontalFlip(p=0.5),  # mirrors the image left-right with probability p
)

# Training set: resize, then augmentations, then tensor conversion + normalisation.
train_ds = datasets.ImageFolder(
    "data/train",
    transform=T.Compose(
        [
            T.Resize((224, 224)),  # input size used for convnext-tiny
            *augmentation,
            *base_transforms,
        ]
    ),
)

# Validation set: no augmentation, only resize + tensor conversion + normalisation.
# NOTE(review): the folder is "data/test" while the task text describes a
# `valid` folder -- confirm the directory name on disk.
valid_ds = datasets.ImageFolder(
    "data/test",
    transform=T.Compose(
        [
            T.Resize((224, 224)),
            *base_transforms,
        ]
    ),
)

In [7]:
# Training batches: reshuffled on every pass, incomplete final batch dropped.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
    generator=GENERATOR,
    num_workers=4,  # tune to the number of available CPU cores
    prefetch_factor=2,  # batches loaded ahead of time per worker
    persistent_workers=True,  # keep the workers alive between epochs
    # pin_memory=True,  # would speed up CPU -> GPU transfer
)
# Validation batches: fixed order, every sample kept.
valid_loader = DataLoader(
    dataset=valid_ds,
    batch_size=BATCH_SIZE,
    shuffle=False,
    generator=GENERATOR,
    num_workers=4,
    persistent_workers=True,
    # pin_memory=True,
)

3. Создай экземпляр предобученной модели: [torchvision models](https://pytorch.org/vision/stable/models.html). Можно взять любую модель для baseline, а дальше попробовать что-то более сложное.

In [8]:
# Build ConvNeXt-Tiny initialised with torchvision's default pretrained weights.
from torchvision.models import convnext_tiny, ConvNeXt_Tiny_Weights

model = convnext_tiny(weights=ConvNeXt_Tiny_Weights.DEFAULT)

In [9]:
# Move the network to the working device and print a layer-by-layer summary
# for a dummy batch of four 3x224x224 inputs.
model = model.to(DEVICE)
fake_batch = torch.randn((4, 3, 224, 224), device=DEVICE)
tu.get_model_summary(model, fake_batch)

Layer                                                    Kernel             Output          Params           FLOPs
0_features.0.Conv2d_0                                  [3, 96, 4, 4]     [4, 96, 56, 56]       4,704    59,006,976
1_features.0.LayerNorm2d_1                                      [96]     [4, 96, 56, 56]         192             0
2_features.1.0.block.Conv2d_0                          [1, 96, 7, 7]     [4, 96, 56, 56]       4,800    60,211,200
3_features.1.0.block.Permute_1                                     -     [4, 56, 56, 96]           0             0
4_features.1.0.block.LayerNorm_2                                [96]     [4, 56, 56, 96]         192             0
5_features.1.0.block.Linear_3                              [96, 384]    [4, 56, 56, 384]      37,248   920,027,136
6_features.1.0.block.GELU_4                                        -    [4, 56, 56, 384]           0             0
7_features.1.0.block.Linear_5                              [384, 96]     [4, 56,

In [10]:
# Display the full module tree of the network as the cell output.
model

ConvNeXt(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
    )
    (1): Sequential(
      (0): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=96, out_features=384, bias=True)
          (4): GELU(approximate='none')
          (5): Linear(in_features=384, out_features=96, bias=True)
          (6): Permute()
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=

5. Замени выходной слой форматом, который подходит под задачу: бинарная или многоклассовая классификация.

In [11]:
# Display the classification head before it is modified.
model.classifier

Sequential(
  (0): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=768, out_features=1000, bias=True)
)

In [12]:
# Display the last layer of the head (index 2 of the classifier Sequential).
model.classifier[2]

Linear(in_features=768, out_features=1000, bias=True)

In [13]:
# Replace the pretrained output layer with a fresh one for this dataset. The
# input width is read from the layer being replaced and the output width from
# the class folders found by ImageFolder, so neither number is hard-coded.
model.classifier[2] = nn.Linear(
    in_features=model.classifier[2].in_features,
    out_features=len(train_ds.classes),
    bias=True,
)

In [14]:
# Display the classification head again to check the replaced output layer.
model.classifier

Sequential(
  (0): LayerNorm2d((768,), eps=1e-06, elementwise_affine=True)
  (1): Flatten(start_dim=1, end_dim=-1)
  (2): Linear(in_features=768, out_features=10, bias=True)
)

6. Обучи модель (только последний слой!) и зафиксируй метрику на валидационной части выборки.

In [15]:
# Global switch read inside fit_model: when truthy, per-epoch metrics are sent to MLflow.
use_mlflow = True
# Experiment name passed to mlflow.set_experiment in fit_with_mlflow.
mlflow_experiment_name = "convnext_tiny_tuning"

In [16]:
# Optimisation setup shared by the training helpers below.
lr = 1e-4
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
criterion = torch.nn.CrossEntropyLoss()
# Read the class count from the model's output layer so the confusion matrix
# built in classification_metrics always matches the width of the logits.
n_classes = model.classifier[2].out_features

In [17]:
def fit_with_mlflow(
    model=model,
    epochs=15,
    optimizer=optimizer,
    criterion=criterion,
    train_loader=train_loader,
    valid_loader=valid_loader,
    augmentation=True,
):
    """Train with ``fit_model`` inside an MLflow run and return its history.

    The run is created in the experiment named by the notebook-level
    ``mlflow_experiment_name``. The learning rate and batch size used for the
    run name and the logged parameters are read from the ``optimizer`` and
    ``train_loader`` that were actually passed in, so a call with
    non-default objects is logged correctly.

    Note: the default argument values are bound when this ``def`` is
    executed; re-run the cell after rebinding ``model``, ``optimizer`` or
    the loaders if the defaults should pick up the new objects.

    Args:
        model: network to train.
        epochs: number of epochs.
        optimizer: optimizer; the learning rate of its first parameter group
            is the one that gets logged.
        criterion: loss function.
        train_loader: training batches; its ``batch_size`` is logged.
        valid_loader: validation batches.
        augmentation: flag that is only logged ("Yes"/"No"); it does not
            change the data pipeline.

    Returns:
        The history dict returned by ``fit_model``.
    """
    batch_size = train_loader.batch_size
    learning_rate = optimizer.param_groups[0]["lr"]

    # Select the experiment (it is created if it does not exist yet).
    mlflow.set_experiment(mlflow_experiment_name)
    with mlflow.start_run(
        run_name=f"convnext_tiny_bs_{batch_size}_lr_{learning_rate}"
    ):
        # Log the hyperparameters of this run.
        mlflow.log_param("batch_size", batch_size)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("epochs", epochs)
        mlflow.log_param("device", DEVICE)
        mlflow.log_param("optimizer", optimizer)
        mlflow.log_param("criterion", criterion)
        mlflow.log_param("augmentation", ("Yes" if augmentation else "No"))

        # Run the training loop; per-epoch metrics are logged inside it.
        logs = fit_model(
            model=model,
            epochs=epochs,
            optimizer=optimizer,
            criterion=criterion,
            train_loader=train_loader,
            valid_loader=valid_loader,
        )

        # Optionally store the trained model in MLflow:
        # mlflow.pytorch.log_model(model, "model")

    # Leaving the `with` block ends the run automatically.
    return logs

In [18]:
# Display the module tree again, now with the replaced output layer.
model

ConvNeXt(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
      (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
    )
    (1): Sequential(
      (0): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=96, out_features=384, bias=True)
          (4): GELU(approximate='none')
          (5): Linear(in_features=384, out_features=96, bias=True)
          (6): Permute()
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): CNBlock(
        (block): Sequential(
          (0): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (1): Permute()
          (2): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (3): Linear(in_features=

In [19]:
# Print the requires_grad flag of every parameter tensor, one per line.
for tensor in model.parameters():
    print(tensor.requires_grad)

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


##### Замораживаем все слои нейросети

In [20]:
# Freeze the whole network: turn off gradient tracking for every parameter.
model.requires_grad_(False)

# Print the flags to confirm that nothing is trainable any more.
for tensor in model.parameters():
    print(tensor.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
Fals

##### Размораживаем только последний слой (классификатор)

In [21]:
# Re-enable gradients for the output layer only (its weight and bias).
model.classifier[2].requires_grad_(True)

In [22]:
# Print the flags again; only the output layer's tensors should be trainable.
for tensor in model.parameters():
    print(tensor.requires_grad)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
Fals

In [None]:
# Keep the returned history instead of discarding it, so this run can be
# plotted now and compared with later fine-tuning variants.
history_head_only = fit_with_mlflow()
_ = plot_history(history_head_only, suptitle="convnext_tiny: classifier head only")

7. Распечатай изображение из тестовой выборки и подпиши класс картинки, предсказанный моделью.

In [None]:
# code

8. "Разморозь" несколько слоев базовой модели и вновь обучи ее. Зафиксируй метрику качества для модели. О том, как сделать unfreeze нескольких слоев сразу, а не только последнего, можно почитать тут: [discuss.pytorch.org](https://discuss.pytorch.org/t/how-the-pytorch-freeze-network-in-some-layers-only-the-rest-of-the-training/7088/3)

In [None]:
# code

9. Сравни качество двух моделей: предобученной с замененным выходным слоем и дообученной с несколькими размороженными слоями.

In [None]:
# code

<img src="https://icons.iconarchive.com/icons/icons8/windows-8/256/Programming-Github-icon.png" width=32 /> Сохрани файл для __github__ и распечатай результат команды `!git status` в ячейке ниже.

In [None]:
# code

10. Сохрани модель (пример можно посмотреть [тут](../../learning/aux/model_saving.ipynb) или в [документации](https://pytorch.org/tutorials/beginner/saving_loading_models.html))

In [None]:
# code

11. Реализуй функцию, которая на вход принимает путь к файлу, а в ответ возвращает класс объекта.

In [None]:
def get_prediction(path: str) -> str:
    """Return the class name the model predicts for the image at ``path``.

    Uses the notebook-level ``model``, ``valid_ds`` and ``DEVICE``: the image
    is read with the dataset's own loader, passed through the validation
    transform, and the index of the largest logit is mapped to a name via
    ``valid_ds.classes``.

    Args:
        path: path to an image file.

    Returns:
        The predicted class name.
    """
    image = valid_ds.loader(path)
    # Add a batch dimension of size 1 before moving the tensor to the device.
    batch = valid_ds.transform(image).unsqueeze(0).to(DEVICE)

    # Predict in eval mode, then restore whatever mode the model was in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits = model(batch)
    finally:
        model.train(was_training)

    return valid_ds.classes[logits.argmax(dim=1).item()]

12. Сохрани ноутбук на github