# Тестирование логирования
_Воспроизведение экспериментов_

In [1]:
from datetime import datetime
from os.path import exists, join

import albumentations as A
import numpy as np
import pandas as pd
import torch
import torchvision
from torch.nn import CrossEntropyLoss
from torch.optim import SGD, Adam
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import DataLoader

from src.config import (MOT20_EXT_FIRST_AXIS_MEAN, MOT20_EXT_SECOND_AXIS_MEAN,
                        RESULTS_PATH)
from src.models.reidentification import SiameseBasicCNN, SiameseTransfered
from src.models.utils import resnet_grad_l4_unlock
from src.train import get_loaders, train_siamese
from src.train.loss import ContrastiveLoss
from src.train.utils import save_train_results, get_distance_accuracy
from src.transforms import get_norm_transform, get_resize_transform
from tqdm import tqdm

  from .autonotebook import tqdm as notebook_tqdm
  warn(f"Failed to load image Python extension: {e}")


## Создание и обработка csv

In [3]:
# Timestamp (as a string) shared by the sample experiment rows in this notebook.
dt = str(datetime.now())

# One-row experiments log built directly from a record, then cast to the
# intended column dtypes.
df = pd.DataFrame([{
    'model_name': 'test',
    'datetime': dt,
    'epoch_count': 1,
    'optimizer': 'adam',
    'lr': 1e-3,
    'gamma': 0.5,
    'step_size': 2,
    'loss_name': 'contr',
    'dataset_config': 'a',
}]).astype({
    'model_name': 'object',
    # pandas >= 2.0 rejects the unit-less 'datetime64' alias, so the unit is
    # spelled out; this is also the dtype the recorded output shows.
    'datetime': 'datetime64[ns]',
    'epoch_count': 'int32',
    'optimizer': 'object',
    'lr': 'float64',
    'gamma': 'float64',
    'step_size': 'int32',
    'loss_name': 'object',
    'dataset_config': 'object',
})

df.dtypes

model_name                object
datetime          datetime64[ns]
epoch_count                int32
optimizer                 object
lr                       float64
gamma                    float64
step_size                  int32
loss_name                 object
dataset_config            object
dtype: object

In [4]:
# Locations of the experiments log and of the dataset-config registry,
# both stored under RESULTS_PATH.
file_path, config_path = (
    join(RESULTS_PATH, csv_name)
    for csv_name in ('experiments.csv', 'configs.csv')
)

# # experiments.csv
# # pd.read_csv()
# df.to_csv(file_path, sep=',', index=False)

In [5]:
# pd.read_csv(file_path)

In [6]:
# if (not exists(file_path)):
#     # TODO: указать типы
#     pd.DataFrame(columns=[
#         'model_name',
#         'datetime',
#         'epoch_count',
#         'optimizer',
#         'lr',
#         'gamma',
#         'step_size',
#         'loss_name',
#         'dataset_config'
#     ]).to_csv(file_path, sep=',', index=False)

In [7]:
# df = pd.read_csv(file_path)

In [8]:
# Second sample experiment, built as a standalone one-row frame (index label 0).
res = pd.DataFrame(
    dict(
        model_name='test2',
        datetime=dt,
        epoch_count=1,
        optimizer='adam',
        lr=1e-3,
        gamma=0.5,
        step_size=2,
        loss_name='contr',
        dataset_config='a',
    ),
    index=[0],
)
res

Unnamed: 0,model_name,datetime,epoch_count,optimizer,lr,gamma,step_size,loss_name,dataset_config
0,test2,2023-05-15 23:49:07.326662,1,adam,0.001,0.5,2,contr,a


In [9]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported replacement. Like append, it keeps each frame's
# own index labels, so both rows still carry label 0.
df = pd.concat([df, res])
df

  df = df.append(res)


Unnamed: 0,model_name,datetime,epoch_count,optimizer,lr,gamma,step_size,loss_name,dataset_config
0,test,2023-05-15 23:49:07.326662,1,adam,0.001,0.5,2,contr,a
0,test2,2023-05-15 23:49:07.326662,1,adam,0.001,0.5,2,contr,a


In [10]:
# df.to_csv(file_path, sep=',', index=False)

In [11]:
# pd.read_csv(file_path)

## Сохранение конфигов

In [12]:
# Empty registry of dataset configurations: only the column layout is defined here.
df_config = pd.DataFrame(
    columns=[
        'dataset_config', 'dataset', 'dataset_use',
        'train_proportion', 'val_proportion', 'test_proportion',
        'batch_size',
    ],
)
df_config

Unnamed: 0,dataset_config,dataset,dataset_use,train_proportion,val_proportion,test_proportion,batch_size


In [13]:
# Sample dataset-config record as a one-row frame (index label 0).
res = pd.DataFrame(
    dict(
        dataset_config='mot1',
        dataset='mot',
        dataset_use=0.02,
        train_proportion=0.7,
        val_proportion=0.15,
        test_proportion=0.15,
        batch_size=16,
    ),
    index=[0],
)

In [14]:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
# pd.concat is the supported way to add the new config row.
df_config = pd.concat([df_config, res])
df_config

  df_config = df_config.append(res)


Unnamed: 0,dataset_config,dataset,dataset_use,train_proportion,val_proportion,test_proportion,batch_size
0,mot1,mot,0.02,0.7,0.15,0.15,16


In [15]:
# Membership check against the registered config names: one flag per candidate.
tuple(
    config_name in df_config['dataset_config'].unique()
    for config_name in ('mot1', 'mot2')
)

(True, False)

In [16]:
# Two dataset configurations that share every base setting; each entry also
# stores its own name under "dataset_config".
conf = {
    config_name: {
        "dataset_config": config_name,
        "dataset": 'MOT20_ext',
        "dataset_use": 0.002,
        "train_proportion": 0.65,
        "val_proportion": 0.15,
        "test_proportion": 0.2,
        "batch_size": 16,
    }
    for config_name in ("mot20_ext_v1", "mot20_ext_v2")
}
# Only the v2 configuration carries additional, nested parameters.
conf["mot20_ext_v2"]['extra_values'] = {
    'visibility_threshold': 0.7,
    'frame_distance': 3,
    'negative_proportion': 0.5,
}

In [17]:
# A flat config dict maps directly onto a one-row frame (index label 0).
pd.DataFrame(
    data=conf['mot20_ext_v1'],
    index=[0],
)

Unnamed: 0,dataset_config,dataset,dataset_use,train_proportion,val_proportion,test_proportion,batch_size
0,mot20_ext_v1,MOT20_ext,0.002,0.65,0.15,0.2,16


## Проверка функций

In [18]:
# save_train_results(
#     'siamese1',
#     datetime.now(),
#     10, 
#     1e-3,
#     'adam',
#     'contrastive',
#     'mot20_ext_v1',
#     0.5,
#     3
# )

In [19]:
# save_train_results(
#     'siamese2',
#     datetime.now(),
#     10, 
#     1e-3,
#     'adam',
#     'contrastive',
#     'mot20_ext_v1',
#     0.5,
#     3,
#     conf['mot20_ext_v1']
# )

In [20]:
# save_train_results(
#     'siamese2',
#     datetime.now(),
#     10, 
#     1e-3,
#     'adam',
#     'contrastive',
#     'mot20_ext_v1',
#     0.5,
#     3,
#     conf['mot20_ext_v1']
# )

In [21]:
# save_train_results(
#     'siamese2',
#     datetime.now(),
#     10, 
#     1e-3,
#     'adam',
#     'contrastive',
#     'mot20_ext_v1',
#     0.5,
#     3,
#     conf['mot20_ext_v2']
# )

### Экстра параметры

In [22]:
# Example of the nested "extra" parameters attached to a dataset config.
extra = {
    'visibility_threshold': 0.7,
    'frame_distance': 3,
    'negative_proportion': 0.5
}

# dict.items() yields the same (key, value) pairs as zipping the dict with
# its own values, without the redundant second iteration.
for k, v in extra.items():
    print(k, v)

visibility_threshold 0.7
frame_distance 3
negative_proportion 0.5


In [23]:
# Serialize the extra parameters as "key=value" pairs separated by ';'.
';'.join(f'{k}={v}' for k, v in extra.items())

'visibility_threshold=0.7;frame_distance=3;negative_proportion=0.5'

In [24]:
c = conf['mot20_ext_v2']
# The nested dict is replaced in place by its "key=value;..." string form.
# The isinstance guard keeps the cell re-runnable: after the first run the
# value is already a string, and a str has no .values()/.items().
if isinstance(c['extra_values'], dict):
    c['extra_values'] = ';'.join(
        f'{k}={v}' for k, v in c['extra_values'].items()
    )
c

{'dataset_config': 'mot20_ext_v2',
 'dataset': 'MOT20_ext',
 'dataset_use': 0.002,
 'train_proportion': 0.65,
 'val_proportion': 0.15,
 'test_proportion': 0.2,
 'batch_size': 16,
 'extra_values': 'visibility_threshold=0.7;frame_distance=3;negative_proportion=0.5'}

In [25]:
# save_train_results(
#     'siamese2',
#     datetime.now(),
#     10, 
#     1e-3,
#     'adam',
#     'contrastive',
#     'mot20_ext_v1',
#     0.5,
#     3,
#     conf['mot20_ext_v2']
# )

In [26]:
# pd.read_csv(join(RESULTS_PATH, 'configs.csv'))

In [27]:
# Serializing a list of floats into a single ';'-separated string.
a = [1.1, 1.2, 1.3]
';'.join(str(number) for number in a)

'1.1;1.2;1.3'

## Тестирование при обучении

In [28]:
# Dataset parameters used for the training run below.
dataset_config = dict(
    dataset_config='mot20_ext-v1a',
    dataset='mot20_ext',
    dataset_use=0.002,
    val_proportion=0.1,
    test_proportion=0.2,
    batch_size=16,
    extra_parameters=dict(
        visibility_threshold=0.9,
        negative_proportion=0.4,
    ),
)

In [29]:
# Build the transforms: a resize to the MOT20_ext mean axis sizes followed by
# normalization, composed into a single albumentations pipeline.
resize_transform = get_resize_transform((MOT20_EXT_FIRST_AXIS_MEAN, MOT20_EXT_SECOND_AXIS_MEAN)) 
norm_transform = get_norm_transform()
transform = A.Compose([resize_transform, norm_transform])
# Create the train / val / test data loaders from the dataset config.
train_loader, val_loader, test_loader = get_loaders(dataset_config, transform=transform)
# Configure the model.
model_rn18_l4 = SiameseTransfered(
    # Feature extractor: resnet18.
    # NOTE(review): `pretrained=` is deprecated in newer torchvision releases in
    # favour of `weights=` - confirm against the installed version.
    torchvision.models.resnet18(pretrained=True),
    # Per the original note: unfreezes layer4 and the output linear layer.
    freeze_grad_fn=resnet_grad_l4_unlock,
    # Model name.
    name='siamese_resnet18_l4'
)

# Optimization setup: contrastive loss, Adam over the model's parameters.
lr = 1e-3
criterion = ContrastiveLoss()
optimizer = Adam(model_rn18_l4.parameters(), lr)
# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")




In [30]:
# Single-epoch smoke run of the siamese training routine; keyword arguments
# are grouped by role (model/optimization, data, run settings).
model, results = train_siamese(
    model=model_rn18_l4,
    criterion=criterion,
    optimizer=optimizer,
    lr=lr,
    train_loader=train_loader,
    val_loader=val_loader,
    config=dataset_config,
    device=device,
    epoch_count=1,
    threshold=0.25,
)

Epoch 0/0:


100%|██████████| 33/33 [01:13<00:00,  2.21s/it]

train Loss: 630.9809 Acc: 0.8580



100%|██████████| 4/4 [00:02<00:00,  1.87it/s]

val Loss: 26.3586 Acc: 0.9375
Model saved at siamese_resnet18_l4.pth



  df = df.append(pd.DataFrame({


In [31]:
# Sanity check of the accuracy formula: one of the two elements matches -> 0.5.
(torch.tensor([1, 2]) == torch.tensor([1, 1])).float().mean().item()

0.5

## Воспроизведение результатов

In [40]:
# Load the experiments log; the separator is spelled out to mirror how the
# file is written elsewhere in this notebook (sep=',').
exp_df = pd.read_csv(file_path, sep=',')

In [41]:
# Load the dataset-config registry (comma-separated, like the experiments log).
conf_df = pd.read_csv(config_path, sep=',')

Что нам нужно?
- доставать лучшие модели и отправлять их на тест
  - написать автоматический тест
  - лучшие модели вычислять в рамках датасета, на котором обучались (dataset). один датасет - одна задача
  - смотрим на лучшие модели, смотрим на лучшие конфиги

In [38]:
def test_siamese(model: torch.nn.Module, test_loader: DataLoader, threshold: float) -> tuple[float, np.ndarray]:
    """Вычисляет score для тестового набора"""
    test_predictions = []
    acc = 0
    for x1, x2, y in tqdm(test_loader):
        distance = model(x1, x2)
        test_predictions = np.concatenate(distance)
        acc += get_distance_accuracy(distance.clone(), y, threshold)
    
    return acc / len(test_loader), test_predictions

In [44]:
# Walk over every dataset configuration present in the log and select the
# experiments that were run with it.
for dataset in exp_df['dataset_config'].unique():
    df = exp_df.loc[exp_df['dataset_config'].eq(dataset)]
    

            model_name                    datetime  epoch_count optimizer  \
0  siamese_resnet18_l4  2023-05-15 14:16:41.458515            1      Adam   
1  siamese_resnet18_l4  2023-05-15 14:18:55.297936            1      Adam   
2  siamese_resnet18_l4  2023-05-15 14:19:18.337717            1      Adam   
3  siamese_resnet18_l4  2023-05-15 23:50:26.011689            1      Adam   

      lr  gamma  step_size        loss_name dataset_config extra_parameters  \
0  0.001    NaN        NaN  ContrastiveLoss  mot20_ext-v1a   threshold=0.25   
1  0.001    NaN        NaN  ContrastiveLoss  mot20_ext-v1a   threshold=0.25   
2  0.001    NaN        NaN  ContrastiveLoss  mot20_ext-v1a   threshold=0.25   
3  0.001    NaN        NaN  ContrastiveLoss  mot20_ext-v1a   threshold=0.25   

    val_losses  val_accuracies  test_accuracy  
0  4087.438477          0.0000            NaN  
1  4045.716309          0.0000            NaN  
2     0.000000          0.0000            NaN  
3    26.358607          0.