In [1]:
import json
import yaml
from PIL import Image
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
import io
import torch
import torch.nn as nn
import torch.nn.functional as F
from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms

%load_ext autoreload
%autoreload 2

In [2]:
# !pip install efficientnet-pytorch
# !pip install albumentations==1.1.0
# !pip install numpy==1.24.0

In [3]:
from src.models import SiamNet, get_siamnet, get_transforms

In [4]:
# transform = Transform()

# config_path = 'configs/finetuning_config.yaml'
config_path = 'configs/train_config.yaml'

with open(config_path, 'r') as file:
    train_config = yaml.safe_load(file)
    # finetuning_config = yaml.safe_load(file)

In [5]:
net = get_siamnet(train_config)

Loaded pretrained weights for efficientnet-b3
loaded weights from /mnt/DATA2/dorin/res.cv.science.is.matching/effnet_tuning/checkpoints/SiamNet_tunehead_600.pth


In [6]:
# class CustomDataset(Dataset):
#     def __init__(self, json_file):
#         with open(json_file, 'r') as f:
#             self.data = json.load(f)
#         self.basic_transform, self.simple_transform, self.train_transform = get_transforms()
#         self.image_paths = []

#         for images in self.data.values():
#             for img_path in images:
#                 self.image_paths.append(img_path)

#     def __len__(self):
#         return len(self.image_paths)

#     def __getitem__(self, idx):
#         image_path = self.image_paths[idx]
#         image = Image.open(image_path).convert('RGB')

#         # Apply the first transformation
#         augmented1 = self.train_transform(image).convert('RGB')
#         # augmented1.show()

#         # Apply the second transformation
#         augmented2 = self.simple_transform(image).convert('RGB')
#         # augmented2.show()
        
#         tensor1 = self.basic_transform(augmented1)
#         tensor2 = self.basic_transform(augmented2)

#         return tensor1, tensor2



# json_file = '../notebooks/data/thur/thur_dataset.json'

# full_dataset = CustomDataset(json_file)

# train_size = int(0.8 * len(full_dataset))
# test_size = len(full_dataset) - train_size
# train_indices, test_indices = train_test_split(list(range(len(full_dataset))),
#                                                test_size=test_size,
#                                                random_state=42)

# train_dataset = Subset(full_dataset, train_indices)
# test_dataset = Subset(full_dataset, test_indices)

# train_loader = DataLoader(train_dataset, batch_size=train_config['training']['batch_size'], shuffle=True, num_workers=0)
# test_loader = DataLoader(test_dataset, batch_size=train_config['training']['batch_size'], shuffle=False, num_workers=0)

In [7]:
import os
from PIL import Image
import numpy as np
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.model_selection import train_test_split
from torchvision import transforms

class CombinedDataset(Dataset):
    def __init__(self, data_dir):
        self.basic_transform, self.simple_transform, self.train_transform = get_transforms()
        self.data_dir = data_dir
        # self.transform = transform
        self.image_paths = []
        self.coco_paths = []
        self.thur_paths = []
        self.landscapes_paths = []

        # Сбор путей к изображениям для COCO 2017
        coco_dir = os.path.join(data_dir, 'coco2017', 'coco_images')
        for split in ['test2017', 'train2017', 'unlabeled2017', 'val2017']:
            split_dir = os.path.join(coco_dir, split)
            for root, _, files in os.walk(split_dir):
                for file in files:
                    if file.endswith(('.jpg', '.png', '.jpeg')):
                        self.coco_paths.append(os.path.join(root, file))

        # Сбор путей к изображениям для THUR15000
        thur_dir = os.path.join(data_dir, 'thur', 'THUR15000')
        for category in os.listdir(thur_dir):
            category_dir = os.path.join(thur_dir, category, 'Src')
            if os.path.isdir(category_dir):
                for file in os.listdir(category_dir):
                    if file.endswith(('.jpg', '.png', '.jpeg')):
                        self.thur_paths.append(os.path.join(category_dir, file))

        # Сбор путей к изображениям для landscapes
        landscapes_dir = os.path.join(data_dir, 'landscapes')
        for root, _, files in os.walk(landscapes_dir):
            for file in files:
                if file.endswith(('.jpg', '.png', '.jpeg')):
                    self.landscapes_paths.append(os.path.join(root, file))

        # Combine all paths
        self.image_paths = self.coco_paths + self.thur_paths + self.landscapes_paths

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path).convert('RGB')

        # Apply the first transformation
        augmented1 = self.train_transform(image).convert('RGB')

        # Apply the second transformation
        augmented2 = self.simple_transform(image).convert('RGB')

        tensor1 = self.basic_transform(augmented1)
        tensor2 = self.basic_transform(augmented2)
        
        return tensor1, tensor2

In [8]:
data_dir = '../notebooks/data'

full_dataset = CombinedDataset(data_dir)

# Split each dataset separately
coco_train_indices, coco_test_indices = train_test_split(
    list(range(len(full_dataset.coco_paths))), test_size=0.05, random_state=42)
thur_train_indices, thur_test_indices = train_test_split(
    list(range(len(full_dataset.thur_paths))), test_size=0.05, random_state=42)
landscapes_train_indices, landscapes_test_indices = train_test_split(
    list(range(len(full_dataset.landscapes_paths))), test_size=0.05, random_state=42)

# Combine the indices
train_indices = coco_train_indices + thur_train_indices + landscapes_train_indices
test_indices = coco_test_indices + thur_test_indices + landscapes_test_indices

# Create subsets
train_dataset = Subset(full_dataset, train_indices)
test_dataset = Subset(full_dataset, test_indices)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=train_config['training']['batch_size'], shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=train_config['training']['batch_size'], shuffle=False, num_workers=0)

In [9]:
from train import train_model

In [None]:
train_model(net, train_loader, test_loader, train_config, resume=False)

100%|██████████| 9121/9121 [4:49:38<00:00,  1.91s/it]  


Epoch [1/300], Train Loss: 0.0006,               Train Accuracy: 0.9839, Train F1: 0.6521, Train Recall: 0.4838,               Train Precision: 0.9999, Val Loss: 0.0452,               lr: 0.0001,               Val Accuracy: 0.9788, Val F1: 0.4879, Val Recall: 0.3227,               Val Precision: 1.0000
Checkpoint saved at epoch 1


100%|██████████| 9121/9121 [4:56:21<00:00,  1.95s/it]  


Epoch [2/300], Train Loss: 0.0003,               Train Accuracy: 0.9766, Train F1: 0.4028, Train Recall: 0.2522,               Train Precision: 1.0000, Val Loss: 0.0507,               lr: 5e-05,               Val Accuracy: 0.9763, Val F1: 0.3891, Val Recall: 0.2415,               Val Precision: 1.0000
Checkpoint saved at epoch 2


100%|██████████| 9121/9121 [4:57:38<00:00,  1.96s/it]  


Epoch [3/300], Train Loss: 0.0001,               Train Accuracy: 0.9766, Train F1: 0.3997, Train Recall: 0.2497,               Train Precision: 1.0000, Val Loss: 0.0501,               lr: 5e-05,               Val Accuracy: 0.9768, Val F1: 0.4107, Val Recall: 0.2584,               Val Precision: 1.0000
Checkpoint saved at epoch 3


100%|██████████| 9121/9121 [5:01:11<00:00,  1.98s/it]  


Epoch [4/300], Train Loss: 0.0001,               Train Accuracy: 0.9769, Train F1: 0.4125, Train Recall: 0.2599,               Train Precision: 1.0000, Val Loss: 0.0503,               lr: 2.5e-05,               Val Accuracy: 0.9770, Val F1: 0.4162, Val Recall: 0.2628,               Val Precision: 1.0000
Checkpoint saved at epoch 4


100%|██████████| 9121/9121 [5:00:05<00:00,  1.97s/it]  


Epoch [5/300], Train Loss: 0.0001,               Train Accuracy: 0.9776, Train F1: 0.4404, Train Recall: 0.2823,               Train Precision: 1.0000, Val Loss: 0.0430,               lr: 1.25e-05,               Val Accuracy: 0.9787, Val F1: 0.4842, Val Recall: 0.3194,               Val Precision: 1.0000
Checkpoint saved at epoch 5


100%|██████████| 9121/9121 [4:57:42<00:00,  1.96s/it]  


Epoch [6/300], Train Loss: 0.0000,               Train Accuracy: 0.9789, Train F1: 0.4919, Train Recall: 0.3261,               Train Precision: 1.0000, Val Loss: 0.0408,               lr: 1.25e-05,               Val Accuracy: 0.9789, Val F1: 0.4895, Val Recall: 0.3241,               Val Precision: 1.0000
Checkpoint saved at epoch 6


100%|██████████| 9121/9121 [4:57:37<00:00,  1.96s/it]  


Epoch [7/300], Train Loss: 0.0001,               Train Accuracy: 0.9802, Train F1: 0.5351, Train Recall: 0.3653,               Train Precision: 1.0000, Val Loss: 0.0312,               lr: 1.25e-05,               Val Accuracy: 0.9818, Val F1: 0.5892, Val Recall: 0.4176,               Val Precision: 1.0000
Checkpoint saved at epoch 7


100%|██████████| 9121/9121 [4:59:31<00:00,  1.97s/it]  


Epoch [8/300], Train Loss: 0.0000,               Train Accuracy: 0.9807, Train F1: 0.5521, Train Recall: 0.3813,               Train Precision: 1.0000, Val Loss: 0.0319,               lr: 1.25e-05,               Val Accuracy: 0.9813, Val F1: 0.5735, Val Recall: 0.4020,               Val Precision: 1.0000
Checkpoint saved at epoch 8


100%|██████████| 9121/9121 [5:01:13<00:00,  1.98s/it]  


Epoch [9/300], Train Loss: 0.0001,               Train Accuracy: 0.9814, Train F1: 0.5768, Train Recall: 0.4052,               Train Precision: 1.0000, Val Loss: 0.0338,               lr: 1.25e-05,               Val Accuracy: 0.9813, Val F1: 0.5746, Val Recall: 0.4031,               Val Precision: 1.0000
Checkpoint saved at epoch 9


  0%|          | 21/9121 [00:38<4:17:40,  1.70s/it]

### Тюнинг модельки на big data


Epoch [1/300], Train Loss: 0.0034,               Train Accuracy: 0.9897, Train F1: 0.8034, Train Recall: 0.6715,               Train Precision: 0.9998, Val Loss: 0.0082,               lr: 5e-05,               Val Accuracy: 0.9903, Val F1: 0.8170, Val Recall: 0.6908,               Val Precision: 0.9994
Checkpoint saved at epoch 1
100%|██████████| 9121/9121 [4:50:44<00:00,  1.91s/it]   
Epoch [2/300], Train Loss: 0.0027,               Train Accuracy: 0.9919, Train F1: 0.8516, Train Recall: 0.7423,               Train Precision: 0.9988, Val Loss: 0.0061,               lr: 2.5e-05,               Val Accuracy: 0.9927, Val F1: 0.8670, Val Recall: 0.7660,               Val Precision: 0.9986
Checkpoint saved at epoch 2
100%|██████████| 9121/9121 [4:50:38<00:00,  1.91s/it]   
Epoch [3/300], Train Loss: 0.0022,               Train Accuracy: 0.9933, Train F1: 0.8809, Train Recall: 0.7890,               Train Precision: 0.9970, Val Loss: 0.0050,               lr: 2.5e-05,               Val Accuracy: 0.9939, Val F1: 0.8920, Val Recall: 0.8078,               Val Precision: 0.9957
Checkpoint saved at epoch 3
100%|██████████| 9121/9121 [4:56:18<00:00,  1.95s/it]   
Epoch [4/300], Train Loss: 0.0022,               Train Accuracy: 0.9942, Train F1: 0.8990, Train Recall: 0.8198,               Train Precision: 0.9952, Val Loss: 0.0048,               lr: 1.25e-05,               Val Accuracy: 0.9942, Val F1: 0.8992, Val Recall: 0.8207,               Val Precision: 0.9942
Checkpoint saved at epoch 4
100%|██████████| 9121/9121 [4:57:33<00:00,  1.96s/it]  
Epoch [5/300], Train Loss: 0.0020,               Train Accuracy: 0.9949, Train F1: 0.9112, Train Recall: 0.8419,               Train Precision: 0.9930, Val Loss: 0.0045,               lr: 1.25e-05,               Val Accuracy: 0.9945, Val F1: 0.9037, Val Recall: 0.8279,               Val Precision: 0.9948
Checkpoint saved at epoch 5
100%|██████████| 9121/9121 [4:36:38<00:00,  1.82s/it]  
Epoch [6/300], Train Loss: 0.0020,               Train Accuracy: 0.9952, Train F1: 0.9176, Train Recall: 0.8533,               Train Precision: 0.9923, Val Loss: 0.0039,               lr: 1.25e-05,               Val Accuracy: 0.9959, Val F1: 0.9307, Val Recall: 0.8791,               Val Precision: 0.9887
Checkpoint saved at epoch 6
100%|██████████| 9121/9121 [4:33:00<00:00,  1.80s/it]  
Epoch [7/300], Train Loss: 0.0020,               Train Accuracy: 0.9955, Train F1: 0.9227, Train Recall: 0.8627,               Train Precision: 0.9917, Val Loss: 0.0038,               lr: 1.25e-05,               Val Accuracy: 0.9959, Val F1: 0.9313, Val Recall: 0.8794,               Val Precision: 0.9896
Checkpoint saved at epoch 7
100%|██████████| 9121/9121 [4:30:48<00:00,  1.78s/it]  
Epoch [8/300], Train Loss: 0.0020,               Train Accuracy: 0.9959, Train F1: 0.9309, Train Recall: 0.8799,               Train Precision: 0.9881, Val Loss: 0.0038,               lr: 1.25e-05,               Val Accuracy: 0.9964, Val F1: 0.9395, Val Recall: 0.9011,               Val Precision: 0.9812
Checkpoint saved at epoch 8