In [None]:
!pip install accelerate -Uq

exit()

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.6/302.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.3/21.3 MB[0m [31m46.3 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
!pip install -q datasets transformers evaluate

# Сделаем torch Dataset

In [None]:
from transformers import set_seed
import os
import random
import numpy as np

import torch

def set_all_seeds(seed=22):

    # python's seeds
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)

    # torch's seeds
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


magic_seed = 22
set_all_seeds(seed=magic_seed)

# 6 дней после заражения

# Сделаем torch Dataset

In [None]:
# Import libraries
from torch.utils.data import Dataset
from torchvision import transforms
from PIL import Image
import os
import numpy as np
import pandas as pd
import torch

# Mount google drive
from google.colab import drive
drive.mount('gdrive')

## Подготовка набора данных

In [None]:
class MultispecDataset(Dataset):
    # create dataset of multispectral images
    def __init__(self, nir_dir, rgb_dir, mask_dir, img_names, transform=None):
        '''
        nir_dir:
            path to NIR images

        rgb_dir:
            path to RGB images

        mask_dir:
            path to masks

        image_names:
            names of images
        '''
        self.nir_dir = nir_dir
        self.rgb_dir = rgb_dir
        self.mask_dir = mask_dir
        self.img_names = img_names
        self.transform = transform

    def __getitem__(self, idx):
        # Open image
        nir_img = Image.open(os.path.join(self.nir_dir, self.img_names[idx]))

        # transform to tensor, change size from [512, 512, 3] to [3, 512, 512]
        # and scale pixel values to range [0, 1]
        nir_img = torch.tensor(np.array(nir_img)).permute((2, 0, 1)) / 255

        rgb_img = Image.open(os.path.join(self.rgb_dir, self.img_names[idx]))
        rgb_img = torch.tensor(np.array(rgb_img)).permute((2, 0, 1)) / 255

        mask = Image.open(os.path.join(self.mask_dir, self.img_names[idx].replace('JPG', 'png')))

        # transform if desired
        if self.transform:
            nir_img = self.transform(nir_img)

        # concatenate images to get the multispectral one
        image = torch.cat([nir_img, rgb_img], axis=0)

        return {'pixel_values': image, 'labels': torch.tensor(np.array(mask) / 255).long()}

    def __len__(self):
        return len(self.img_names)

In [None]:
id2label = {0: 'unlabeled', 1: 'leaf-disease'}
label2id = {v: k for k, v in id2label.items()}

num_labels = len(id2label)

In [None]:
id2label

{0: 'unlabeled', 1: 'leaf-disease'}

In [None]:
from transformers import SegformerImageProcessor

model_checkpoint =  "nvidia/mit-b2"
image_processor = SegformerImageProcessor.from_pretrained(model_checkpoint, do_reduce_labels=False)

In [None]:
image_mean = image_processor.image_mean
image_std = image_processor.image_std
image_size = image_processor.size

print('image_mean:', image_mean)
print('image_std:', image_std)
print('image_size:',image_size)

image_mean: [0.485, 0.456, 0.406]
image_std: [0.229, 0.224, 0.225]
image_size: {'height': 512, 'width': 512}


In [None]:
image_transforms = transforms.Compose([
                                       transforms.Resize(size=(image_size['height'], image_size['width'])),
                                       transforms.Normalize(mean=image_mean,std=image_std)])

In [None]:
# Image directories
nir_dir = '/content/gdrive/MyDrive/Cropped Potato/2023_ИК_больные/6 дней после заражения'
rgb_dir = '/content/gdrive/MyDrive/Cropped Potato/2023_ВИД_больные/6 дней после заражения'
mask_dir = '/content/gdrive/MyDrive/Cropped Potato/Разметка/6 дней после заражения/Разметка картинки'

In [None]:
rgb_images = os.listdir(rgb_dir)
mask_images = os.listdir(mask_dir)

In [None]:
# 0.7 : 0.15 : 0.15
num_train_samples = 945
num_validation_samples = 210
num_test_samples = len(rgb_images) - num_train_samples - num_validation_samples # 189

In [None]:
# Make train-val-test split
train_images = rgb_images[:num_train_samples]
train_masks = mask_images[:num_train_samples]

val_images = rgb_images[num_train_samples:(num_train_samples + num_validation_samples)]
val_masks = mask_images[num_train_samples:(num_train_samples + num_validation_samples)]

test_images = rgb_images[(num_train_samples + num_validation_samples):]
test_masks = mask_images[(num_train_samples + num_validation_samples):]

In [None]:
print(len(train_images), len(train_masks))
print(len(val_images), len(val_masks))
print(len(test_images), len(test_masks))

945 945
210 210
189 189


In [None]:
# Create Dataset objects
train_dataset = MultispecDataset(nir_dir, rgb_dir, mask_dir, train_images, image_transforms)
val_dataset = MultispecDataset(nir_dir, rgb_dir, mask_dir, val_images, image_transforms)

## Tune HP

### Write custom Trainers

We are going to write custom trainers (with custom loss function)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
from torch import nn
from transformers import Trainer

In [None]:
!pip install segmentation-models-pytorch -q

In [None]:
# Weighted Cross-Entropy
class WeightedCSETrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        logits_tensor = nn.functional.interpolate(
        logits,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
        )

        loss_fct = nn.CrossEntropyLoss(weight=torch.tensor([self.weight_0, self.weight_1]).to(device))
        loss = loss_fct(logits_tensor.reshape(-1, self.model.config.num_labels), labels.reshape(-1))

        return (loss, outputs) if return_outputs else loss

In [None]:
from segmentation_models_pytorch.losses import JaccardLoss

# Jaccard Loss
class JaccardTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        logits_tensor = nn.functional.interpolate(
        logits,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
        )

        disease_logits = logits_tensor[:, 1, :, :]


        loss_fct = JaccardLoss(mode='binary')
        loss = loss_fct(disease_logits,
                        labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
from segmentation_models_pytorch.losses import DiceLoss

# Dice Loss
class DiceTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        logits_tensor = nn.functional.interpolate(
        logits,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
        )

        disease_logits = logits_tensor[:, 1, :, :]


        loss_fct = DiceLoss(mode='binary')
        loss = loss_fct(disease_logits, labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
from segmentation_models_pytorch.losses import TverskyLoss

# Tversky Loss
class TverskyTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        logits_tensor = nn.functional.interpolate(
        logits,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
        )

        disease_logits = logits_tensor[:, 1, :, :]


        loss_fct = TverskyLoss(mode='binary',
                               alpha=self.alpha, beta=self.betta)
        loss = loss_fct(disease_logits, labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
from segmentation_models_pytorch.losses import FocalLoss

# Focal Loss
class FocalTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        logits_tensor = nn.functional.interpolate(
        logits,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
        )

        disease_logits = logits_tensor[:, 1, :, :]


        loss_fct = FocalLoss(mode='binary',
                               alpha=self.alpha, gamma=self.gamma)
        loss = loss_fct(disease_logits, labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
from segmentation_models_pytorch.losses import LovaszLoss

# Lovasz Loss
class LovaszTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        logits_tensor = nn.functional.interpolate(
        logits,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
        )

        disease_logits = logits_tensor[:, 1, :, :]


        loss_fct = LovaszLoss(mode='binary')
        loss = loss_fct(disease_logits, labels)

        return (loss, outputs) if return_outputs else loss

In [None]:
from segmentation_models_pytorch.losses import MCCLoss

# MCC Loss
class MCCTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        labels = inputs.get("labels")
        # forward pass
        outputs = model(**inputs)
        logits = outputs.get("logits")

        logits_tensor = nn.functional.interpolate(
        logits,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
        )

        disease_logits = logits_tensor[:, 1, :, :]


        loss_fct = MCCLoss()
        loss = loss_fct(disease_logits, labels)

        return (loss, outputs) if return_outputs else loss

### Wandb

In [None]:
!pip install -q wandb

In [None]:
import wandb
wandb.login()

%env WANDB_PROJECT=leaf_disease_sweeps
%env WANDB_LOG_MODEL='end'

In [None]:
model_checkpoint = "nvidia/mit-b2"

In [None]:
from transformers import SegformerForSemanticSegmentation, TrainingArguments, Trainer
import torch.nn as nn

def model_init():
    '''
    initialize model

    Return:
    -------
        model to fine-tune
    '''
    model = SegformerForSemanticSegmentation.from_pretrained(
    model_checkpoint,
    id2label=id2label,
    label2id=label2id,
    ignore_mismatched_sizes=True,  # Will ensure the segmentation specific components are reinitialized.
    )

    # Change model architecture so that it will be able to work with multispectral (6-channel) images
    model.segformer.encoder.patch_embeddings[0].proj = nn.Conv2d(6, 64,
                                                             kernel_size=(7, 7),
                                                             stride=(4, 4),
                                                             padding=(3, 3))
    return model

In [None]:
# Define sweep config
sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'eval/mean',
        'goal': 'maximize'
    }
}

# hyperparameters
parameters_dict = {
    'epochs': {
        'value': 5
        },

    'batch_size': {
        'value': 8
    },

    'num_warmup_steps': {
        'distribution': 'int_uniform',
        'min': 0,
        'max': 200,
    },

    'learning_rate': {
        'distribution': 'uniform',
        'min': 1e-6,
        'max': 1e-3
    },

    'weight_decay': {
        'distribution': 'uniform',
        'min': 0.0,
        'max': 1.5,
    },

    'objective': {
        'values': ['CrossEntropyLoss', 'JaccardLoss', 'DiceLoss', 'TverskyLoss',
                   'FocalLoss', 'LovaszLoss', 'MCCLoss']
    },

    'betta_1': {
        'distribution': 'uniform',
        'min': 0.5,
        'max': 1
    },

    'betta_2': {
        'distribution': 'uniform',
        'min': 0.5,
        'max': 1
    },

    'class_weight_0': {
        'distribution': 'uniform',
        'min': 0.3,
        'max': 5
    },

    'class_weight_1': {
        'distribution': 'uniform',
        'min': 5,
        'max': 50
    },

    'tversky_alpha': {
        'distribution': 'uniform',
        'min': 0.1,
        'max': 1
    },

    'tversky_betta': {
        'distribution': 'uniform',
        'min': 0.1,
        'max': 1
    },

    'focal_gamma': {
        'distribution': 'uniform',
        'min': 0,
        'max': 5
    },

    'focal_alpha': {
        'distribution': 'uniform',
        'min': 0.1,
        'max': 1
    },

    'smooth': {
        'distribution': 'uniform',
        'min': 0,
        'max': 1.5
    },
}


sweep_config['parameters'] = parameters_dict

In [None]:
sweep_id = wandb.sweep(sweep_config, project='multispec-leaf-disease-sweeps')

In [None]:
from sklearn.metrics import cohen_kappa_score

def calculate_cohen_kappa(y_true, y_pred):
    return cohen_kappa_score(y_true.flatten(), y_pred.flatten())

In [None]:
import torch
from torch import nn
import evaluate
from matplotlib import pyplot as plt

metric = evaluate.load("mean_iou")
sigma = nn.Sigmoid()

def compute_metrics(eval_pred):
    global best_mean
    global best_iou
    global best_dice
    global best_kappa
    with torch.no_grad():

        logits, labels = eval_pred

        logits_tensor = torch.from_numpy(logits)
        # scale the logits to the size of the label
        logits_tensor = nn.functional.interpolate(
            logits_tensor,
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        ).detach().cpu().numpy()


        pred_labels = logits_tensor.argmax(axis=1)

        # currently using _compute instead of compute
        # see this issue for more info: https://github.com/huggingface/evaluate/pull/328#issuecomment-1286866576
        metrics = metric._compute(
                predictions=pred_labels,
                references=labels,
                num_labels=len(id2label),
                ignore_index=-1,
                reduce_labels=False,#feature_extractor.do_reduce_labels,
            )


        eval_iou = metrics['per_category_iou'][1]

        metrics = {'eval_IoU': eval_iou}
        metrics['eval_Dice'] = eval_iou * 2 / (eval_iou + 1)
        metrics['eval_kappa_score'] = calculate_cohen_kappa(labels, pred_labels)
        metrics['eval_mean'] = (metrics['eval_IoU'] + metrics['eval_kappa_score']) / 2

        print('Mean:', metrics['eval_mean'])
        print('IoU:', metrics['eval_IoU'])
        print('Dice:', metrics['eval_Dice'])
        print('kappa:', metrics['eval_kappa_score'])

        if metrics['eval_mean'] > best_mean:
            best_mean = metrics['eval_mean']
            best_iou = metrics['eval_IoU']
            best_dice = metrics['eval_Dice']
            best_kappa = metrics['eval_kappa_score']
        else:
            metrics['eval_mean'] = best_mean
            metrics['eval_IoU'] = best_iou
            metrics['eval_Dice'] = best_dice
            metrics['eval_kappa_score'] = best_kappa


    return metrics

In [None]:
from torch.optim import AdamW
from transformers import TrainingArguments, get_linear_schedule_with_warmup

def train(config=None):
    '''
    perform training procedure with particular configuration

    Parameters:
    -----------
        config (dict):
            training hyperparameters
    '''

    global best_mean
    global best_iou
    global best_dice
    global best_kappa
    best_mean = 0
    best_iou = 0
    best_dice = 0
    best_kappa = 0
    with wandb.init(config=config):
        # set sweep configuration
        config = wandb.config

        epochs = config.epochs
        batch_size = config.batch_size

        num_training_steps = len(train_dataset) // batch_size * epochs

        model = model_init()
        optimizer = AdamW(model.parameters(), lr=config.learning_rate,
                          weight_decay=config.weight_decay,
                          betas=(config.betta_1, config.betta_1))
        scheduler = get_linear_schedule_with_warmup(optimizer=optimizer,
                                                    num_warmup_steps=config.num_warmup_steps,
                                                    num_training_steps=num_training_steps)



        # set training arguments
        training_args = TrainingArguments(
        "segformer-b2-finetuned-leaf-disease",
        label_smoothing_factor=config.smooth,
        num_train_epochs=config.epochs,
        per_device_train_batch_size=config.batch_size,
        per_device_eval_batch_size=config.batch_size,
        save_total_limit=5,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_strategy='epoch',
        load_best_model_at_end=True,
        metric_for_best_model='eval_mean',
        greater_is_better=True,
        push_to_hub=False,
        remove_unused_columns=False,
        seed=magic_seed,
        hub_strategy="end",
        )

        objective = config.objective

        # define particular Trainer
        if objective == 'CrossEntropyLoss':
            trainer = WeightedCSETrainer(model=model,
                                         args=training_args,
                                         train_dataset=train_dataset,
                                         eval_dataset=val_dataset,
                                         compute_metrics=compute_metrics,
                                         optimizers=(optimizer, scheduler))
            setattr(trainer, 'weight_0', config.class_weight_0)
            setattr(trainer, 'weight_1', config.class_weight_1)
        elif objective == 'JaccardLoss':
            trainer = JaccardTrainer(model=model,
                                     args=training_args,
                                     train_dataset=train_dataset,
                                     eval_dataset=val_dataset,
                                     compute_metrics=compute_metrics,
                                     optimizers=(optimizer, scheduler))

        elif objective == 'DiceLoss':
            trainer = DiceTrainer(model=model,
                                  args=training_args,
                                  train_dataset=train_dataset,
                                  eval_dataset=val_dataset,
                                  compute_metrics=compute_metrics,
                                  optimizers=(optimizer, scheduler))

        elif objective == 'TverskyLoss':
            trainer = TverskyTrainer(model=model,
                                     args=training_args,
                                     train_dataset=train_dataset,
                                     eval_dataset=val_dataset,
                                     compute_metrics=compute_metrics,
                                     optimizers=(optimizer, scheduler))

            setattr(trainer, 'alpha', config.tversky_alpha)
            setattr(trainer, 'betta', config.tversky_betta)
        elif objective == 'FocalLoss':
            trainer = FocalTrainer(model=model,
                                   args=training_args,
                                   train_dataset=train_dataset,
                                   eval_dataset=val_dataset,
                                   compute_metrics=compute_metrics,
                                   optimizers=(optimizer, scheduler))

            setattr(trainer, 'alpha', config.focal_alpha)
            setattr(trainer, 'gamma', config.focal_gamma)
        elif objective == 'LovaszLoss':
            trainer = LovaszTrainer(model=model,
                                    args=training_args,
                                    train_dataset=train_dataset,
                                    eval_dataset=val_dataset,
                                    compute_metrics=compute_metrics,
                                    optimizers=(optimizer, scheduler))
        elif objective == 'MCCLoss':
            trainer = MCCTrainer(model=model,
                                 args=training_args,
                                 train_dataset=train_dataset,
                                 eval_dataset=val_dataset,
                                 compute_metrics=compute_metrics,
                                 optimizers=(optimizer, scheduler))
        else:
            print('ВЫ ВВЕЛИ НЕПРАВИЛЬНЫЙ ЛОСС')



        # start training loop
        trainer.train()

In [None]:
# We will log the mean value of IoU and kappa score. The higher this value, the better
best_mean = 0

best_iou = 0
best_dice = 0
best_kappa = 0

In [None]:
# Start tuning hyperparameters
wandb.agent(sweep_id, train, count=50)

[34m[1mwandb[0m: Agent Starting Run: 20ji1jbq with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.6733811349468842
[34m[1mwandb[0m: 	betta_2: 0.6531000450935891
[34m[1mwandb[0m: 	class_weight_0: 2.5335889266254115
[34m[1mwandb[0m: 	class_weight_1: 29.45220495433291
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.1261787953389969
[34m[1mwandb[0m: 	focal_gamma: 1.36537336321138
[34m[1mwandb[0m: 	learning_rate: 0.00029416366340114924
[34m[1mwandb[0m: 	num_warmup_steps: 91
[34m[1mwandb[0m: 	objective: MCCLoss
[34m[1mwandb[0m: 	smooth: 0.7543296344281722
[34m[1mwandb[0m: 	tversky_alpha: 0.1841302902763169
[34m[1mwandb[0m: 	tversky_betta: 0.16571841329169207
[34m[1mwandb[0m: 	weight_decay: 1.3090100498640664


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.7811,,0,0,0,0
2,0.0,,0,0,0,0
3,0.0,,0,0,0,0
4,0.0,,0,0,0,0
5,0.0,,0,0,0,0


Mean: 0.0
IoU: 0.0
Dice: 0.0
kappa: 0.0
Mean: 0.0
IoU: 0.0
Dice: 0.0
kappa: 0.0
Mean: 0.0
IoU: 0.0
Dice: 0.0
kappa: 0.0
Mean: 0.0
IoU: 0.0
Dice: 0.0
kappa: 0.0
Mean: 0.0
IoU: 0.0
Dice: 0.0
kappa: 0.0


VBox(children=(Label(value='0.002 MB of 0.004 MB uploaded\r'), FloatProgress(value=0.5982772122161315, max=1.0…

0,1
eval/Dice,▁▁▁▁▁
eval/IoU,▁▁▁▁▁
eval/kappa_score,▁▁▁▁▁
eval/mean,▁▁▁▁▁
eval/runtime,█▆▅▂▁
eval/samples_per_second,▁▃▄▇█
eval/steps_per_second,▁▃▄██
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███
train/learning_rate,█▆▄▃▁

0,1
eval/Dice,0.0
eval/IoU,0.0
eval/kappa_score,0.0
eval/loss,
eval/mean,0.0
eval/runtime,31.6378
eval/samples_per_second,6.638
eval/steps_per_second,0.853
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: cpddv6yb with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8499884979892429
[34m[1mwandb[0m: 	betta_2: 0.5561476285282485
[34m[1mwandb[0m: 	class_weight_0: 4.009760277034754
[34m[1mwandb[0m: 	class_weight_1: 31.133691025399425
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.985926475829676
[34m[1mwandb[0m: 	focal_gamma: 4.3168816515167485
[34m[1mwandb[0m: 	learning_rate: 0.0008035918543114604
[34m[1mwandb[0m: 	num_warmup_steps: 133
[34m[1mwandb[0m: 	objective: FocalLoss
[34m[1mwandb[0m: 	smooth: 0.9715580710931488
[34m[1mwandb[0m: 	tversky_alpha: 0.4253359961111712
[34m[1mwandb[0m: 	tversky_betta: 0.3944673992982789
[34m[1mwandb[0m: 	weight_decay: 1.0677063458196567


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.0009,0.000392,0.057085,0.108004,0.097372,0.077229
2,0.0003,0.000296,0.10917,0.196851,0.187874,0.148522
3,0.0002,0.000316,0.182311,0.308398,0.301185,0.241748
4,0.0001,0.000348,0.230636,0.374824,0.368676,0.299656
5,0.0001,0.000323,0.230636,0.374824,0.368676,0.299656


Mean: 0.07722862105535708
IoU: 0.057084878069069304
Dice: 0.10800434147415627
kappa: 0.09737236404164484
Mean: 0.14852220996864055
IoU: 0.10917046597821618
Dice: 0.19685065429854362
kappa: 0.18787395395906492
Mean: 0.24174834475499607
IoU: 0.18231139464016177
Dice: 0.30839827048380686
kappa: 0.30118529486983037
Mean: 0.2996559888181966
IoU: 0.2306359653143161
Dice: 0.37482402890022715
kappa: 0.36867601232207714
Mean: 0.25954437852297196
IoU: 0.19693696341578984
Dice: 0.3290682290465421
kappa: 0.3221517936301541


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▃▆██
eval/IoU,▁▃▆██
eval/kappa_score,▁▃▆██
eval/loss,█▁▂▅▃
eval/mean,▁▃▆██
eval/runtime,█▂▂▁▅
eval/samples_per_second,▁▇▇█▄
eval/steps_per_second,▁███▃
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.37482
eval/IoU,0.23064
eval/kappa_score,0.36868
eval/loss,0.00032
eval/mean,0.29966
eval/runtime,43.7625
eval/samples_per_second,4.799
eval/steps_per_second,0.617
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: 4w2nwea9 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.7166623664105793
[34m[1mwandb[0m: 	betta_2: 0.7051148153688448
[34m[1mwandb[0m: 	class_weight_0: 3.0393000174267955
[34m[1mwandb[0m: 	class_weight_1: 27.734584008364006
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.4845350504649437
[34m[1mwandb[0m: 	focal_gamma: 2.3337414196258432
[34m[1mwandb[0m: 	learning_rate: 0.0005105108491283779
[34m[1mwandb[0m: 	num_warmup_steps: 146
[34m[1mwandb[0m: 	objective: TverskyLoss
[34m[1mwandb[0m: 	smooth: 0.3049206590506113
[34m[1mwandb[0m: 	tversky_alpha: 0.5443000260384396
[34m[1mwandb[0m: 	tversky_betta: 0.857648870203161
[34m[1mwandb[0m: 	weight_decay: 0.6822404309767118


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9475,0.852476,0.142025,0.248725,0.242274,0.19215
2,0.6166,0.643248,0.30783,0.470749,0.467417,0.387623
3,0.51,0.575611,0.359774,0.529167,0.525811,0.442792
4,0.4734,0.563908,0.370052,0.540202,0.536887,0.45347
5,0.4473,0.563184,0.38306,0.553931,0.551021,0.467041


Mean: 0.19214951512802825
IoU: 0.14202518965962088
Dice: 0.24872514362306017
kappa: 0.24227384059643564
Mean: 0.3876230858991506
IoU: 0.30782966433452313
Dice: 0.47074886390676784
kappa: 0.46741650746377805
Mean: 0.4427923619501961
IoU: 0.3597735910250682
Dice: 0.5291669045489434
kappa: 0.525811132875324
Mean: 0.45346971252737545
IoU: 0.37005231941405
Dice: 0.5402017341532119
kappa: 0.536887105640701
Mean: 0.46704067898213686
IoU: 0.3830601092896175
Dice: 0.5539312524693797
kappa: 0.5510212486746563


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇██
eval/IoU,▁▆▇██
eval/kappa_score,▁▆▇██
eval/loss,█▃▁▁▁
eval/mean,▁▆▇██
eval/runtime,██▆▁▆
eval/samples_per_second,▁▁▃█▃
eval/steps_per_second,▁▁▃█▃
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55393
eval/IoU,0.38306
eval/kappa_score,0.55102
eval/loss,0.56318
eval/mean,0.46704
eval/runtime,43.4952
eval/samples_per_second,4.828
eval/steps_per_second,0.621
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: msy6h1m9 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.530559492925212
[34m[1mwandb[0m: 	betta_2: 0.6462464255355176
[34m[1mwandb[0m: 	class_weight_0: 4.547573274062611
[34m[1mwandb[0m: 	class_weight_1: 5.643049492401848
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.8223124727758642
[34m[1mwandb[0m: 	focal_gamma: 0.7808438032946968
[34m[1mwandb[0m: 	learning_rate: 9.969896433156176e-06
[34m[1mwandb[0m: 	num_warmup_steps: 45
[34m[1mwandb[0m: 	objective: JaccardLoss
[34m[1mwandb[0m: 	smooth: 0.5445824364197704
[34m[1mwandb[0m: 	tversky_alpha: 0.4970279457803519
[34m[1mwandb[0m: 	tversky_betta: 0.5614285903170009
[34m[1mwandb[0m: 	weight_decay: 0.8616594554460022


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9883,0.988933,0.011729,0.023186,0.010789,0.011259
2,0.9841,0.987507,0.014288,0.028173,0.016048,0.015168
3,0.9819,0.987212,0.014881,0.029326,0.017178,0.016029
4,0.981,0.986648,0.015267,0.030076,0.017934,0.016601
5,0.9802,0.986113,0.016079,0.031649,0.019581,0.01783


Mean: 0.011258873769272974
IoU: 0.011729145931562367
Dice: 0.023186335945205194
kappa: 0.010788601606983583
Mean: 0.015167671743220217
IoU: 0.014287677422175769
Dice: 0.02817283052967393
kappa: 0.016047666064264665
Mean: 0.016029460973694364
IoU: 0.01488125094735775
Dice: 0.029326092946276423
kappa: 0.01717767100003098
Mean: 0.016600713163858676
IoU: 0.015267487502393562
Dice: 0.030075793207861527
kappa: 0.017933938825323792
Mean: 0.01783004716141023
IoU: 0.0160787229738164
Dice: 0.03164857724164889
kappa: 0.019581371349004062


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▅▆▇█
eval/IoU,▁▅▆▇█
eval/kappa_score,▁▅▆▇█
eval/loss,█▄▄▂▁
eval/mean,▁▅▆▇█
eval/runtime,█▁▆▅▅
eval/samples_per_second,▁█▃▄▄
eval/steps_per_second,▁█▂▄▄
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.03165
eval/IoU,0.01608
eval/kappa_score,0.01958
eval/loss,0.98611
eval/mean,0.01783
eval/runtime,43.366
eval/samples_per_second,4.843
eval/steps_per_second,0.623
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kvdxds59 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.5576998435084036
[34m[1mwandb[0m: 	betta_2: 0.5104419497889835
[34m[1mwandb[0m: 	class_weight_0: 2.365989016178593
[34m[1mwandb[0m: 	class_weight_1: 31.22564308383346
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.7158864623971665
[34m[1mwandb[0m: 	focal_gamma: 2.2087600372171874
[34m[1mwandb[0m: 	learning_rate: 0.0006674348761164393
[34m[1mwandb[0m: 	num_warmup_steps: 118
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 1.3106210231723712
[34m[1mwandb[0m: 	tversky_alpha: 0.4935085728364026
[34m[1mwandb[0m: 	tversky_betta: 0.41612126851211984
[34m[1mwandb[0m: 	weight_decay: 0.9020149350050962


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8827,0.85738,0.172606,0.294398,0.288205,0.230406
2,0.522,0.560833,0.31389,0.477802,0.474706,0.394298
3,0.4376,0.527261,0.346433,0.514594,0.511378,0.428905
4,0.4048,0.514612,0.361501,0.531033,0.528283,0.444892
5,0.3791,0.501989,0.375276,0.545746,0.543046,0.459161


Mean: 0.23040554531000643
IoU: 0.17260619610348132
Dice: 0.2943975508180736
kappa: 0.28820489451653153
Mean: 0.3942979995227872
IoU: 0.31388979445631743
Dice: 0.47780231763837366
kappa: 0.47470620458925694
Mean: 0.4289052706619653
IoU: 0.34643282149533927
Dice: 0.5145935481736004
kappa: 0.5113777198285914
Mean: 0.44489159886053353
IoU: 0.36150055882619136
Dice: 0.5310325529911741
kappa: 0.5282826388948757
Mean: 0.45916066890405915
IoU: 0.3752758129389366
Dice: 0.5457462559993399
kappa: 0.5430455248691817


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇██
eval/IoU,▁▆▇██
eval/kappa_score,▁▆▇██
eval/loss,█▂▁▁▁
eval/mean,▁▆▇██
eval/runtime,▇█▆▃▁
eval/samples_per_second,▂▁▃▆█
eval/steps_per_second,▂▁▃▆█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.54575
eval/IoU,0.37528
eval/kappa_score,0.54305
eval/loss,0.50199
eval/mean,0.45916
eval/runtime,43.2905
eval/samples_per_second,4.851
eval/steps_per_second,0.624
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ocbi86yu with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.7334251971870362
[34m[1mwandb[0m: 	betta_2: 0.7034603545265329
[34m[1mwandb[0m: 	class_weight_0: 2.146613932796874
[34m[1mwandb[0m: 	class_weight_1: 33.84262771779403
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5065598748427699
[34m[1mwandb[0m: 	focal_gamma: 0.32887799566177045
[34m[1mwandb[0m: 	learning_rate: 0.0006207640963551679
[34m[1mwandb[0m: 	num_warmup_steps: 152
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.29679537566664205
[34m[1mwandb[0m: 	tversky_alpha: 0.629768325299824
[34m[1mwandb[0m: 	tversky_betta: 0.6370763292754028
[34m[1mwandb[0m: 	weight_decay: 0.5275977729169923


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9329,0.735563,0.208276,0.34475,0.341418,0.274847
2,0.5324,0.563621,0.309382,0.472562,0.469399,0.389391
3,0.4323,0.515039,0.361214,0.530723,0.527705,0.44446
4,0.395,0.492625,0.382314,0.553151,0.550401,0.466357
5,0.3696,0.50065,0.382314,0.553151,0.550401,0.466357


Mean: 0.2748472089903631
IoU: 0.20827635720762688
Dice: 0.34474953675161124
kappa: 0.3414180607730992
Mean: 0.3893907462989423
IoU: 0.309382390963737
Dice: 0.47256232113527064
kappa: 0.46939910163414766
Mean: 0.44445972452323257
IoU: 0.36121413328844854
Dice: 0.5307234540914149
kappa: 0.5277053157580166
Mean: 0.46635728830313916
IoU: 0.38231389245161634
Dice: 0.5531506187405233
kappa: 0.550400684154662
Mean: 0.46094713061345927
IoU: 0.37699867883941124
Dice: 0.5475657814823186
kappa: 0.5448955823875073


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▅▇██
eval/IoU,▁▅▇██
eval/kappa_score,▁▅▇██
eval/loss,█▃▂▁▁
eval/mean,▁▅▇██
eval/runtime,▇▂▁█▄
eval/samples_per_second,▂▇█▁▄
eval/steps_per_second,▃██▁▅
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55315
eval/IoU,0.38231
eval/kappa_score,0.5504
eval/loss,0.50065
eval/mean,0.46636
eval/runtime,43.5948
eval/samples_per_second,4.817
eval/steps_per_second,0.619
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: r9hw0c3b with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.6626611587205731
[34m[1mwandb[0m: 	betta_2: 0.6091065595946559
[34m[1mwandb[0m: 	class_weight_0: 0.8471523267762717
[34m[1mwandb[0m: 	class_weight_1: 41.88266493375092
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5203835792748547
[34m[1mwandb[0m: 	focal_gamma: 2.927139943659065
[34m[1mwandb[0m: 	learning_rate: 0.0008449194252370763
[34m[1mwandb[0m: 	num_warmup_steps: 129
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.17937776261653693
[34m[1mwandb[0m: 	tversky_alpha: 0.6047226247112517
[34m[1mwandb[0m: 	tversky_betta: 0.8726117166358549
[34m[1mwandb[0m: 	weight_decay: 0.836030142904211


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8721,0.670377,0.246639,0.395687,0.391356,0.318998
2,0.5115,0.558444,0.309699,0.472932,0.470017,0.389858
3,0.4352,0.523243,0.348763,0.51716,0.514151,0.431457
4,0.3998,0.509033,0.370241,0.540402,0.537498,0.453869
5,0.3736,0.495265,0.379709,0.550418,0.547735,0.463722


Mean: 0.31899757658723754
IoU: 0.2466394830505546
Dice: 0.39568694302385204
kappa: 0.3913556701239205
Mean: 0.3898578493007524
IoU: 0.30969906332372693
Dice: 0.47293164055226417
kappa: 0.4700166352777778
Mean: 0.43145665567723074
IoU: 0.3487628095940055
Dice: 0.5171595881991846
kappa: 0.514150501760456
Mean: 0.4538692270715192
IoU: 0.3702406447305599
Dice: 0.5404023682327171
kappa: 0.5374978094124785
Mean: 0.46372163220041374
IoU: 0.3797085128778313
Dice: 0.5504184533671183
kappa: 0.5477347515229962


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▄▆██
eval/IoU,▁▄▆██
eval/kappa_score,▁▅▆██
eval/loss,█▄▂▂▁
eval/mean,▁▄▆██
eval/runtime,▄▇▄█▁
eval/samples_per_second,▅▂▅▁█
eval/steps_per_second,▅▂▅▁█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55042
eval/IoU,0.37971
eval/kappa_score,0.54773
eval/loss,0.49526
eval/mean,0.46372
eval/runtime,43.2492
eval/samples_per_second,4.856
eval/steps_per_second,0.624
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: qep54nrk with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.7156544079945724
[34m[1mwandb[0m: 	betta_2: 0.5087090415133685
[34m[1mwandb[0m: 	class_weight_0: 2.596798334160445
[34m[1mwandb[0m: 	class_weight_1: 44.08056282175286
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5495432369352488
[34m[1mwandb[0m: 	focal_gamma: 1.7676476162769266
[34m[1mwandb[0m: 	learning_rate: 0.0008627143419803124
[34m[1mwandb[0m: 	num_warmup_steps: 121
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.9660606235834795
[34m[1mwandb[0m: 	tversky_alpha: 0.40978283876508215
[34m[1mwandb[0m: 	tversky_betta: 0.9559149710978904
[34m[1mwandb[0m: 	weight_decay: 0.7328334525223315


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8566,0.937451,0.054012,0.102489,0.091669,0.072841
2,0.5081,0.547251,0.33563,0.502579,0.499098,0.417364
3,0.4348,0.521077,0.353043,0.52185,0.51867,0.435857
4,0.4037,0.499952,0.373821,0.544206,0.541319,0.45757
5,0.3782,0.49306,0.380798,0.551562,0.548866,0.464832


Mean: 0.07284097057836925
IoU: 0.054012495899731776
Dice: 0.10248928947208609
kappa: 0.09166944525700671
Mean: 0.41736389045604216
IoU: 0.3356297865632368
Dice: 0.502579067852117
kappa: 0.4990979943488475
Mean: 0.435856771008456
IoU: 0.35304304202008563
Dice: 0.5218504231661313
kappa: 0.5186704999968264
Mean: 0.4575697983430531
IoU: 0.3738208855842859
Dice: 0.5442061472595824
kappa: 0.5413187111018203
Mean: 0.46483214660388134
IoU: 0.380798264986534
Dice: 0.5515624905427408
kappa: 0.5488660282212287


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▇███
eval/IoU,▁▇▇██
eval/kappa_score,▁▇███
eval/loss,█▂▁▁▁
eval/mean,▁▇▇██
eval/runtime,█▄▇▇▁
eval/samples_per_second,▁▅▂▂█
eval/steps_per_second,▁▆▂▂█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55156
eval/IoU,0.3808
eval/kappa_score,0.54887
eval/loss,0.49306
eval/mean,0.46483
eval/runtime,43.2533
eval/samples_per_second,4.855
eval/steps_per_second,0.624
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: rl48w6ec with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.7870851965711989
[34m[1mwandb[0m: 	betta_2: 0.5259639604489406
[34m[1mwandb[0m: 	class_weight_0: 0.9203238722736022
[34m[1mwandb[0m: 	class_weight_1: 44.39670768750117
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.7241366342581582
[34m[1mwandb[0m: 	focal_gamma: 1.9808508371590845
[34m[1mwandb[0m: 	learning_rate: 0.0009021733403612044
[34m[1mwandb[0m: 	num_warmup_steps: 175
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.31773049198561826
[34m[1mwandb[0m: 	tversky_alpha: 0.5249575442313725
[34m[1mwandb[0m: 	tversky_betta: 0.82441676179646
[34m[1mwandb[0m: 	weight_decay: 0.02750458535028044


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8917,0.881615,0.094683,0.172987,0.163754,0.129219
2,0.524,0.551037,0.323367,0.488704,0.485366,0.404367
3,0.4364,0.512375,0.359336,0.528693,0.525595,0.442465
4,0.3987,0.501562,0.370648,0.540836,0.538183,0.454416
5,0.3726,0.487062,0.385898,0.556893,0.554224,0.470061


Mean: 0.12921870336565897
IoU: 0.09468312948559772
Dice: 0.17298728177183154
kappa: 0.16375427724572023
Mean: 0.40436681806120967
IoU: 0.323367450547333
Dice: 0.48870395053708043
kappa: 0.4853661855750864
Mean: 0.4424652766966427
IoU: 0.3593355506841031
Dice: 0.5286929345786077
kappa: 0.5255950027091822
Mean: 0.4544155246175177
IoU: 0.3706476573343316
Dice: 0.5408357944523482
kappa: 0.5381833919007039
Mean: 0.47006092227579926
IoU: 0.385898198326479
Dice: 0.5568925607847167
kappa: 0.5542236462251195


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▇▇██
eval/IoU,▁▆▇██
eval/kappa_score,▁▇▇██
eval/loss,█▂▁▁▁
eval/mean,▁▇▇██
eval/runtime,▄▆▁▂█
eval/samples_per_second,▅▃█▇▁
eval/steps_per_second,▅▃█▇▁
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55689
eval/IoU,0.3859
eval/kappa_score,0.55422
eval/loss,0.48706
eval/mean,0.47006
eval/runtime,43.5998
eval/samples_per_second,4.817
eval/steps_per_second,0.619
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1rgbiky1 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.564393105962784
[34m[1mwandb[0m: 	betta_2: 0.9046003060234356
[34m[1mwandb[0m: 	class_weight_0: 0.8385884938238142
[34m[1mwandb[0m: 	class_weight_1: 36.88330299993871
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5780467060491598
[34m[1mwandb[0m: 	focal_gamma: 2.35219192263352
[34m[1mwandb[0m: 	learning_rate: 0.000792220535475399
[34m[1mwandb[0m: 	num_warmup_steps: 200
[34m[1mwandb[0m: 	objective: LovaszLoss
[34m[1mwandb[0m: 	smooth: 0.50722247021763
[34m[1mwandb[0m: 	tversky_alpha: 0.5680831404942668
[34m[1mwandb[0m: 	tversky_betta: 0.95888601245971
[34m[1mwandb[0m: 	weight_decay: 0.003505804839181781


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,1.4928,1.206339,0.00696,0.013824,0.001142,0.004051
2,1.0953,1.121559,0.008805,0.017456,0.004872,0.006838
3,1.0471,1.096797,0.027472,0.053474,0.041828,0.03465
4,1.0248,1.025383,0.027472,0.053474,0.041828,0.03465
5,1.0136,1.012977,0.027472,0.053474,0.041828,0.03465


Mean: 0.004051315356571621
IoU: 0.00696018253863354
Dice: 0.013824146494226452
kappa: 0.0011424481745097026
Mean: 0.006838364728951326
IoU: 0.008804747425933727
Dice: 0.017455800933530344
kappa: 0.004871982031968924
Mean: 0.0346497145091163
IoU: 0.027471705015212452
Dice: 0.05347437770036833
kappa: 0.04182772400302015
Mean: 0.01622922961579272
IoU: 0.014809813199832905
Dice: 0.029187366947380135
kappa: 0.017648646031752535
Mean: -0.004860619374909323
IoU: 0.0010096754994000357
Dice: 0.0020173141661119554
kappa: -0.010730914249218682


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▂███
eval/IoU,▁▂███
eval/kappa_score,▁▂███
eval/loss,█▅▄▁▁
eval/mean,▁▂███
eval/runtime,█▁▂█▁
eval/samples_per_second,▁▇▇▁█
eval/steps_per_second,▁▇▇▁█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.05347
eval/IoU,0.02747
eval/kappa_score,0.04183
eval/loss,1.01298
eval/mean,0.03465
eval/runtime,24.1505
eval/samples_per_second,8.695
eval/steps_per_second,1.118
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: bajntrme with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.6238282672873537
[34m[1mwandb[0m: 	betta_2: 0.5950881359793996
[34m[1mwandb[0m: 	class_weight_0: 2.1847646826994542
[34m[1mwandb[0m: 	class_weight_1: 31.649185671267453
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.837322821748901
[34m[1mwandb[0m: 	focal_gamma: 2.0827235506729287
[34m[1mwandb[0m: 	learning_rate: 0.000734071346430252
[34m[1mwandb[0m: 	num_warmup_steps: 180
[34m[1mwandb[0m: 	objective: JaccardLoss
[34m[1mwandb[0m: 	smooth: 0.8521515782576368
[34m[1mwandb[0m: 	tversky_alpha: 0.7342423176322761
[34m[1mwandb[0m: 	tversky_betta: 0.5982120005143856
[34m[1mwandb[0m: 	weight_decay: 1.048422077344724


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9517,0.855709,0.191386,0.321282,0.316322,0.253854
2,0.706,0.713307,0.309992,0.473273,0.470152,0.390072
3,0.6181,0.672727,0.353535,0.522387,0.519298,0.436416
4,0.5845,0.656528,0.36989,0.540029,0.537224,0.453557
5,0.5558,0.652407,0.375297,0.545769,0.543013,0.459155


Mean: 0.25385373483396884
IoU: 0.1913855912101814
Dice: 0.3212823667202092
kappa: 0.3163218784577563
Mean: 0.390072335801796
IoU: 0.30999219062111666
Dice: 0.4732733413840241
kappa: 0.4701524809824753
Mean: 0.436416383812491
IoU: 0.3535345612598618
Dice: 0.5223871947987704
kappa: 0.5192982063651201
Mean: 0.4535574080613851
IoU: 0.3698903596326894
Dice: 0.5400291447147181
kappa: 0.5372244564900808
Mean: 0.4591549526464832
IoU: 0.3752972749793559
Dice: 0.5457689501856816
kappa: 0.5430126303136105


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇██
eval/IoU,▁▆▇██
eval/kappa_score,▁▆▇██
eval/loss,█▃▂▁▁
eval/mean,▁▆▇██
eval/runtime,█▅▅▅▁
eval/samples_per_second,▁▄▄▄█
eval/steps_per_second,▁▄▄▄█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.54577
eval/IoU,0.3753
eval/kappa_score,0.54301
eval/loss,0.65241
eval/mean,0.45915
eval/runtime,43.0117
eval/samples_per_second,4.882
eval/steps_per_second,0.628
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: tew917wj with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.7882032358226143
[34m[1mwandb[0m: 	betta_2: 0.528208729315524
[34m[1mwandb[0m: 	class_weight_0: 1.2677743108060675
[34m[1mwandb[0m: 	class_weight_1: 13.337209054519496
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.42639214487814014
[34m[1mwandb[0m: 	focal_gamma: 1.5177015350712786
[34m[1mwandb[0m: 	learning_rate: 0.0007977387998406228
[34m[1mwandb[0m: 	num_warmup_steps: 198
[34m[1mwandb[0m: 	objective: JaccardLoss
[34m[1mwandb[0m: 	smooth: 0.5250137553355211
[34m[1mwandb[0m: 	tversky_alpha: 0.7458511325881648
[34m[1mwandb[0m: 	tversky_betta: 0.732013168103462
[34m[1mwandb[0m: 	weight_decay: 0.4878578795787975


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9649,0.908734,0.146514,0.255582,0.248463,0.197489
2,0.6941,0.732184,0.288853,0.448233,0.444399,0.366626
3,0.6072,0.681078,0.344254,0.512185,0.508609,0.426431
4,0.5706,0.654597,0.366914,0.53685,0.53425,0.450582
5,0.5402,0.641308,0.384074,0.554991,0.552342,0.468208


Mean: 0.19748858700496746
IoU: 0.14651444985249742
Dice: 0.2555823868968192
kappa: 0.24846272415743753
Mean: 0.3666262613273904
IoU: 0.28885310853530033
Dice: 0.4482327840502531
kappa: 0.4443994141194805
Mean: 0.4264310835467911
IoU: 0.34425358451830257
Dice: 0.5121854811964839
kappa: 0.5086085825752796
Mean: 0.45058210153329603
IoU: 0.3669140157425177
Dice: 0.5368501771389144
kappa: 0.5342501873240744
Mean: 0.4682080040604297
IoU: 0.3840742141367705
Dice: 0.5549907804276416
kappa: 0.552341793984089


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇██
eval/IoU,▁▅▇▇█
eval/kappa_score,▁▆▇██
eval/loss,█▃▂▁▁
eval/mean,▁▅▇██
eval/runtime,▁▅█▃▆
eval/samples_per_second,█▄▁▅▃
eval/steps_per_second,█▅▁▆▃
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55499
eval/IoU,0.38407
eval/kappa_score,0.55234
eval/loss,0.64131
eval/mean,0.46821
eval/runtime,43.3712
eval/samples_per_second,4.842
eval/steps_per_second,0.623
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: a61ulibx with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.7446595624335451
[34m[1mwandb[0m: 	betta_2: 0.5882003386891477
[34m[1mwandb[0m: 	class_weight_0: 1.1242642572482813
[34m[1mwandb[0m: 	class_weight_1: 32.21135371669034
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.38958422682149496
[34m[1mwandb[0m: 	focal_gamma: 1.1348395604742612
[34m[1mwandb[0m: 	learning_rate: 0.000878176310399197
[34m[1mwandb[0m: 	num_warmup_steps: 130
[34m[1mwandb[0m: 	objective: TverskyLoss
[34m[1mwandb[0m: 	smooth: 0.414143355871453
[34m[1mwandb[0m: 	tversky_alpha: 0.9139801486725204
[34m[1mwandb[0m: 	tversky_betta: 0.64724625027283
[34m[1mwandb[0m: 	weight_decay: 1.090952899137828


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9267,0.755635,0.207238,0.343325,0.340171,0.273704
2,0.6375,0.675594,0.280068,0.437583,0.434988,0.357528
3,0.5609,0.63366,0.321834,0.486951,0.484354,0.403094
4,0.5258,0.604225,0.368488,0.538533,0.535676,0.452082
5,0.4971,0.60007,0.368488,0.538533,0.535676,0.452082


Mean: 0.2737042351373376
IoU: 0.20723760513406472
Dice: 0.3433252977752475
kappa: 0.34017086514061046
Mean: 0.35752827716414326
IoU: 0.2800681558073456
Dice: 0.4375831935772282
kappa: 0.43498839852094096
Mean: 0.40309394915972846
IoU: 0.3218338463780259
Dice: 0.4869505305222544
kappa: 0.484354051941431
Mean: 0.45208207195292116
IoU: 0.36848794422387915
Dice: 0.5385329783564334
kappa: 0.5356761996819631
Mean: 0.4466754660709733
IoU: 0.36312903705380223
Dice: 0.5327874723271259
kappa: 0.5302218950881443


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▄▆██
eval/IoU,▁▄▆██
eval/kappa_score,▁▄▆██
eval/loss,█▄▃▁▁
eval/mean,▁▄▆██
eval/runtime,▁▄█▅▅
eval/samples_per_second,█▅▁▄▄
eval/steps_per_second,█▅▁▄▄
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.53853
eval/IoU,0.36849
eval/kappa_score,0.53568
eval/loss,0.60007
eval/mean,0.45208
eval/runtime,43.4139
eval/samples_per_second,4.837
eval/steps_per_second,0.622
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: 39ucpu32 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8670546021898123
[34m[1mwandb[0m: 	betta_2: 0.6145720444121712
[34m[1mwandb[0m: 	class_weight_0: 2.3722451089652297
[34m[1mwandb[0m: 	class_weight_1: 45.093837028138886
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.21832364573276675
[34m[1mwandb[0m: 	focal_gamma: 0.6623312073384524
[34m[1mwandb[0m: 	learning_rate: 0.00065185991557866
[34m[1mwandb[0m: 	num_warmup_steps: 133
[34m[1mwandb[0m: 	objective: JaccardLoss
[34m[1mwandb[0m: 	smooth: 0.06231799555571088
[34m[1mwandb[0m: 	tversky_alpha: 0.6027487887626591
[34m[1mwandb[0m: 	tversky_betta: 0.8285592300138171
[34m[1mwandb[0m: 	weight_decay: 0.7292027165412058


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9475,0.803619,0.230745,0.374969,0.371423,0.301084
2,0.6743,0.681927,0.343008,0.510806,0.507739,0.425374
3,0.6029,0.671055,0.353955,0.522846,0.520065,0.43701
4,0.5734,0.653179,0.372812,0.543137,0.540354,0.456583
5,0.5456,0.647068,0.378819,0.549484,0.546758,0.462789


Mean: 0.301084296617157
IoU: 0.2307454775715693
Dice: 0.3749686377509377
kappa: 0.3714231156627448
Mean: 0.4253736284109299
IoU: 0.3430082511382736
Dice: 0.510805873080162
kappa: 0.5077390056835862
Mean: 0.4370096486581895
IoU: 0.3539545794018955
Dice: 0.5228455736798109
kappa: 0.5200647179144835
Mean: 0.45658304345905987
IoU: 0.37281240373755004
Dice: 0.5431367064029284
kappa: 0.5403536831805696
Mean: 0.46278851911821184
IoU: 0.37881937960369333
Dice: 0.5494836890276018
kappa: 0.5467576586327303


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇██
eval/IoU,▁▆▇██
eval/kappa_score,▁▆▇██
eval/loss,█▃▂▁▁
eval/mean,▁▆▇██
eval/runtime,█▄▂▁▄
eval/samples_per_second,▁▅▇█▅
eval/steps_per_second,▁▅▇█▅
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.54948
eval/IoU,0.37882
eval/kappa_score,0.54676
eval/loss,0.64707
eval/mean,0.46279
eval/runtime,43.4238
eval/samples_per_second,4.836
eval/steps_per_second,0.622
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: 26ioph5d with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8623727741326974
[34m[1mwandb[0m: 	betta_2: 0.6047306916159851
[34m[1mwandb[0m: 	class_weight_0: 3.444037204136786
[34m[1mwandb[0m: 	class_weight_1: 42.27771740533352
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.47528455916394774
[34m[1mwandb[0m: 	focal_gamma: 2.5899623640680405
[34m[1mwandb[0m: 	learning_rate: 0.0007820810152155838
[34m[1mwandb[0m: 	num_warmup_steps: 177
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.5503456671829645
[34m[1mwandb[0m: 	tversky_alpha: 0.7388994966466216
[34m[1mwandb[0m: 	tversky_betta: 0.6735276405692971
[34m[1mwandb[0m: 	weight_decay: 0.5409193138422501


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9264,0.758243,0.171317,0.292521,0.287011,0.229164
2,0.5288,0.536071,0.336913,0.504016,0.50087,0.418892
3,0.4329,0.524962,0.342787,0.510561,0.507855,0.425321
4,0.4021,0.501964,0.371845,0.542109,0.539218,0.455531
5,0.3753,0.496575,0.375471,0.545953,0.543309,0.45939


Mean: 0.22916393735486795
IoU: 0.1713173528935531
Dice: 0.292520814227401
kappa: 0.2870105218161828
Mean: 0.41889153833473847
IoU: 0.3369126327307473
Dice: 0.5040159311571127
kappa: 0.5008704439387297
Mean: 0.4253211221285045
IoU: 0.3427871712000139
Dice: 0.5105606883236364
kappa: 0.5078550730569951
Mean: 0.45553121913657485
IoU: 0.37184465248497267
Dice: 0.5421089797761134
kappa: 0.539217785788177
Mean: 0.4593899765902969
IoU: 0.37547127444585826
Dice: 0.545952912898346
kappa: 0.5433086787347355


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▇▇██
eval/IoU,▁▇▇██
eval/kappa_score,▁▇▇██
eval/loss,█▂▂▁▁
eval/mean,▁▇▇██
eval/runtime,▁▃█▆▇
eval/samples_per_second,█▆▁▃▂
eval/steps_per_second,█▆▁▃▂
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.54595
eval/IoU,0.37547
eval/kappa_score,0.54331
eval/loss,0.49658
eval/mean,0.45939
eval/runtime,43.5142
eval/samples_per_second,4.826
eval/steps_per_second,0.62
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: rj8mhfk6 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.956324926291342
[34m[1mwandb[0m: 	betta_2: 0.6571297007626127
[34m[1mwandb[0m: 	class_weight_0: 0.5904514002618138
[34m[1mwandb[0m: 	class_weight_1: 47.19524347391132
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5413691212390902
[34m[1mwandb[0m: 	focal_gamma: 1.0873307679940347
[34m[1mwandb[0m: 	learning_rate: 0.0003100282966736601
[34m[1mwandb[0m: 	num_warmup_steps: 150
[34m[1mwandb[0m: 	objective: JaccardLoss
[34m[1mwandb[0m: 	smooth: 0.6137696598654505
[34m[1mwandb[0m: 	tversky_alpha: 0.9217027110329756
[34m[1mwandb[0m: 	tversky_betta: 0.8637711333720981
[34m[1mwandb[0m: 	weight_decay: 0.6407948043044165


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9854,0.985403,0.014637,0.028851,0.017024,0.01583
2,0.8445,0.761342,0.276391,0.433082,0.428772,0.352582
3,0.6556,0.701425,0.32534,0.490953,0.487536,0.406438
4,0.604,0.683025,0.339104,0.506464,0.5038,0.421452
5,0.5776,0.667478,0.358429,0.527711,0.524931,0.44168


Mean: 0.01583009045108579
IoU: 0.01463664031776155
Dice: 0.028850998941212454
kappa: 0.017023540584410024
Mean: 0.35258153462003705
IoU: 0.27639060715645225
Dice: 0.43308154354441114
kappa: 0.42877246208362185
Mean: 0.4064381208835347
IoU: 0.32533976855548125
Dice: 0.4909530012972849
kappa: 0.4875364732115881
Mean: 0.42145209957191665
IoU: 0.3391037302624607
Dice: 0.5064637228603602
kappa: 0.5038004688813726
Mean: 0.44167991810798024
IoU: 0.3584289114457188
Dice: 0.5277109584840298
kappa: 0.5249309247702416


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▇▇██
eval/IoU,▁▆▇██
eval/kappa_score,▁▇▇██
eval/loss,█▃▂▁▁
eval/mean,▁▇▇██
eval/runtime,▇▁▄▂█
eval/samples_per_second,▂█▅▇▁
eval/steps_per_second,▂█▅█▁
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.52771
eval/IoU,0.35843
eval/kappa_score,0.52493
eval/loss,0.66748
eval/mean,0.44168
eval/runtime,43.5537
eval/samples_per_second,4.822
eval/steps_per_second,0.62
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: bzxvhox9 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.6874134416124567
[34m[1mwandb[0m: 	betta_2: 0.5547855048526433
[34m[1mwandb[0m: 	class_weight_0: 1.3482192663140034
[34m[1mwandb[0m: 	class_weight_1: 35.444927653036395
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.4966372565980739
[34m[1mwandb[0m: 	focal_gamma: 0.550921013655129
[34m[1mwandb[0m: 	learning_rate: 0.0007409319059868123
[34m[1mwandb[0m: 	num_warmup_steps: 96
[34m[1mwandb[0m: 	objective: CrossEntropyLoss
[34m[1mwandb[0m: 	smooth: 0.38684840659755254
[34m[1mwandb[0m: 	tversky_alpha: 0.4154521189935724
[34m[1mwandb[0m: 	tversky_betta: 0.695663926699952
[34m[1mwandb[0m: 	weight_decay: 0.7873043986882813


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.6506,0.590532,0.006336,0.012591,-0.000108,0.003114
2,0.59,0.548416,0.006526,0.012968,0.00027,0.003398
3,0.5596,0.513818,0.006526,0.012968,0.00027,0.003398
4,0.5436,0.502741,0.006621,0.013155,0.000468,0.003545
5,0.5375,0.495273,0.006861,0.013628,0.000964,0.003912


Mean: 0.0031136587317477293
IoU: 0.006335502293963275
Dice: 0.01259123280361542
kappa: -0.0001081848304678168
Mean: 0.0033981018790300863
IoU: 0.006526132324654323
Dice: 0.012967636139921548
kappa: 0.0002700714334058496
Mean: 0.0033551700746331852
IoU: 0.006497789966972768
Dice: 0.01291168253272764
kappa: 0.00021255018229360267
Mean: 0.0035445040531991343
IoU: 0.006621204808651064
Dice: 0.013155305644310743
kappa: 0.00046780329774720464
Mean: 0.003912481824661184
IoU: 0.006860939622569927
Dice: 0.01362837578174759
kappa: 0.0009640240267524414


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▄▄▅█
eval/IoU,▁▄▄▅█
eval/kappa_score,▁▃▃▅█
eval/loss,█▅▂▂▁
eval/mean,▁▃▃▅█
eval/runtime,▁▁▁██
eval/samples_per_second,███▁▁
eval/steps_per_second,███▁▁
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.01363
eval/IoU,0.00686
eval/kappa_score,0.00096
eval/loss,0.49527
eval/mean,0.00391
eval/runtime,44.0923
eval/samples_per_second,4.763
eval/steps_per_second,0.612
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: gbkprws9 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.5990633105257885
[34m[1mwandb[0m: 	betta_2: 0.7258278087455539
[34m[1mwandb[0m: 	class_weight_0: 2.2921386960295353
[34m[1mwandb[0m: 	class_weight_1: 41.50334048905151
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.6399798669499273
[34m[1mwandb[0m: 	focal_gamma: 2.573816155558646
[34m[1mwandb[0m: 	learning_rate: 0.0009741386267502252
[34m[1mwandb[0m: 	num_warmup_steps: 139
[34m[1mwandb[0m: 	objective: FocalLoss
[34m[1mwandb[0m: 	smooth: 1.0294312947951731
[34m[1mwandb[0m: 	tversky_alpha: 0.742478931375277
[34m[1mwandb[0m: 	tversky_betta: 0.5715762755792649
[34m[1mwandb[0m: 	weight_decay: 0.8999223691279095


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.0196,0.004653,0.064883,0.121859,0.119332,0.092108
2,0.0024,0.002087,0.297965,0.459126,0.456145,0.377055
3,0.0018,0.001975,0.320502,0.485424,0.482279,0.40139
4,0.0015,0.001901,0.361757,0.531309,0.528515,0.445136
5,0.0014,0.002068,0.377426,0.548016,0.545225,0.461325


Mean: 0.09210750096778123
IoU: 0.06488285366271525
Dice: 0.12185913866402785
kappa: 0.11933214827284722
Mean: 0.3770547628119562
IoU: 0.2979649448049212
Dice: 0.4591263361888469
kappa: 0.45614458081899123
Mean: 0.4013902344258752
IoU: 0.3205018135704693
Dice: 0.48542426867839444
kappa: 0.482278655281281
Mean: 0.445135878854906
IoU: 0.36175676797874107
Dice: 0.5313089334091543
kappa: 0.528514989731071
Mean: 0.4613252887985735
IoU: 0.37742587313118486
Dice: 0.5480162388313714
kappa: 0.545224704465962


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▇▇██
eval/IoU,▁▆▇██
eval/kappa_score,▁▇▇██
eval/loss,█▁▁▁▁
eval/mean,▁▆▇██
eval/runtime,▅▅█▇▁
eval/samples_per_second,▄▄▁▂█
eval/steps_per_second,▄▄▁▂█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.54802
eval/IoU,0.37743
eval/kappa_score,0.54522
eval/loss,0.00207
eval/mean,0.46133
eval/runtime,43.2188
eval/samples_per_second,4.859
eval/steps_per_second,0.625
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: abndnoio with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.816961729985433
[34m[1mwandb[0m: 	betta_2: 0.7148589352765135
[34m[1mwandb[0m: 	class_weight_0: 1.6063677536340042
[34m[1mwandb[0m: 	class_weight_1: 43.574798491274166
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.6499670240028008
[34m[1mwandb[0m: 	focal_gamma: 3.8203615276006047
[34m[1mwandb[0m: 	learning_rate: 0.0009171434901424866
[34m[1mwandb[0m: 	num_warmup_steps: 120
[34m[1mwandb[0m: 	objective: TverskyLoss
[34m[1mwandb[0m: 	smooth: 0.4142647024275159
[34m[1mwandb[0m: 	tversky_alpha: 0.4629530747109762
[34m[1mwandb[0m: 	tversky_betta: 0.8618804534540281
[34m[1mwandb[0m: 	weight_decay: 0.965293557191436


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8674,0.692429,0.23809,0.384609,0.379942,0.309016
2,0.5735,0.61868,0.299163,0.460547,0.456604,0.377884
3,0.4923,0.569683,0.348573,0.516951,0.51341,0.430991
4,0.4598,0.546656,0.367094,0.537043,0.533641,0.450368
5,0.4327,0.536266,0.380724,0.551485,0.548313,0.464519


Mean: 0.3090163265881169
IoU: 0.23809020717402474
Dice: 0.38460882057612306
kappa: 0.37994244600220906
Mean: 0.3778835927569236
IoU: 0.2991630095895506
Dice: 0.4605473021958442
kappa: 0.4566041759242967
Mean: 0.4309914443586337
IoU: 0.3485733713591592
Dice: 0.5169512890616396
kappa: 0.5134095173581081
Mean: 0.45036751979596124
IoU: 0.3670939928526859
Dice: 0.5370427999419098
kappa: 0.5336410467392366
Mean: 0.4645186297288701
IoU: 0.3807244467627998
Dice: 0.5514850521484335
kappa: 0.5483128126949404


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▄▇▇█
eval/IoU,▁▄▆▇█
eval/kappa_score,▁▄▇▇█
eval/loss,█▅▂▁▁
eval/mean,▁▄▆▇█
eval/runtime,▂▁▆█▇
eval/samples_per_second,▇█▃▁▂
eval/steps_per_second,██▄▁▂
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55149
eval/IoU,0.38072
eval/kappa_score,0.54831
eval/loss,0.53627
eval/mean,0.46452
eval/runtime,43.5523
eval/samples_per_second,4.822
eval/steps_per_second,0.62
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: roxacseu with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.7473022457423206
[34m[1mwandb[0m: 	betta_2: 0.6663424604899317
[34m[1mwandb[0m: 	class_weight_0: 1.439485631801926
[34m[1mwandb[0m: 	class_weight_1: 45.92980593755469
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.7994194148942064
[34m[1mwandb[0m: 	focal_gamma: 2.566275395775653
[34m[1mwandb[0m: 	learning_rate: 0.0008168023387397251
[34m[1mwandb[0m: 	num_warmup_steps: 127
[34m[1mwandb[0m: 	objective: LovaszLoss
[34m[1mwandb[0m: 	smooth: 0.7365133374178976
[34m[1mwandb[0m: 	tversky_alpha: 0.6945701554439949
[34m[1mwandb[0m: 	tversky_betta: 0.46655044974013427
[34m[1mwandb[0m: 	weight_decay: 1.074939417815559


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,1.4984,1.067774,0.0,0.0,0.0,0.0
2,1.0565,1.0546,0.007074,0.014048,0.001371,0.004222
3,1.0116,1.034828,0.007074,0.014048,0.001371,0.004222
4,1.0045,1.011963,0.007074,0.014048,0.001371,0.004222
5,1.001,1.000584,0.00714,0.014179,0.001507,0.004324


Mean: -7.991712560606601e-07
IoU: 0.0
Dice: 0.0
kappa: -1.5983425121213202e-06
Mean: 0.004222193299486856
IoU: 0.0070737095749818625
Dice: 0.014048047342964004
kappa: 0.0013706770239918509
Mean: 0.0
IoU: 0.0
Dice: 0.0
kappa: 0.0
Mean: 0.004179424596928668
IoU: 0.007045092982074468
Dice: 0.013991613744350715
kappa: 0.0013137562117828683
Mean: 0.004323700811540109
IoU: 0.007140132266777809
Dice: 0.014179024423756124
kappa: 0.0015072693563024098


VBox(children=(Label(value='0.002 MB of 0.016 MB uploaded\r'), FloatProgress(value=0.13952638948073293, max=1.…

0,1
eval/Dice,▁████
eval/IoU,▁████
eval/kappa_score,▁▇▇▇█
eval/loss,█▇▅▂▁
eval/mean,▁████
eval/runtime,█▁▁▁▁
eval/samples_per_second,▁████
eval/steps_per_second,▁████
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.01418
eval/IoU,0.00714
eval/kappa_score,0.00151
eval/loss,1.00058
eval/mean,0.00432
eval/runtime,31.6211
eval/samples_per_second,6.641
eval/steps_per_second,0.854
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o2ck8r43 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.629881140625286
[34m[1mwandb[0m: 	betta_2: 0.762926155081574
[34m[1mwandb[0m: 	class_weight_0: 2.0494472977324336
[34m[1mwandb[0m: 	class_weight_1: 33.161879197158015
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.7925216398615187
[34m[1mwandb[0m: 	focal_gamma: 4.791973552104818
[34m[1mwandb[0m: 	learning_rate: 0.0008963872922448589
[34m[1mwandb[0m: 	num_warmup_steps: 178
[34m[1mwandb[0m: 	objective: TverskyLoss
[34m[1mwandb[0m: 	smooth: 0.3624678210640049
[34m[1mwandb[0m: 	tversky_alpha: 0.904747394067856
[34m[1mwandb[0m: 	tversky_betta: 0.3253667600103189
[34m[1mwandb[0m: 	weight_decay: 0.011802090889183992


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9127,0.741742,0.211651,0.349359,0.345361,0.278506
2,0.5784,0.577256,0.300583,0.462228,0.459521,0.380052
3,0.4776,0.557767,0.30658,0.469286,0.466697,0.386638
4,0.435,0.534754,0.320097,0.48496,0.482597,0.401347
5,0.4048,0.521338,0.331194,0.497589,0.495285,0.41324


Mean: 0.27850607556929163
IoU: 0.2116506600481997
Dice: 0.34935921223330196
kappa: 0.3453614910903835
Mean: 0.3800519181969355
IoU: 0.3005827259524377
Dice: 0.46222776906761714
kappa: 0.4595211104414334
Mean: 0.3866383246524113
IoU: 0.3065797988591116
Dice: 0.4692859925230944
kappa: 0.4666968504457111
Mean: 0.4013469159330302
IoU: 0.3200968441258106
Dice: 0.484959638454077
kappa: 0.4825969877402497
Mean: 0.4132395069208583
IoU: 0.3311935197460441
Dice: 0.4975888401398272
kappa: 0.49528549409567246


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇▇█
eval/IoU,▁▆▇▇█
eval/kappa_score,▁▆▇▇█
eval/loss,█▃▂▁▁
eval/mean,▁▆▇▇█
eval/runtime,▁▅▇█▂
eval/samples_per_second,█▄▂▁▇
eval/steps_per_second,█▃▂▁▇
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.49759
eval/IoU,0.33119
eval/kappa_score,0.49529
eval/loss,0.52134
eval/mean,0.41324
eval/runtime,43.2889
eval/samples_per_second,4.851
eval/steps_per_second,0.624
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: wku2u07v with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8108548538441731
[34m[1mwandb[0m: 	betta_2: 0.9897034106699476
[34m[1mwandb[0m: 	class_weight_0: 1.54217445360956
[34m[1mwandb[0m: 	class_weight_1: 41.294056483151586
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.6937035558034839
[34m[1mwandb[0m: 	focal_gamma: 1.3624889561642566
[34m[1mwandb[0m: 	learning_rate: 0.0006954018716699005
[34m[1mwandb[0m: 	num_warmup_steps: 8
[34m[1mwandb[0m: 	objective: JaccardLoss
[34m[1mwandb[0m: 	smooth: 1.1288603114135989
[34m[1mwandb[0m: 	tversky_alpha: 0.977324538599067
[34m[1mwandb[0m: 	tversky_betta: 0.27002424934734565
[34m[1mwandb[0m: 	weight_decay: 1.114067881929366


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8553,0.767855,0.243176,0.391217,0.387843,0.31551
2,0.674,0.697065,0.330919,0.497279,0.493894,0.412406
3,0.6152,0.666605,0.358824,0.528139,0.525215,0.442019
4,0.5867,0.658302,0.36736,0.537327,0.53457,0.450965
5,0.5638,0.66076,0.36736,0.537327,0.53457,0.450965


Mean: 0.31550954083298366
IoU: 0.24317566494680626
Dice: 0.39121690007857646
kappa: 0.3878434167191611
Mean: 0.4124063577498641
IoU: 0.33091862816366363
Dice: 0.4972785280197768
kappa: 0.49389408733606466
Mean: 0.44201924597407477
IoU: 0.3588236353701958
Dice: 0.5281386429114304
kappa: 0.5252148565779537
Mean: 0.45096512502729336
IoU: 0.3673600453726563
Dice: 0.5373274531691279
kappa: 0.5345702046819304
Mean: 0.4458016430342562
IoU: 0.36238842219913114
Dice: 0.5319898735107766
kappa: 0.5292148638693812


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆███
eval/IoU,▁▆███
eval/kappa_score,▁▆███
eval/loss,█▃▂▁▁
eval/mean,▁▆███
eval/runtime,█▇▁▅▆
eval/samples_per_second,▁▂█▄▃
eval/steps_per_second,▁▃█▄▃
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.53733
eval/IoU,0.36736
eval/kappa_score,0.53457
eval/loss,0.66076
eval/mean,0.45097
eval/runtime,43.4764
eval/samples_per_second,4.83
eval/steps_per_second,0.621
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: p4ywqdpl with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8939061590277075
[34m[1mwandb[0m: 	betta_2: 0.7105636380092184
[34m[1mwandb[0m: 	class_weight_0: 4.320869219814727
[34m[1mwandb[0m: 	class_weight_1: 25.106457627016255
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.8999374515147968
[34m[1mwandb[0m: 	focal_gamma: 2.988810391434598
[34m[1mwandb[0m: 	learning_rate: 0.0007075174320906909
[34m[1mwandb[0m: 	num_warmup_steps: 172
[34m[1mwandb[0m: 	objective: FocalLoss
[34m[1mwandb[0m: 	smooth: 1.276019578350858
[34m[1mwandb[0m: 	tversky_alpha: 0.16546986777235573
[34m[1mwandb[0m: 	tversky_betta: 0.4403217443006471
[34m[1mwandb[0m: 	weight_decay: 1.3006119819667157


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.0072,0.002218,0.059202,0.111786,0.101831,0.080517
2,0.0016,0.001307,0.239806,0.386845,0.381187,0.310497
3,0.001,0.001339,0.286324,0.445182,0.440329,0.363326
4,0.0009,0.001273,0.343946,0.511844,0.508024,0.425985
5,0.0008,0.001394,0.343946,0.511844,0.508024,0.425985


Mean: 0.08051668517730115
IoU: 0.05920200919680195
Dice: 0.11178605909498816
kappa: 0.10183136115780034
Mean: 0.3104967928637799
IoU: 0.2398061312789995
Dice: 0.3868445642087808
kappa: 0.3811874544485603
Mean: 0.36332634846076983
IoU: 0.2863238966302866
Dice: 0.4451816488527562
kappa: 0.440328800291253
Mean: 0.42598464478697146
IoU: 0.3439455496754269
Dice: 0.5118444713158095
kappa: 0.508023739898516
Mean: 0.41552596846833123
IoU: 0.3341893972723273
Dice: 0.500962453989753
kappa: 0.49686253966433513


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇██
eval/IoU,▁▅▇██
eval/kappa_score,▁▆▇██
eval/loss,█▁▁▁▂
eval/mean,▁▆▇██
eval/runtime,█▅▁▃▂
eval/samples_per_second,▁▄█▆▇
eval/steps_per_second,▁▃█▆▇
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.51184
eval/IoU,0.34395
eval/kappa_score,0.50802
eval/loss,0.00139
eval/mean,0.42598
eval/runtime,43.2776
eval/samples_per_second,4.852
eval/steps_per_second,0.624
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: i6spxd67 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.930011706003182
[34m[1mwandb[0m: 	betta_2: 0.9253443597132932
[34m[1mwandb[0m: 	class_weight_0: 3.778649112900166
[34m[1mwandb[0m: 	class_weight_1: 11.22606316076324
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.7920924175891148
[34m[1mwandb[0m: 	focal_gamma: 2.285697809424394
[34m[1mwandb[0m: 	learning_rate: 0.0007933997999162081
[34m[1mwandb[0m: 	num_warmup_steps: 42
[34m[1mwandb[0m: 	objective: LovaszLoss
[34m[1mwandb[0m: 	smooth: 0.9072731495147894
[34m[1mwandb[0m: 	tversky_alpha: 0.6209329114130125
[34m[1mwandb[0m: 	tversky_betta: 0.7509479968831958
[34m[1mwandb[0m: 	weight_decay: 1.44438485098195


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,1.4526,1.123028,0.006913,0.013731,0.001048,0.003981
2,1.0278,1.04001,0.006991,0.013884,0.001203,0.004097
3,1.0068,1.035054,0.006991,0.013884,0.001203,0.004097
4,1.0025,1.003117,0.006991,0.013884,0.001203,0.004097
5,1.0008,1.000965,0.006991,0.013884,0.001203,0.004097


Mean: 0.003980515463428636
IoU: 0.006913186466742306
Dice: 0.013731444894471336
kappa: 0.0010478444601149661
Mean: 0.004097036053276019
IoU: 0.0069905948191646946
Dice: 0.013884131301981158
kappa: 0.0012034772873873445
Mean: 0.00397880073809913
IoU: 0.006912045893608585
Dice: 0.013729194961560566
kappa: 0.0010455555825896745
Mean: 0.003975823521811305
IoU: 0.006908784509576658
Dice: 0.013722761417642493
kappa: 0.0010428625340459519
Mean: 0.0035873421315699974
IoU: 0.006652049370884537
Dice: 0.013216184033086286
kappa: 0.000522634892255458


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁████
eval/IoU,▁████
eval/kappa_score,▁████
eval/loss,█▃▃▁▁
eval/mean,▁████
eval/runtime,██▁▁▁
eval/samples_per_second,▁▁███
eval/steps_per_second,▁▁███
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.01388
eval/IoU,0.00699
eval/kappa_score,0.0012
eval/loss,1.00097
eval/mean,0.0041
eval/runtime,23.4941
eval/samples_per_second,8.938
eval/steps_per_second,1.149
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: ss2vwkal with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8728910702743591
[34m[1mwandb[0m: 	betta_2: 0.6156982494201135
[34m[1mwandb[0m: 	class_weight_0: 2.081252802252578
[34m[1mwandb[0m: 	class_weight_1: 8.243519985607286
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.4853521256472897
[34m[1mwandb[0m: 	focal_gamma: 3.6674615451709105
[34m[1mwandb[0m: 	learning_rate: 0.0006478615372493986
[34m[1mwandb[0m: 	num_warmup_steps: 60
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 1.4603172291169433
[34m[1mwandb[0m: 	tversky_alpha: 0.13787991427331822
[34m[1mwandb[0m: 	tversky_betta: 0.12043299452654808
[34m[1mwandb[0m: 	weight_decay: 1.3682517959626408


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.844,0.635733,0.255648,0.407197,0.403202,0.329425
2,0.4993,0.538518,0.341722,0.509379,0.506493,0.424108
3,0.438,0.532092,0.343629,0.511493,0.508646,0.426137
4,0.4091,0.502709,0.374613,0.545045,0.54222,0.458416
5,0.3863,0.496158,0.378457,0.549102,0.546343,0.4624


Mean: 0.3294249736053978
IoU: 0.2556481714406152
Dice: 0.407197138904456
kappa: 0.40320177577018046
Mean: 0.4241078712977101
IoU: 0.34172227912799663
Dice: 0.509378556865116
kappa: 0.5064934634674236
Mean: 0.42613740198368544
IoU: 0.3436285214428573
Dice: 0.5114933420345251
kappa: 0.5086462825245135
Mean: 0.4584164338041822
IoU: 0.3746127277908356
Dice: 0.5450447536490984
kappa: 0.5422201398175288
Mean: 0.46239997959766743
IoU: 0.37845702082221033
Dice: 0.5491023878226853
kappa: 0.5463429383731245


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▆██
eval/IoU,▁▆▆██
eval/kappa_score,▁▆▆██
eval/loss,█▃▃▁▁
eval/mean,▁▆▆██
eval/runtime,▇▇█▇▁
eval/samples_per_second,▁▂▁▂█
eval/steps_per_second,▁▁▁▁█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.5491
eval/IoU,0.37846
eval/kappa_score,0.54634
eval/loss,0.49616
eval/mean,0.4624
eval/runtime,43.2924
eval/samples_per_second,4.851
eval/steps_per_second,0.624
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: o0b7apca with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.6294689182186379
[34m[1mwandb[0m: 	betta_2: 0.6669213372166207
[34m[1mwandb[0m: 	class_weight_0: 3.4896857896544
[34m[1mwandb[0m: 	class_weight_1: 10.335706443686828
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.7097873490532013
[34m[1mwandb[0m: 	focal_gamma: 0.9896388172583808
[34m[1mwandb[0m: 	learning_rate: 0.0004487272434322498
[34m[1mwandb[0m: 	num_warmup_steps: 77
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.3669361074269126
[34m[1mwandb[0m: 	tversky_alpha: 0.6068108585122916
[34m[1mwandb[0m: 	tversky_betta: 0.20622973804509623
[34m[1mwandb[0m: 	weight_decay: 1.1080791556138363


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8836,0.683173,0.240393,0.387607,0.383157,0.311775
2,0.5268,0.559947,0.320438,0.485351,0.482008,0.401223
3,0.4372,0.531385,0.350125,0.518655,0.515344,0.432734
4,0.4039,0.513712,0.365825,0.535684,0.53284,0.449333
5,0.3803,0.508837,0.37036,0.540529,0.537776,0.454068


Mean: 0.3117750012629517
IoU: 0.2403925315180261
Dice: 0.3876071895141568
kappa: 0.38315747100787734
Mean: 0.4012227281953946
IoU: 0.32043793450675706
Dice: 0.48535099777552976
kappa: 0.4820075218840322
Mean: 0.43273449388125523
IoU: 0.3501245314437429
Dice: 0.5186551659339758
kappa: 0.5153444563187676
Mean: 0.44933261067633196
IoU: 0.3658249907983008
Dice: 0.5356835513523333
kappa: 0.5328402305543631
Mean: 0.4540675860828379
IoU: 0.3703596643548605
Dice: 0.5405291384276387
kappa: 0.5377755078108153


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▅▇██
eval/IoU,▁▅▇██
eval/kappa_score,▁▅▇██
eval/loss,█▃▂▁▁
eval/mean,▁▅▇██
eval/runtime,▅█▄▆▁
eval/samples_per_second,▄▁▅▃█
eval/steps_per_second,▄▁▅▂█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.54053
eval/IoU,0.37036
eval/kappa_score,0.53778
eval/loss,0.50884
eval/mean,0.45407
eval/runtime,43.127
eval/samples_per_second,4.869
eval/steps_per_second,0.626
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 49ygzp31 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.5235374681528719
[34m[1mwandb[0m: 	betta_2: 0.6816044531247581
[34m[1mwandb[0m: 	class_weight_0: 1.3924617617792947
[34m[1mwandb[0m: 	class_weight_1: 44.85923342540586
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.6560744248042729
[34m[1mwandb[0m: 	focal_gamma: 3.5488196579789246
[34m[1mwandb[0m: 	learning_rate: 1.3009822060879884e-05
[34m[1mwandb[0m: 	num_warmup_steps: 18
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.13900516493919185
[34m[1mwandb[0m: 	tversky_alpha: 0.40620230806402535
[34m[1mwandb[0m: 	tversky_betta: 0.4588779709239395
[34m[1mwandb[0m: 	weight_decay: 0.11494105313993835


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9746,0.975474,0.01387,0.027361,0.015154,0.014512
2,0.9669,0.974689,0.015049,0.029651,0.017575,0.016312
3,0.9622,0.9733,0.01579,0.03109,0.019007,0.017399
4,0.9601,0.971689,0.016662,0.032778,0.020744,0.018703
5,0.9584,0.970739,0.017552,0.034499,0.022551,0.020052


Mean: 0.014511967576773206
IoU: 0.013870094960461559
Dice: 0.027360694490160417
kappa: 0.015153840193084855
Mean: 0.016312040254785175
IoU: 0.015048744003713829
Dice: 0.02965127358190942
kappa: 0.01757533650585652
Mean: 0.01739853123928845
IoU: 0.015790471250254852
Dice: 0.031090016488970664
kappa: 0.019006591228322045
Mean: 0.01870290617855931
IoU: 0.016662020157374974
Dice: 0.03277789437790892
kappa: 0.02074379219974365
Mean: 0.020051689629064312
IoU: 0.01755236323527826
Dice: 0.034499184257154154
kappa: 0.022551016022850368


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▃▅▆█
eval/IoU,▁▃▅▆█
eval/kappa_score,▁▃▅▆█
eval/loss,█▇▅▂▁
eval/mean,▁▃▅▆█
eval/runtime,▆▆▆█▁
eval/samples_per_second,▃▃▃▁█
eval/steps_per_second,▃▃▃▁█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.0345
eval/IoU,0.01755
eval/kappa_score,0.02255
eval/loss,0.97074
eval/mean,0.02005
eval/runtime,43.016
eval/samples_per_second,4.882
eval/steps_per_second,0.628
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: ci5t37lr with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8858163444838021
[34m[1mwandb[0m: 	betta_2: 0.8143865708746238
[34m[1mwandb[0m: 	class_weight_0: 3.704705940358119
[34m[1mwandb[0m: 	class_weight_1: 39.1904586429003
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5116269850751485
[34m[1mwandb[0m: 	focal_gamma: 1.7087746949820404
[34m[1mwandb[0m: 	learning_rate: 0.0006902614581064151
[34m[1mwandb[0m: 	num_warmup_steps: 84
[34m[1mwandb[0m: 	objective: FocalLoss
[34m[1mwandb[0m: 	smooth: 0.7840431280668245
[34m[1mwandb[0m: 	tversky_alpha: 0.48790437352155935
[34m[1mwandb[0m: 	tversky_betta: 0.3779699123603384
[34m[1mwandb[0m: 	weight_decay: 0.06775418991454668


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.0358,0.006017,0.0184,0.036135,0.035733,0.027066
2,0.0045,0.003918,0.088291,0.162256,0.161115,0.124703
3,0.0031,0.003635,0.108013,0.194966,0.193689,0.150851
4,0.0027,0.003386,0.22547,0.367973,0.365964,0.295717
5,0.0024,0.003253,0.258108,0.410311,0.408145,0.333126


Mean: 0.027066260849680158
IoU: 0.018399686432611008
Dice: 0.03613450922606611
kappa: 0.03573283526674931
Mean: 0.12470267771364779
IoU: 0.0882908077963638
Dice: 0.16225591021050761
kappa: 0.1611145476309318
Mean: 0.15085096601900141
IoU: 0.10801260124026217
Dice: 0.19496637695159325
kappa: 0.19368933079774064
Mean: 0.2957171532079148
IoU: 0.22547026993048752
Dice: 0.3679734636781958
kappa: 0.36596403648534215
Mean: 0.3331264149246229
IoU: 0.25810773223367417
Dice: 0.4103110180801824
kappa: 0.4081450976155716


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▃▄▇█
eval/IoU,▁▃▄▇█
eval/kappa_score,▁▃▄▇█
eval/loss,█▃▂▁▁
eval/mean,▁▃▄▇█
eval/runtime,█▇█▁▂
eval/samples_per_second,▁▂▁█▇
eval/steps_per_second,▁▁▁██
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.41031
eval/IoU,0.25811
eval/kappa_score,0.40815
eval/loss,0.00325
eval/mean,0.33313
eval/runtime,43.1548
eval/samples_per_second,4.866
eval/steps_per_second,0.626
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: jbiwnm7o with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8436024805812876
[34m[1mwandb[0m: 	betta_2: 0.5553217025573203
[34m[1mwandb[0m: 	class_weight_0: 4.474480545533571
[34m[1mwandb[0m: 	class_weight_1: 46.89024881738088
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5753386097047765
[34m[1mwandb[0m: 	focal_gamma: 0.06605300534842151
[34m[1mwandb[0m: 	learning_rate: 0.0007525439190135291
[34m[1mwandb[0m: 	num_warmup_steps: 91
[34m[1mwandb[0m: 	objective: TverskyLoss
[34m[1mwandb[0m: 	smooth: 1.4591731361246745
[34m[1mwandb[0m: 	tversky_alpha: 0.42779169584818544
[34m[1mwandb[0m: 	tversky_betta: 0.7395300034977248
[34m[1mwandb[0m: 	weight_decay: 1.1905659956007923


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8586,0.641215,0.25675,0.408593,0.404082,0.330416
2,0.5138,0.556049,0.339687,0.507114,0.503615,0.421651
3,0.4537,0.553109,0.35397,0.522862,0.519765,0.436867
4,0.4248,0.520674,0.38162,0.552424,0.549433,0.465526
5,0.3995,0.514362,0.38432,0.555247,0.552211,0.468265


Mean: 0.33041588535952365
IoU: 0.25674984815845703
Dice: 0.4085934023141967
kappa: 0.4040819225605903
Mean: 0.421650730121806
IoU: 0.339686801353357
Dice: 0.5071137537672298
kappa: 0.503614658890255
Mean: 0.4368672439355672
IoU: 0.35396978218388064
Dice: 0.522862159616216
kappa: 0.5197647056872537
Mean: 0.4655262019448055
IoU: 0.3816198730192283
Dice: 0.552423833026202
kappa: 0.5494325308703827
Mean: 0.4682652001373333
IoU: 0.3843198250287247
Dice: 0.5552471590454179
kappa: 0.5522105752459419


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▆██
eval/IoU,▁▆▆██
eval/kappa_score,▁▆▆██
eval/loss,█▃▃▁▁
eval/mean,▁▆▆██
eval/runtime,▁█▅▆█
eval/samples_per_second,█▁▄▃▁
eval/steps_per_second,█▁▄▂▁
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55525
eval/IoU,0.38432
eval/kappa_score,0.55221
eval/loss,0.51436
eval/mean,0.46827
eval/runtime,43.4197
eval/samples_per_second,4.837
eval/steps_per_second,0.622
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: uamqxziz with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.8958579135562101
[34m[1mwandb[0m: 	betta_2: 0.751756279379073
[34m[1mwandb[0m: 	class_weight_0: 2.353097189988615
[34m[1mwandb[0m: 	class_weight_1: 24.657624590963955
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.16707501609128267
[34m[1mwandb[0m: 	focal_gamma: 0.6166165653737221
[34m[1mwandb[0m: 	learning_rate: 0.00024950737264278414
[34m[1mwandb[0m: 	num_warmup_steps: 114
[34m[1mwandb[0m: 	objective: JaccardLoss
[34m[1mwandb[0m: 	smooth: 0.5800964743496481
[34m[1mwandb[0m: 	tversky_alpha: 0.969015178863768
[34m[1mwandb[0m: 	tversky_betta: 0.9590654348978008
[34m[1mwandb[0m: 	weight_decay: 1.3558788774708412


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.984,0.975993,0.022769,0.044523,0.034419,0.028594
2,0.8162,0.736538,0.292314,0.452388,0.449125,0.37072
3,0.6498,0.708782,0.317924,0.482462,0.479541,0.398733
4,0.6062,0.685744,0.341467,0.509095,0.506256,0.423862
5,0.5801,0.673086,0.354846,0.523818,0.520998,0.437922


Mean: 0.0285939941427498
IoU: 0.022768553556666578
Dice: 0.044523374281481724
kappa: 0.03441943472883302
Mean: 0.37071952764702687
IoU: 0.2923139683146541
Dice: 0.4523884682541498
kappa: 0.4491250869793997
Mean: 0.39873259273979145
IoU: 0.3179239216774047
Dice: 0.48246172096605233
kappa: 0.4795412638021782
Mean: 0.4238618201678263
IoU: 0.3414673029524359
Dice: 0.5090952305746109
kappa: 0.5062563373832167
Mean: 0.43792225180368416
IoU: 0.35484634895706746
Dice: 0.5238178472861086
kappa: 0.5209981546503009


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▇▇██
eval/IoU,▁▇▇██
eval/kappa_score,▁▇▇██
eval/loss,█▂▂▁▁
eval/mean,▁▇▇██
eval/runtime,▁▆▅▄█
eval/samples_per_second,█▃▄▅▁
eval/steps_per_second,█▃▅▅▁
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.52382
eval/IoU,0.35485
eval/kappa_score,0.521
eval/loss,0.67309
eval/mean,0.43792
eval/runtime,43.4071
eval/samples_per_second,4.838
eval/steps_per_second,0.622
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: fnz36lva with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.9032595424378208
[34m[1mwandb[0m: 	betta_2: 0.9901394918116092
[34m[1mwandb[0m: 	class_weight_0: 1.6141350563049686
[34m[1mwandb[0m: 	class_weight_1: 37.066111578989606
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.606030401065036
[34m[1mwandb[0m: 	focal_gamma: 2.164085160387294
[34m[1mwandb[0m: 	learning_rate: 5.6748842077669834e-05
[34m[1mwandb[0m: 	num_warmup_steps: 119
[34m[1mwandb[0m: 	objective: TverskyLoss
[34m[1mwandb[0m: 	smooth: 1.0066962421646646
[34m[1mwandb[0m: 	tversky_alpha: 0.6829604005319596
[34m[1mwandb[0m: 	tversky_betta: 0.41224245121039416
[34m[1mwandb[0m: 	weight_decay: 1.2993331494138105


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9814,0.981765,0.013993,0.0276,0.015539,0.014766
2,0.9721,0.975532,0.017245,0.033906,0.022135,0.01969
3,0.9544,0.942734,0.101907,0.184965,0.177928,0.139918
4,0.9002,0.894744,0.136861,0.240769,0.2345,0.18568
5,0.8652,0.883812,0.158446,0.273549,0.268041,0.213243


Mean: 0.014766066417364037
IoU: 0.013993134436634158
Dice: 0.027600057557408655
kappa: 0.015538998398093917
Mean: 0.01968998029658605
IoU: 0.017245365034801605
Dice: 0.03390600857485669
kappa: 0.022134595558370496
Mean: 0.13991780573401594
IoU: 0.10190738795971051
Dice: 0.18496543189242431
kappa: 0.17792822350832138
Mean: 0.18568044608980025
IoU: 0.1368605383009311
Dice: 0.24076926534097642
kappa: 0.2345003538786694
Mean: 0.21324305060550458
IoU: 0.15844558266272654
Dice: 0.27354859828380373
kappa: 0.2680405185482826


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▁▅▇█
eval/IoU,▁▁▅▇█
eval/kappa_score,▁▁▆▇█
eval/loss,██▅▂▁
eval/mean,▁▁▅▇█
eval/runtime,▆█▇▇▁
eval/samples_per_second,▃▁▂▂█
eval/steps_per_second,▃▁▃▁█
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.27355
eval/IoU,0.15845
eval/kappa_score,0.26804
eval/loss,0.88381
eval/mean,0.21324
eval/runtime,43.1729
eval/samples_per_second,4.864
eval/steps_per_second,0.625
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 32yyotyq with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.6119388435971098
[34m[1mwandb[0m: 	betta_2: 0.7861969950282433
[34m[1mwandb[0m: 	class_weight_0: 4.481979348490236
[34m[1mwandb[0m: 	class_weight_1: 17.852198200251536
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.19665178437679592
[34m[1mwandb[0m: 	focal_gamma: 3.065457350702975
[34m[1mwandb[0m: 	learning_rate: 0.0005944355051384574
[34m[1mwandb[0m: 	num_warmup_steps: 170
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.1936205812167024
[34m[1mwandb[0m: 	tversky_alpha: 0.14805244528643058
[34m[1mwandb[0m: 	tversky_betta: 0.6259813217342477
[34m[1mwandb[0m: 	weight_decay: 1.410013315021715


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.9438,0.864984,0.125656,0.223258,0.217391,0.171523
2,0.5625,0.587163,0.290708,0.450463,0.447389,0.369049
3,0.4459,0.557383,0.322052,0.487201,0.4831,0.402576
4,0.4098,0.511917,0.363575,0.533267,0.530408,0.446992
5,0.3827,0.50062,0.376345,0.546876,0.544159,0.460252


Mean: 0.17152313315912385
IoU: 0.12565571091962918
Dice: 0.2232578037861541
kappa: 0.2173905553986185
Mean: 0.3690489458988028
IoU: 0.2907084917485937
Dice: 0.4504634371077158
kappa: 0.44738940004901195
Mean: 0.4025763944124572
IoU: 0.3220523628295492
Dice: 0.48720061607888226
kappa: 0.4831004259953652
Mean: 0.4469915079690947
IoU: 0.36357454030198577
Dice: 0.5332668358877781
kappa: 0.5304084756362036
Mean: 0.46025204711608614
IoU: 0.3763446249459644
Dice: 0.5468755689887477
kappa: 0.544159469286208


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▇██
eval/IoU,▁▆▆██
eval/kappa_score,▁▆▇██
eval/loss,█▃▂▁▁
eval/mean,▁▆▇██
eval/runtime,▄▂█▁▂
eval/samples_per_second,▅▇▁█▇
eval/steps_per_second,▁█▁██
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.54688
eval/IoU,0.37634
eval/kappa_score,0.54416
eval/loss,0.50062
eval/mean,0.46025
eval/runtime,43.3573
eval/samples_per_second,4.843
eval/steps_per_second,0.623
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Agent Starting Run: y335qn79 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.6974465151227456
[34m[1mwandb[0m: 	betta_2: 0.9393380182313148
[34m[1mwandb[0m: 	class_weight_0: 4.332417667883528
[34m[1mwandb[0m: 	class_weight_1: 27.302347312822032
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.22332802532920143
[34m[1mwandb[0m: 	focal_gamma: 3.4100024881360618
[34m[1mwandb[0m: 	learning_rate: 0.0008991692461364375
[34m[1mwandb[0m: 	num_warmup_steps: 0
[34m[1mwandb[0m: 	objective: TverskyLoss
[34m[1mwandb[0m: 	smooth: 0.6808015987930836
[34m[1mwandb[0m: 	tversky_alpha: 0.5629822858711736
[34m[1mwandb[0m: 	tversky_betta: 0.4137723565583292
[34m[1mwandb[0m: 	weight_decay: 0.6208185344538202


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.7196,0.622072,0.239637,0.386624,0.383648,0.311642
2,0.4875,0.562267,0.310431,0.473785,0.470253,0.390342
3,0.4315,0.510454,0.351199,0.519833,0.516934,0.434067
4,0.4036,0.510694,0.351206,0.519841,0.517138,0.434172
5,0.3808,0.498761,0.358165,0.527425,0.524836,0.4415


Mean: 0.3116423316540002
IoU: 0.239636811623431
Dice: 0.38662422634836424
kappa: 0.3836478516845694
Mean: 0.3903422741469913
IoU: 0.31043128673207904
Dice: 0.47378491321925753
kappa: 0.4702532615619036
Mean: 0.4340667245471529
IoU: 0.35119921933093073
Dice: 0.5198333662519921
kappa: 0.516934229763375
Mean: 0.4341719111356265
IoU: 0.35120609869806957
Dice: 0.5198409021931856
kappa: 0.5171377235731833
Mean: 0.4415003729820707
IoU: 0.3581649263069308
Dice: 0.5274247911567543
kappa: 0.5248358196572106


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▅███
eval/IoU,▁▅███
eval/kappa_score,▁▅███
eval/loss,█▅▂▂▁
eval/mean,▁▅███
eval/runtime,█▄▃▁▆
eval/samples_per_second,▁▅▆█▃
eval/steps_per_second,▁▆▆█▃
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.52742
eval/IoU,0.35816
eval/kappa_score,0.52484
eval/loss,0.49876
eval/mean,0.4415
eval/runtime,43.404
eval/samples_per_second,4.838
eval/steps_per_second,0.622
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: adzglcfv with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.723700733304389
[34m[1mwandb[0m: 	betta_2: 0.8623148335077795
[34m[1mwandb[0m: 	class_weight_0: 2.107086535538091
[34m[1mwandb[0m: 	class_weight_1: 44.436829097962686
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.904923921575955
[34m[1mwandb[0m: 	focal_gamma: 3.7137061370482103
[34m[1mwandb[0m: 	learning_rate: 0.0007726718203435344
[34m[1mwandb[0m: 	num_warmup_steps: 152
[34m[1mwandb[0m: 	objective: DiceLoss
[34m[1mwandb[0m: 	smooth: 0.8612710874385126
[34m[1mwandb[0m: 	tversky_alpha: 0.1280711896365804
[34m[1mwandb[0m: 	tversky_betta: 0.4201100017927669
[34m[1mwandb[0m: 	weight_decay: 0.03317303075935696


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,0.8953,0.629719,0.256961,0.408861,0.405417,0.331189
2,0.5236,0.536006,0.335876,0.502856,0.499484,0.41768
3,0.4302,0.527596,0.342212,0.509922,0.50658,0.424396
4,0.3975,0.496244,0.376809,0.547366,0.544389,0.460599
5,0.3695,0.492174,0.382289,0.553124,0.550423,0.466356


Mean: 0.33118918536211195
IoU: 0.25696146896518046
Dice: 0.4088613299765335
kappa: 0.40541690175904344
Mean: 0.4176802611686229
IoU: 0.33587647788847164
Dice: 0.502855590989024
kappa: 0.49948404444877414
Mean: 0.4243960802256677
IoU: 0.34221201230928144
Dice: 0.5099224402268673
kappa: 0.506580148142054
Mean: 0.46059901858807145
IoU: 0.3768093489404024
Dice: 0.5473660521413458
kappa: 0.5443886882357405
Mean: 0.4663560672172935
IoU: 0.38228876025486197
Dice: 0.5531243127295296
kappa: 0.5504233741797251


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/Dice,▁▆▆██
eval/IoU,▁▅▆██
eval/kappa_score,▁▆▆██
eval/loss,█▃▃▁▁
eval/mean,▁▅▆██
eval/runtime,▅▁▅█▇
eval/samples_per_second,▄█▄▁▂
eval/steps_per_second,▄█▄▁▃
train/epoch,▁▁▃▃▅▅▆▆███
train/global_step,▁▁▃▃▅▅▆▆███

0,1
eval/Dice,0.55312
eval/IoU,0.38229
eval/kappa_score,0.55042
eval/loss,0.49217
eval/mean,0.46636
eval/runtime,43.4982
eval/samples_per_second,4.828
eval/steps_per_second,0.621
total_flos,1.219893043396608e+18
train/epoch,5.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 1auic82e with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betta_1: 0.9552723961619992
[34m[1mwandb[0m: 	betta_2: 0.8690295460589099
[34m[1mwandb[0m: 	class_weight_0: 1.790318702394839
[34m[1mwandb[0m: 	class_weight_1: 18.14968159440661
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	focal_alpha: 0.5579754055573171
[34m[1mwandb[0m: 	focal_gamma: 3.726141630312916
[34m[1mwandb[0m: 	learning_rate: 0.00024195991091649285
[34m[1mwandb[0m: 	num_warmup_steps: 97
[34m[1mwandb[0m: 	objective: LovaszLoss
[34m[1mwandb[0m: 	smooth: 0.2321194661159769
[34m[1mwandb[0m: 	tversky_alpha: 0.678223264777535
[34m[1mwandb[0m: 	tversky_betta: 0.6158880382491824
[34m[1mwandb[0m: 	weight_decay: 0.04521691935476141


Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b2 and are newly initialized: ['decode_head.batch_norm.bias', 'decode_head.batch_norm.num_batches_tracked', 'decode_head.batch_norm.running_mean', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.weight', 'decode_head.classifier.bias', 'decode_head.classifier.weight', 'decode_head.linear_c.0.proj.bias', 'decode_head.linear_c.0.proj.weight', 'decode_head.linear_c.1.proj.bias', 'decode_head.linear_c.1.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.2.proj.weight', 'decode_head.linear_c.3.proj.bias', 'decode_head.linear_c.3.proj.weight', 'decode_head.linear_fuse.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Iou,Dice,Kappa Score,Mean
1,1.5204,1.162397,0.004355,0.008671,-0.003567,0.000394
2,1.0579,1.00864,0.004355,0.008671,-0.003567,0.000394


Mean: 0.00039364822142389346
IoU: 0.004354506544332539
Dice: 0.008671254056130092
kappa: -0.0035672101014847524
Mean: -3.848738709955057e-06
IoU: 0.0
Dice: 0.0
kappa: -7.697477419910115e-06


[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
