# Importing & Loading Dependencies

In [1]:
!pip install monai

import nibabel as nib
from monai.transforms import LoadImage, Compose, NormalizeIntensityd, RandFlipd, RandAdjustContrastd, Resized,  CropForegroundd, SpatialPadd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from typing import Optional, Sequence, Tuple, Union
from torch.nn.functional import interpolate

from monai.networks.blocks.convolutions import Convolution
from monai.networks.layers.factories import Act, Norm
from monai.networks.layers.utils import get_act_layer, get_norm_layer
from monai.metrics import DiceMetric, HausdorffDistanceMetric

from torch import nn, optim, amp
from itertools import chain
from monai.losses import DiceLoss

from tqdm import tqdm
from pathlib import Path
import math
import os
import random

Collecting monai
  Downloading monai-1.5.0-py3-none-any.whl.metadata (13 kB)
Downloading monai-1.5.0-py3-none-any.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: monai
Successfully installed monai-1.5.0


# Creating Dataset with Preprocessing

In [2]:
class CustomDataset3D(Dataset):
    """Multi-cohort MRI dataset yielding four stacked sequences plus a segmentation mask.

    Every item is read from NIfTI files (``t1c``, ``t1n``, ``t2f``, ``t2w`` and
    ``seg``), cropped to the image foreground, resized with preserved aspect
    ratio and padded to 128 x 128 x 128, then intensity-normalised. In
    ``'training'`` mode a random flip and a random contrast adjustment are
    applied as well.

    Args:
        data_dirs: dataset root directories (stored only; not read by this class).
        patient_lists: patient ids. The second ``-``-separated token of an id
            selects the cohort folder the patient is loaded from.
        mode: ``'training'`` enables augmentation; any other value runs the
            deterministic pipeline.
    """

    # Cohort token -> folder holding that cohort's patient directories.
    _COHORT_ROOTS = {
        'GLI': '/kaggle/input/bratsglioma/Training',
        'SSA': '/kaggle/input/bratsafrica24',
        'PED': '/kaggle/input/bratsped/Training',
        'MEN': '/kaggle/input/bratsmen',
    }
    # Any token not listed above is looked up in this folder.
    _DEFAULT_ROOT = '/kaggle/input/bratsmet24'

    def __init__(self, data_dirs, patient_lists, mode):
        self.data_dirs = data_dirs
        self.patient_lists = patient_lists
        self.mode = mode

    @staticmethod
    def _aspect_preserving_shape(original_shape, target_size):
        """Return the largest shape that fits ``target_size`` at the original aspect ratio.

        The scale is the smallest per-axis ratio ``target / original``. It is
        applied with integer arithmetic so the limiting axis lands exactly on
        its target; ``int(dim * float_scale)`` can fall one voxel short because
        of floating-point rounding.
        """
        limiting = min(range(3), key=lambda axis: target_size[axis] / original_shape[axis])
        numerator = int(target_size[limiting])
        denominator = int(original_shape[limiting])
        return [int(dim) * numerator // denominator for dim in original_shape]

    @staticmethod
    def resize_with_aspect_ratio(keys, target_size):
        """Build a dict transform that resizes each key to fit ``target_size`` and pads the rest.

        Args:
            keys: dictionary keys to process; ``"imgs"`` is resized trilinearly,
                every other key with nearest-exact interpolation.
            target_size: final spatial size (three values).

        Returns:
            A callable taking and returning the data dictionary.
        """
        def transform(data):
            for key in keys:
                original_shape = data[key].shape[-3:]
                new_shape = CustomDataset3D._aspect_preserving_shape(original_shape, target_size)

                # Resize to the intermediate, aspect-preserving shape.
                interpolation = "trilinear" if key == "imgs" else "nearest-exact"
                data = Resized(keys=[key], spatial_size=new_shape, mode=interpolation)(data)

                # Pad up to the final target size.
                data = SpatialPadd(keys=[key], spatial_size=target_size, mode="constant")(data)
            return data

        return transform

    def preprocess(self, data, mode):
        """Apply the preprocessing (and, for training, augmentation) pipeline to ``data``.

        Args:
            data: dictionary with ``'imgs'`` and ``'masks'`` arrays.
            mode: ``'training'`` adds the random transforms; other values do not.

        Returns:
            The transformed data dictionary.
        """
        # The pipeline is rebuilt on every call, exactly as before.
        # NOTE(review): hoisting it to __init__ would make the random transforms
        # long-lived objects shared by worker copies of the dataset; confirm the
        # seeding story before changing this.
        steps = [
            CropForegroundd(keys=["imgs", "masks"], source_key="imgs"),
            self.resize_with_aspect_ratio(keys=["imgs", "masks"], target_size=[128, 128, 128]),
            NormalizeIntensityd(keys=['imgs'], nonzero=False, channel_wise=True),
        ]

        if mode == 'training':
            steps.append(RandFlipd(keys=["imgs", "masks"], prob=0.5, spatial_axis=2))
            steps.append(RandAdjustContrastd(keys=["imgs"], prob=0.15, gamma=(0.65, 1.5)))

        return Compose(steps)(data)

    def __len__(self):
        return len(self.patient_lists)

    @staticmethod
    def _resolve_file_path(folder, name):
        """Return the path of ``name`` inside ``folder``.

        When ``folder/name`` is itself a directory, the first ``.nii`` file
        found beneath it is returned instead; if none exists the directory
        path is returned unchanged.
        """
        file_path = os.path.join(folder, name)
        if os.path.isdir(file_path):
            for root, _, files in os.walk(file_path):
                for file in files:
                    if file.endswith(".nii"):
                        return os.path.join(root, file)
        return file_path

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at ``idx``.

        Returns:
            dict with ``'imgs'`` (float tensor, the four sequences stacked on
            the first axis), ``'masks'`` (float tensor, one channel),
            ``'patient_id'`` and ``'data_type'`` (the cohort token).
        """
        patient_id = self.patient_lists[idx]
        loadimage = LoadImage(reader='NibabelReader', image_only=True)

        data_type = patient_id.split('-')[1]
        cohort_root = self._COHORT_ROOTS.get(data_type, self._DEFAULT_ROOT)
        patient_folder_path = os.path.join(cohort_root, patient_id)

        # Sequence files may be wrapped in a same-named directory, hence the resolver.
        volume_paths = [
            self._resolve_file_path(patient_folder_path, f'{patient_id}-{sequence}.nii')
            for sequence in ('t1c', 't1n', 't2f', 't2w')
        ]
        # The segmentation path is used as-is (it was never resolved through the helper).
        volume_paths.append(os.path.join(patient_folder_path, patient_id + '-seg.nii'))

        # Add a leading channel axis to every volume and stack: 4 sequences + 1 mask.
        channels = [torch.Tensor(loadimage(path)).unsqueeze(0) for path in volume_paths]
        concat_tensor = torch.cat(channels, 0)
        data = {
            'imgs': np.array(concat_tensor[0:4, :, :, :]),
            'masks': np.array(concat_tensor[4:, :, :, :]),
        }

        augmented_imgs_masks = self.preprocess(data, self.mode)
        imgs = np.array(augmented_imgs_masks['imgs'])
        masks = np.array(augmented_imgs_masks['masks'])

        return {
            'imgs': torch.from_numpy(imgs).type(torch.FloatTensor),
            'masks': torch.from_numpy(masks).type(torch.FloatTensor),
            'patient_id': patient_id,
            'data_type': data_type,
        }

# Data Loaders

In [3]:
def combine_datasets(dataset_lists, batch_size=3):
    """Interleave several patient lists so that each batch holds one item per list.

    Positions run from 0 up to the longest list's length rounded up to a
    multiple of ``batch_size`` (wrapping around past the end). For each
    position, one entry is taken from every list; shorter lists are cycled.

    Args:
        dataset_lists: list of sequences to interleave.
        batch_size: must equal ``len(dataset_lists)``.

    Returns:
        Flat list of interleaved entries.

    Raises:
        ValueError: if ``batch_size`` differs from the number of lists.
    """
    longest = max(len(entries) for entries in dataset_lists)

    # One slot per dataset in every batch, so the two numbers have to agree.
    if len(dataset_lists) != batch_size:
        raise ValueError("Batch size must equal the number of datasets for this function.")

    # Round the number of positions up to a whole number of batch_size-sized groups.
    n_groups = len(range(0, longest, batch_size))
    interleaved = []
    for position in range(n_groups * batch_size):
        wrapped = position % longest
        for entries in dataset_lists:
            interleaved.append(entries[wrapped % len(entries)])

    return interleaved

In [4]:
def prepare_data_loaders(args):
    """Split every dataset directory into train/val/test and wrap the splits in DataLoaders.

    Each directory listing is sorted, shuffled with a fixed seed and split
    71% / 9% / remainder. Training ids from all directories are interleaved
    with ``combine_datasets``; validation and test ids are concatenated.

    Args:
        args: dict providing ``'data_dirs'``, ``'train_batch_size'``,
            ``'val_batch_size'``, ``'test_batch_size'`` and ``'workers'``.

    Returns:
        Tuple ``(trainLoader, valLoader, testLoader)``.
    """
    train_datasets, val_datasets, test_datasets = [], [], []
    split_ratio = {'training': 0.71, 'validation': 0.09, 'testing': 0.2}

    for data_dir in args['data_dirs']:
        patient_lists = sorted(os.listdir(data_dir))
        total_patients = len(patient_lists)

        # Re-seeding before every shuffle keeps each directory's split reproducible.
        random.seed(5)
        random.shuffle(patient_lists)

        train_split = int(split_ratio['training'] * total_patients)
        val_split = int(split_ratio['validation'] * total_patients)

        train_patient_lists = sorted(patient_lists[:train_split])
        val_patient_lists = sorted(patient_lists[train_split:train_split + val_split])
        # Whatever is left after the two integer-truncated splits goes to testing.
        test_patient_lists = sorted(patient_lists[train_split + val_split:])

        # The 4th path component names the dataset for paths like
        # '/kaggle/input/<name>/...'; fall back to the whole path when it is shorter.
        path_parts = data_dir.split("/")
        dataset_name = path_parts[3] if len(path_parts) > 3 else data_dir

        print(f'Number of training samples in {dataset_name} DataSet: {len(train_patient_lists)}')
        print(f'Number of validation samples in {dataset_name} DataSet: {len(val_patient_lists)}')
        print(f'Number of testing samples in {dataset_name} DataSet: {len(test_patient_lists)} ')

        train_datasets.append(train_patient_lists)
        val_datasets.append(val_patient_lists)
        test_datasets.append(test_patient_lists)

    combined_trainDataset = combine_datasets(train_datasets, batch_size=args['train_batch_size'])
    combined_valDataset = list(chain.from_iterable(val_datasets))
    combined_testDataset = list(chain.from_iterable(test_datasets))

    print('Number of combined training samples', len(combined_trainDataset))
    print('Number of combined validation samples', len(combined_valDataset))
    print('Number of combined testing samples', len(combined_testDataset))

    trainDataset = CustomDataset3D(args['data_dirs'], combined_trainDataset, mode='training')
    valDataset = CustomDataset3D(args['data_dirs'], combined_valDataset, mode='validation')
    testDataset = CustomDataset3D(args['data_dirs'], combined_testDataset, mode='testing')

    # shuffle stays off: the training list is already interleaved per batch.
    loader_kwargs = {'num_workers': args['workers'], 'pin_memory': True, 'shuffle': False}
    # DataLoader rejects an explicit prefetch_factor when no worker processes are used.
    if args['workers'] > 0:
        loader_kwargs['prefetch_factor'] = 2

    trainLoader = DataLoader(trainDataset, batch_size=args['train_batch_size'], **loader_kwargs)
    valLoader = DataLoader(valDataset, batch_size=args['val_batch_size'], **loader_kwargs)
    testLoader = DataLoader(testDataset, batch_size=args['test_batch_size'], **loader_kwargs)

    return trainLoader, valLoader, testLoader

# Visualizing Data

In [5]:
# args = {
#     'workers': 2,
#     'epochs': 10,
#     'train_batch_size': 2,
#     'val_batch_size': 2,
#     'test_batch_size': 2,
#     'learning_rate': 1e-3,
#     'weight_decay': 1e-5,
#     'lambd': 0.0051,
#     'data_dir': '/kaggle/input/bratsafrica24/',
#     'in_checkpoint_dir': Path('/kaggle/input/adultgliomamodel-45epochs'),
#     'out_checkpoint_dir': Path('/kaggle/working/')
# }

# trainLoader, valLoader, testLoader = prepare_data_loaders(args)

# for step, y in enumerate( trainLoader ):
#   print(y['imgs'].shape)
#   print(y['patient_id'])

#   fig, axes = plt.subplots(1, 4, figsize=(16, 4))
#   for sequence in range(4):
#     sequence_data = y['imgs'][0][sequence, :, :, :].cpu().detach().numpy()
#     slice_index = sequence_data.shape[2] // 2
#     axes[sequence].imshow(np.rot90(sequence_data[:, :, slice_index]), cmap='gray', origin='lower')
#     axes[sequence].set_title(f'Sequence {sequence + 1}')

#   plt.show()

# DynUNet Model

In [6]:
class UnetBasicBlock(nn.Module):
    """
    Two-convolution CNN block usable in DynUNet, based on:
    `Automated Design of Deep Learning Methods for Biomedical Image Segmentation <https://arxiv.org/abs/1904.08128>`_.
    `nnU-Net: Self-adapting Framework for U-Net-Based Medical Image Segmentation <https://arxiv.org/abs/1809.10486>`_.

    The forward pass is conv -> norm -> activation, applied twice. Only the
    first convolution uses ``stride``; the second always has stride 1.

    Args:
        spatial_dims: number of spatial dimensions.
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: convolution kernel size.
        stride: stride of the first convolution.
        norm_name: feature normalization type and arguments.
        act_name: activation layer type and arguments.
        dropout: dropout probability.

    """

    def __init__(
        self,
        spatial_dims: int,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[Sequence[int], int],
        stride: Union[Sequence[int], int],
        norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}),
        act_name: Union[Tuple, str] = ("leakyrelu", {"inplace": True, "negative_slope": 0.01}),
        dropout: Optional[Union[Tuple, str, float]] = None,
    ):
        super().__init__()
        # Settings common to both convolutions.
        shared_conv_args = {"kernel_size": kernel_size, "dropout": dropout, "conv_only": True}

        # Sub-module names and creation order are kept as-is (state-dict keys, init order).
        self.conv1 = get_conv_layer(spatial_dims, in_channels, out_channels, stride=stride, **shared_conv_args)
        self.conv2 = get_conv_layer(spatial_dims, out_channels, out_channels, stride=1, **shared_conv_args)
        self.lrelu = get_act_layer(name=act_name)
        self.norm1 = get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=out_channels)
        self.norm2 = get_norm_layer(name=norm_name, spatial_dims=spatial_dims, channels=out_channels)

    def forward(self, inp):
        # First stage: (possibly strided) conv, then norm and activation.
        hidden = self.lrelu(self.norm1(self.conv1(inp)))
        # Second stage: stride-1 conv, then norm and the same activation module.
        return self.lrelu(self.norm2(self.conv2(hidden)))



class UnetUpBlock(nn.Module):
    """
    Upsampling block usable in DynUNet, based on:
    `Automated Design of Deep Learning Methods for Biomedical Image Segmentation <https://arxiv.org/abs/1904.08128>`_.
    `nnU-Net: Self-adapting Framework for U-Net-Based Medical Image Segmentation <https://arxiv.org/abs/1809.10486>`_.

    The input is upsampled by a transposed convolution, concatenated with the
    skip connection along the channel axis and passed through a
    ``UnetBasicBlock``.

    Args:
        spatial_dims: number of spatial dimensions.
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: convolution kernel size of the basic block.
        upsample_kernel_size: kernel size (also used as stride) of the transposed convolution.
        norm_name: feature normalization type and arguments.
        act_name: activation layer type and arguments.
        dropout: dropout probability.
        trans_bias: transposed convolution bias.

    """

    def __init__(
        self,
        spatial_dims: int,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[Sequence[int], int],
        upsample_kernel_size: Union[Sequence[int], int],
        norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}),
        act_name: Union[Tuple, str] = ("leakyrelu", {"inplace": True, "negative_slope": 0.01}),
        dropout: Optional[Union[Tuple, str, float]] = None,
        trans_bias: bool = False,
    ):
        super().__init__()

        # Transposed convolution (the purple arrow in the paper); its stride equals its kernel size.
        self.transp_conv = get_conv_layer(
            spatial_dims,
            in_channels,
            out_channels,
            kernel_size=upsample_kernel_size,
            stride=upsample_kernel_size,
            dropout=dropout,
            bias=trans_bias,
            conv_only=True,
            is_transposed=True,
        )

        # Decoder conv block (light blue in nnUNet); it sees the upsampled map plus
        # the skip map, hence twice out_channels on its input.
        merged_channels = 2 * out_channels
        self.conv_block = UnetBasicBlock(
            spatial_dims,
            merged_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=1,
            dropout=dropout,
            norm_name=norm_name,
            act_name=act_name,
        )

    def forward(self, inp, skip):
        # `skip` is expected to carry out_channels channels.
        upsampled = self.transp_conv(inp)
        merged = torch.cat((upsampled, skip), dim=1)
        return self.conv_block(merged)



class UnetOutBlock(nn.Module):
    """Output head: a single biased 1x1 convolution mapping features to ``out_channels``."""

    def __init__(
        self, spatial_dims: int, in_channels: int, out_channels: int, dropout: Optional[Union[Tuple, str, float]] = None
    ):
        super().__init__()
        self.conv = get_conv_layer(
            spatial_dims,
            in_channels,
            out_channels,
            kernel_size=1,
            stride=1,
            dropout=dropout,
            bias=True,
            conv_only=True,
        )

    def forward(self, inp):
        projected = self.conv(inp)
        return projected
    

def get_conv_layer(
    spatial_dims: int,
    in_channels: int,
    out_channels: int,
    kernel_size: Union[Sequence[int], int] = 3,
    stride: Union[Sequence[int], int] = 1,
    act: Optional[Union[Tuple, str]] = Act.PRELU,
    norm: Union[Tuple, str] = Norm.INSTANCE,
    dropout: Optional[Union[Tuple, str, float]] = None,
    bias: bool = False,
    conv_only: bool = True,
    is_transposed: bool = False,
):
    """Create a ``Convolution`` whose padding is derived from kernel size and stride.

    Padding comes from ``get_padding``; for transposed convolutions the output
    padding comes from ``get_output_padding``, otherwise it is ``None``.
    All other arguments are forwarded to ``Convolution`` unchanged.
    """
    pad = get_padding(kernel_size, stride)
    out_pad = get_output_padding(kernel_size, stride, pad) if is_transposed else None

    conv_kwargs = {
        "strides": stride,
        "kernel_size": kernel_size,
        "act": act,
        "norm": norm,
        "dropout": dropout,
        "bias": bias,
        "conv_only": conv_only,
        "is_transposed": is_transposed,
        "padding": pad,
        "output_padding": out_pad,
    }
    return Convolution(spatial_dims, in_channels, out_channels, **conv_kwargs)


def get_padding(
    kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int]
) -> Union[Tuple[int, ...], int]:
    """Compute the convolution padding ``(kernel_size - stride + 1) / 2``, truncated to int.

    Returns a single int for scalar input and a tuple for multi-axis input.

    Raises:
        AssertionError: if any resulting padding value is negative.
    """
    kernels = np.atleast_1d(kernel_size)
    strides = np.atleast_1d(stride)

    half_overlap = (kernels - strides + 1) / 2
    if half_overlap.min() < 0:
        raise AssertionError("padding value should not be negative, please change the kernel size and/or stride.")

    # int() truncates the .5 that appears when kernel and stride have equal parity.
    per_axis = tuple(map(int, half_overlap))
    if len(per_axis) > 1:
        return per_axis
    return per_axis[0]


def get_output_padding(
    kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int], padding: Union[Sequence[int], int]
) -> Union[Tuple[int, ...], int]:
    """Compute the transposed-convolution output padding ``2 * padding + stride - kernel_size``.

    Returns a single int for scalar input and a tuple for multi-axis input.

    Raises:
        AssertionError: if any resulting output padding value is negative.
    """
    kernels, strides, pads = (np.atleast_1d(value) for value in (kernel_size, stride, padding))

    extra = 2 * pads + strides - kernels
    if extra.min() < 0:
        raise AssertionError("out_padding value should not be negative, please change the kernel size and/or stride.")

    per_axis = tuple(map(int, extra))
    if len(per_axis) > 1:
        return per_axis
    return per_axis[0]

def set_requires_grad(nets, requires_grad=False):
    """Set ``requires_grad`` on every parameter of one network or a list of networks.

    ``None`` entries are skipped.
    """
    networks = nets if isinstance(nets, list) else [nets]
    for network in networks:
        if network is None:
            continue
        for parameter in network.parameters():
            parameter.requires_grad = requires_grad

In [7]:
class DynUNet(nn.Module):
    """Six-level U-Net with optional deep-supervision outputs.

    Channel widths run 64 -> 96 -> 128 -> 192 -> 256 -> 384 -> 512 through the
    encoder and mirror back in the decoder. Every stage after the input
    convolution uses stride 2.

    Args:
        spatial_dims: number of spatial dimensions.
        in_channels: number of input channels.
        out_channels: number of output channels of every head.
        deep_supervision: in training mode, also return the two lower-resolution heads.
        KD: always return the stacked heads and the bottleneck feature map.
    """

    def __init__(
        self,
        spatial_dims: int,
        in_channels: int,
        out_channels: int,
        deep_supervision: bool,
        KD: bool = False
    ):
        super().__init__()
        self.spatial_dims = spatial_dims
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.deep_supervision = deep_supervision
        self.KD_enabled = KD

        # (attribute name, in channels, out channels, stride); stride 2 halves the spatial dims.
        encoder_plan = (
            ("input_conv", self.in_channels, 64, 1),
            ("down1", 64, 96, 2),
            ("down2", 96, 128, 2),
            ("down3", 128, 192, 2),
            ("down4", 192, 256, 2),
            ("down5", 256, 384, 2),
            ("bottleneck", 384, 512, 2),
        )
        # (attribute name, in channels, out channels)
        decoder_plan = (
            ("up1", 512, 384),
            ("up2", 384, 256),
            ("up3", 256, 192),
            ("up4", 192, 128),
            ("up5", 128, 96),
            ("up6", 96, 64),
        )
        # (attribute name, in channels)
        head_plan = (("out1", 64), ("out2", 96), ("out3", 128))

        # setattr registers sub-modules in the same order and under the same names as
        # one assignment per stage would.
        for name, n_in, n_out, stride in encoder_plan:
            setattr(self, name, UnetBasicBlock(spatial_dims=self.spatial_dims,
                                               in_channels=n_in,
                                               out_channels=n_out,
                                               kernel_size=3,
                                               stride=stride))
        for name, n_in, n_out in decoder_plan:
            setattr(self, name, UnetUpBlock(spatial_dims=self.spatial_dims,
                                            in_channels=n_in,
                                            out_channels=n_out,
                                            kernel_size=3,
                                            upsample_kernel_size=2))
        for name, n_in in head_plan:
            setattr(self, name, UnetOutBlock(spatial_dims=self.spatial_dims,
                                             in_channels=n_in,
                                             out_channels=self.out_channels))

    def forward( self, input ):
        """Run the network.

        Returns:
            ``{'pred': full-resolution logits}`` normally. When training with
            deep supervision, or whenever KD is enabled, ``'pred'`` holds the
            three heads stacked on dim 1 and ``'bottleneck_feature_map'`` is
            added.
        """
        # Encoder: with a 128^3 input, skips run from (B x 64 x 128^3) down to (B x 384 x 4^3).
        skips = [self.input_conv(input)]
        for stage in (self.down1, self.down2, self.down3, self.down4, self.down5):
            skips.append(stage(skips[-1]))

        # Bottleneck: (B x 512 x 2 x 2 x 2) for a 128^3 input.
        bottleneck_map = self.bottleneck(skips[-1])

        # Decoder: each stage consumes the deepest skip not yet used.
        decoded = []
        current = bottleneck_map
        for stage in (self.up1, self.up2, self.up3, self.up4, self.up5, self.up6):
            current = stage(current, skips.pop())
            decoded.append(current)
        quarter_res, half_res, full_res = decoded[-3], decoded[-2], decoded[-1]

        # Output
        output1 = self.out1(full_res)

        wants_all_heads = (self.training and self.deep_supervision) or self.KD_enabled
        if not wants_all_heads:
            return { 'pred' : output1 }

        # Lower-resolution heads are interpolated to the full-resolution size so they can be
        # stacked: 'pred' is B x 3 x out_channels x spatial dims.
        full_size = output1.shape[2:]
        output2 = interpolate(self.out2(half_res), full_size)
        output3 = interpolate(self.out3(quarter_res), full_size)
        return { 'pred' : torch.stack([output1, output2, output3], dim=1),
                 'bottleneck_feature_map' : bottleneck_map }

# Visualizing Model Instance

In [8]:
# !pip install torchsummary
# from torchsummary import summary

# # Initialize your DynUNet model
# model = DynUNet(spatial_dims=3, in_channels=4, out_channels=4, deep_supervision=True, KD=True)

# # Print model summary
# summary(model, input_size=(4, 128, 128, 128))  # Adjust input_size according to your needs

# ClearML

In [9]:
!pip install clearml
from clearml import Task

%env CLEARML_WEB_HOST=https://app.clear.ml/
%env CLEARML_API_HOST=https://api.clear.ml
%env CLEARML_FILES_HOST=https://files.clear.ml
%env CLEARML_API_ACCESS_KEY=CLEARML_API_ACCESS_KEY
%env CLEARML_API_SECRET_KEY=CLEARML_API_SECRET_KEY

Collecting clearml
  Downloading clearml-2.0.0-py2.py3-none-any.whl.metadata (17 kB)
Collecting furl>=2.0.0 (from clearml)
  Downloading furl-2.1.4-py2.py3-none-any.whl.metadata (25 kB)
Collecting pathlib2>=2.3.0 (from clearml)
  Downloading pathlib2-2.3.7.post1-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting orderedmultidict>=1.0.1 (from furl>=2.0.0->clearml)
  Downloading orderedmultidict-1.0.1-py2.py3-none-any.whl.metadata (1.3 kB)
Downloading clearml-2.0.0-py2.py3-none-any.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading furl-2.1.4-py2.py3-none-any.whl (27 kB)
Downloading pathlib2-2.3.7.post1-py2.py3-none-any.whl (18 kB)
Downloading orderedmultidict-1.0.1-py2.py3-none-any.whl (11 kB)
Installing collected packages: pathlib2, orderedmultidict, furl, clearml
Successfully installed clearml-2.0.0 furl-2.1.4 orderedmultidict-1.0.1 pathlib2-2.3.7.post1
env: CLEARML_WEB_HOST=

# GPUs Check

In [10]:
# Report which CUDA devices (if any) this session can see.
if not torch.cuda.is_available():
    print("No GPU available. Running on CPU.")
else:
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs available: {num_gpus}")
    i = 0
    while i < num_gpus:
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        i += 1

Number of GPUs available: 2
GPU 0: Tesla T4
GPU 1: Tesla T4


In [11]:
# # For freeing gpu
# import gc; gc.collect(); torch.cuda.empty_cache()

# Loss Function

In [12]:
class LossFunction(nn.Module):
    """Dice + BCE-with-logits loss summed over three nested tumour regions.

    The label map ``y`` is turned into three binary targets: ``y > 0``,
    ``y in {1, 3}`` and ``y == 3``. They are scored against prediction
    channels 1, 2 and 3 respectively (channel 0 is not used).
    """

    def __init__(self):
        super().__init__()
        self.dice = DiceLoss(sigmoid=True, batch=True, smooth_nr=1e-05, smooth_dr=1e-05)
        self.ce = nn.BCEWithLogitsLoss()

    def _loss(self, p, y):
        """Dice loss plus binary cross-entropy for one region."""
        dice_term = self.dice(p, y)
        bce_term = self.ce(p, y.float())
        return dice_term + bce_term

    def forward(self, p, y):
        # Region targets, in the order of prediction channels 1, 2, 3.
        region_targets = (
            y > 0,
            ((y == 1) + (y == 3)) > 0,
            y == 3,
        )
        region_losses = [
            self._loss(p[:, channel].unsqueeze(1), target)
            for channel, target in zip((1, 2, 3), region_targets)
        ]
        return region_losses[0] + region_losses[1] + region_losses[2]

# Student KD Model

In [14]:
class Student_KD_loss(nn.Module):
    """Student network wrapped together with its knowledge-distillation loss.

    The total loss is the deep-supervised segmentation loss of the student
    plus a KL divergence between the channel-softmaxed bottleneck feature maps
    of student and teacher.

    Attributes:
        student: the ``DynUNet`` being trained (deep supervision on).
        loss_fn: segmentation loss applied to every supervised head.
        temperature: stored but not applied anywhere in ``forward``.
        bce_loss: stored but not used in ``forward``.
    """

    def __init__(self):
        super().__init__()
        self.student = DynUNet( spatial_dims=3, in_channels=4, out_channels=4, deep_supervision=True)
        self.loss_fn = LossFunction()
        self.temperature = 5.0
        self.bce_loss = nn.BCEWithLogitsLoss()

    def forward(self, teacher_outputs, y):
        """Compute the student's combined loss for one batch.

        Args:
            teacher_outputs: dict holding the teacher's ``'bottleneck_feature_map'``.
            y: batch dict with ``'imgs'`` and ``'masks'``; both are expected to
                already be on the student's device.

        Returns:
            dict with ``'batch_total_student_loss'``, ``'seg_weighted'`` and
            ``'kl_weighted'``.
        """
        # autocast takes a device *type* ('cuda'), not an indexed device such as 'cuda:0'.
        with amp.autocast('cuda'):
            student_outputs = self.student( y['imgs'] )

            # Student loss with deep supervision; 'pred' is B x 3 x 4 x spatial dims,
            # one slice on dim 1 per decoder head.
            head_preds = student_outputs['pred']
            segloss_s_decoder_1 = self.loss_fn( head_preds[:,0], y['masks'] )
            segloss_s_decoder_2 = self.loss_fn( head_preds[:,1], y['masks'] )
            segloss_s_decoder_3 = self.loss_fn( head_preds[:,2], y['masks'] )

            # Lower-resolution heads contribute with halving weights.
            student_seg_loss = segloss_s_decoder_1 + 0.5*segloss_s_decoder_2 + 0.25*segloss_s_decoder_3

            #-----------------------------------------------------------------------------------#

            # KD loss between bottleneck layers -> (KL without CBAM Loss)
            teacher_map = teacher_outputs['bottleneck_feature_map']
            student_map = student_outputs['bottleneck_feature_map']
            B, C, H, W, D = teacher_map.shape

            # Teacher and student may live on different GPUs; bring the teacher map to the
            # student's device (a no-op when they already match). reshape also accepts
            # non-contiguous maps, which view would reject.
            # NOTE(review): the teacher map is not detached here - presumably the caller
            # runs the teacher under no_grad; confirm.
            teacher_bottleneck = teacher_map.to(student_map.device).reshape(B, C, -1)
            student_bottleneck = student_map.reshape(B, C, -1)

            # Normalize over the channel axis to probability distributions.
            student_probs = F.log_softmax(student_bottleneck, dim=1)  # Log probabilities
            teacher_probs = F.softmax(teacher_bottleneck, dim=1)      # Probabilities

            # KL divergence, summed over all elements and divided by the batch size.
            kl_loss_with_teacher = F.kl_div(student_probs, teacher_probs, reduction='batchmean')

            #-----------------------------------------------------------------------------------#

            beta, alpha = 1.0, 1.0

            print("Seg loss: ", student_seg_loss)
            print("KL loss with teacher: ", kl_loss_with_teacher)

            print("Seg loss weighted: ", alpha*student_seg_loss)
            print("KL loss with teacher weighted: ", beta*kl_loss_with_teacher)

            batch_total_student_loss = alpha*student_seg_loss + beta*kl_loss_with_teacher

            print("-------------Final student loss-------------")
            print(batch_total_student_loss)
            print("-------------Final student loss-------------")

        KD_output = {
            'batch_total_student_loss' : batch_total_student_loss,
            'seg_weighted'   : alpha*student_seg_loss,
            'kl_weighted'    : beta*kl_loss_with_teacher,
        }

        return KD_output

# Training & Validation

In [15]:
def evaluate(model, loader, epoch, task):
    """Run one validation pass, log the losses and put the model back in training mode.

    Args:
        model: wrapper exposing the network under ``model.student``.
        loader: validation DataLoader.
        epoch: zero-based epoch index (logged as ``epoch + 1``).
        task: ClearML task used for scalar reporting.

    Returns:
        The summed validation loss divided by the number of batches.
    """
    torch.manual_seed(0)

    # Validation moves the student to 'cuda'. Remember where it was training so it can be
    # put back there afterwards, rather than assuming a second GPU ('cuda:1') exists.
    train_device = next(model.student.parameters()).device

    model.eval()
    loss_fn = LossFunction()
    n_val_batches = len(loader)

    tumors_val_losses, running_loss = validate_model(model, loader, loss_fn)
    epoch_val_loss = running_loss / n_val_batches
    log_val_epoch_losses(tumors_val_losses, epoch, task, epoch_val_loss)

    print(f"------Final validation dice loss after epoch {epoch + 1}: {epoch_val_loss}-------")

    model.student.to(train_device)
    model.train()

    return epoch_val_loss

def validate_model(model, loader, loss_fn):
    """Score every batch of ``loader`` and collect validation losses.

    Args:
        model: wrapper whose student network is evaluated by ``process_batch``.
        loader: validation loader; must support ``len()``.
        loss_fn: callable forwarded to ``process_batch``.

    Returns:
        A tuple ``(tumors_val_losses, running_loss)`` where
        ``tumors_val_losses`` maps each tumour type ('GLI', 'PED', 'SSA',
        'MEN', 'MET') to the list of per-batch losses recorded for it, and
        ``running_loss`` is the sum of all per-batch losses as a Python float.

    Raises:
        KeyError: if a batch reports a tumour type outside the five above.
    """
    tumors_val_losses = {'GLI': [], 'PED': [], 'SSA': [], 'MEN': [], 'MET': []}
    running_loss = 0.0
    n_val_batches = len(loader)

    with tqdm(total=n_val_batches, desc='Validating', unit='batch', leave=False) as pbar, torch.no_grad():
        for y in loader:
            val_loss, data_type = process_batch(model, y, loss_fn)
            # Fetch the scalar once and accumulate plain floats, so callers
            # (logger, LR scheduler, checkpoint dict) never receive a device
            # tensor in place of a number.
            loss_value = val_loss.item()
            tumors_val_losses[data_type].append(loss_value)
            running_loss += loss_value
            pbar.update(1)

    return tumors_val_losses, running_loss

def process_batch(model, y, loss_fn):
    """Compute the validation loss of the student network on one batch.

    ``y['imgs']`` and ``y['masks']`` are moved to 'cuda' and written back into
    ``y``. The student (also moved to 'cuda') is run under CUDA autocast and
    its ``'pred'`` output is scored against the masks with ``loss_fn``.

    Args:
        model: wrapper exposing the student network as ``model.student``.
        y: batch dict with the keys 'imgs', 'masks' and 'data_type'.
        loss_fn: callable ``loss_fn(prediction, masks)``.

    Returns:
        ``(val_loss, data_type)``; ``data_type`` is the first entry of
        ``y['data_type']`` only.
    """
    images_on_gpu = y['imgs'].to('cuda')
    masks_on_gpu = y['masks'].to('cuda')
    y['imgs'] = images_on_gpu
    y['masks'] = masks_on_gpu

    # Only the first sample's label is used to tag the whole batch.
    tumor_type = y['data_type'][0]

    with torch.amp.autocast('cuda'):
        student = model.student.to('cuda')
        prediction = student(y['imgs'])
        val_loss = loss_fn(prediction['pred'], y['masks'])

    print(f"Validation dice loss per batch: {val_loss}")
    return val_loss, tumor_type

def log_val_epoch_losses(tumors_val_losses, epoch, task, epoch_val_loss):
    """Report per-tumour and overall validation losses for one epoch.

    Args:
        tumors_val_losses: mapping of tumour type to a list of batch losses.
        epoch: zero-based epoch index; scalars are reported at ``epoch + 1``.
        task: object providing ``get_logger().report_scalar(...)``.
        epoch_val_loss: overall validation loss, reported as the ``val_loss``
            series under "KD Losses over Epochs".
    """
    for tumor_type, losses in tumors_val_losses.items():
        # A tumour type with no recorded batches is reported as 0.
        if losses:
            mean_loss = sum(losses) / len(losses)
        else:
            mean_loss = 0

        task.get_logger().report_scalar(
            title=f"{tumor_type} Losses over Epochs",
            series=f"{tumor_type} Epoch valLoss",
            iteration=epoch + 1,
            value=mean_loss,
        )

    task.get_logger().report_scalar(
        "KD Losses over Epochs",
        "val_loss",
        iteration=epoch + 1,
        value=epoch_val_loss,
    )

In [16]:
def setup_environment(args):
    """Seed torch's global RNG with 0 and ensure the checkpoint output directory exists.

    Args:
        args: mapping whose 'out_checkpoint_dir' entry is a ``pathlib.Path``;
            the directory (and any missing parents) is created if absent.
    """
    torch.manual_seed(0)
    checkpoint_dir = args['out_checkpoint_dir']
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

def initialize_models():
    """Build the teacher and student networks on their respective GPUs.

    Returns:
        ``(teacher_model, student_model)``: a ``DynUNet`` teacher placed on
        'cuda:0' and a ``Student_KD_loss`` module placed on 'cuda:1'.
    """
    teacher = DynUNet(
        spatial_dims=3,
        in_channels=4,
        out_channels=4,
        deep_supervision=True,
        KD=True,
    )
    teacher = teacher.to('cuda:0')

    student = Student_KD_loss()
    student = student.to('cuda:1')

    return teacher, student

def initialize_optimizer_scheduler(student_model, args):
    """Create the AdamW optimizer and the plateau LR scheduler for the student.

    Args:
        student_model: module whose ``parameters()`` are optimised.
        args: mapping providing 'learning_rate' and 'weight_decay'.

    Returns:
        ``(optimizer, scheduler)``: an ``AdamW`` optimizer (eps=1e-4) and a
        ``ReduceLROnPlateau`` scheduler in 'min' mode bound to it.
    """
    adamw = optim.AdamW(
        student_model.parameters(),
        lr=args['learning_rate'],
        weight_decay=args['weight_decay'],
        eps=1e-4,
    )

    plateau_settings = {
        'mode': 'min',
        'factor': 0.1,
        'patience': 10,
        'cooldown': 1,
        'threshold': 0.001,
        'min_lr': 1e-6,
    }
    plateau = optim.lr_scheduler.ReduceLROnPlateau(adamw, **plateau_settings)

    return adamw, plateau

def load_teacher_model(teacher_model, data_type, teacher_model_paths):
    """Load the teacher checkpoint registered for ``data_type`` into ``teacher_model``.

    Args:
        teacher_model: module receiving the weights via ``load_state_dict``.
        data_type: key looked up in ``teacher_model_paths``.
        teacher_model_paths: mapping of data type to checkpoint file path. The
            checkpoint is expected to hold the weights under 'teacher_model'.

    Returns:
        None. When no path is registered for ``data_type`` or the file does
        not exist, ``teacher_model`` is left untouched and a ``RuntimeWarning``
        is emitted, because the teacher then keeps whatever weights it held
        before (possibly those of another data type).
    """
    import warnings  # local import keeps this block self-contained

    teacher_model_path = teacher_model_paths.get(data_type)
    if not teacher_model_path or not Path(teacher_model_path).is_file():
        # Still best-effort (no exception), but no longer silent.
        warnings.warn(
            f"No teacher checkpoint found for data type {data_type!r} "
            f"(path: {teacher_model_path!r}); teacher weights left unchanged.",
            RuntimeWarning,
            stacklevel=2,
        )
        return

    ckpt = torch.load(teacher_model_path, map_location='cuda:0', weights_only=True)
    teacher_model.load_state_dict(ckpt['teacher_model'])
    print(f"Loaded model: {teacher_model_path}")

def load_student_checkpoint(
    student_model,
    optimizer,
    scaler,
    scheduler,
    args,
    checkpoint_name='Student_model_after_epoch_12_trainLoss_1.5565_valLoss_0.4173.pth',
):
    """Restore student training state from a checkpoint, if one exists.

    Args:
        student_model: wrapper whose ``student`` submodule receives the weights.
        optimizer: optimizer restored from 'optimizer_student'.
        scaler: gradient scaler restored from 'grad_scaler_state'.
        scheduler: LR scheduler restored from 'scheduler_state_dict'.
        args: mapping whose 'in_checkpoint_dir' entry is a ``pathlib.Path``.
        checkpoint_name: file name inside ``args['in_checkpoint_dir']``. The
            default is the file name that used to be hard-coded here.

    Returns:
        The epoch index to resume from (stored 'epoch' + 1), or 0 when the
        checkpoint file does not exist.
    """
    checkpoint_path = args['in_checkpoint_dir'] / checkpoint_name
    if not checkpoint_path.is_file():
        # Make the from-scratch start visible instead of silent.
        print(f"No student checkpoint found at {checkpoint_path}; starting from epoch 0")
        return 0

    print(f"Found model {checkpoint_path}")
    ckpt = torch.load(checkpoint_path, map_location='cuda:1', weights_only=True)
    student_model.student.load_state_dict(ckpt['student_model'])
    optimizer.load_state_dict(ckpt['optimizer_student'])
    scaler.load_state_dict(ckpt['grad_scaler_state'])
    scheduler.load_state_dict(ckpt['scheduler_state_dict'])
    print(f"Loaded student model: {checkpoint_path} with lr: {optimizer.param_groups[0]['lr']}")
    return ckpt['epoch'] + 1

def train_epoch(epoch, trainLoader, train_config, start_ep):
    """Train the student for one epoch of knowledge distillation.

    Each sample of every batch is processed on its own: the teacher checkpoint
    matching the sample's tumour type is loaded, the teacher runs on 'cuda:0',
    its detached outputs are copied to 'cuda:1', and the student loss is
    back-propagated through the gradient scaler. The optimizer steps every
    ``accumulation_steps`` samples and at the end of each batch.

    Args:
        epoch: zero-based epoch index (used for the progress bar and logging).
        trainLoader: loader yielding dicts with 'imgs', 'masks' and 'data_type'.
        train_config: mapping with the keys 'student_model', 'teacher_model',
            'optimizer', 'scaler', 'accumulation_steps', 'teacher_model_paths',
            'task' and 'epochs'.
        start_ep: epoch index the run started from (progress bar only).

    Returns:
        ``(epoch_losses, tumors_losses)``. ``epoch_losses`` holds the 'total',
        'kl' and 'seg' losses (Python floats) averaged over the number of
        batches. ``tumors_losses`` maps each tumour type to the list of
        per-sample losses, each already divided by ``accumulation_steps``.
    """
    student_model = train_config['student_model']
    teacher_model = train_config['teacher_model']
    optimizer = train_config['optimizer']
    scaler = train_config['scaler']
    accumulation_steps = train_config['accumulation_steps']
    teacher_model_paths = train_config['teacher_model_paths']
    task = train_config['task']

    student_model.train()
    teacher_model.eval()

    epoch_losses = {'total': 0, 'kl': 0, 'seg': 0}
    tumors_losses = {'GLI': [], 'PED': [], 'SSA': [], 'MEN': [], 'MET': []}
    n_batches = len(trainLoader)

    # Tumour type most recently requested from load_teacher_model; consecutive
    # samples of the same type reuse the weights instead of re-reading the file.
    loaded_teacher_type = None

    with tqdm(total=n_batches, desc=f"(Epoch {epoch + 1}/{start_ep + train_config['epochs']})", unit='batch') as pbar:
        optimizer.zero_grad()

        for y in trainLoader:
            batch_loss = 0
            n_samples = len(y['data_type'])

            for sub_step, data_type in enumerate(y['data_type']):
                imgs = y['imgs'][sub_step].unsqueeze(0).to('cuda:0')
                # Masks are only consumed by the student, so send them
                # straight to its device.
                masks = y['masks'][sub_step].unsqueeze(0).to('cuda:1')

                if data_type != loaded_teacher_type:
                    load_teacher_model(teacher_model, data_type, teacher_model_paths)
                    loaded_teacher_type = data_type

                # The teacher is frozen: skip building an autograd graph for it.
                with torch.no_grad(), amp.autocast('cuda'):
                    teacher_outputs = teacher_model(imgs)

                detached_teacher_output = {k: v.detach().to('cuda:1') for k, v in teacher_outputs.items()}
                imgs = imgs.to('cuda:1')

                with amp.autocast('cuda'):
                    student_outputs = student_model(detached_teacher_output, {'imgs': imgs, 'masks': masks})
                    loss = student_outputs['batch_total_student_loss'] / accumulation_steps

                loss_value = loss.item()
                batch_loss += loss_value
                tumors_losses[data_type].append(loss_value)

                scaler.scale(loss).backward()

                task.get_logger().report_scalar(
                    title=f"Tumors training losses per epoch {epoch+1}",
                    series=f"{data_type} loss",
                    iteration=len(tumors_losses[data_type]),
                    value=loss_value
                )

                # Accumulate plain floats so the running sums do not keep
                # references to autograd graphs for the whole epoch.
                for key in ('kl', 'seg'):
                    component = student_outputs.get(f'{key}_weighted', 0)
                    epoch_losses[key] += float(component) / accumulation_steps

                if (sub_step + 1) % accumulation_steps == 0 or (sub_step + 1) == n_samples:
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()

            epoch_losses['total'] += batch_loss
            pbar.update(1)

    # An empty loader leaves the sums at 0 instead of dividing by zero.
    if n_batches:
        for key in epoch_losses:
            epoch_losses[key] /= n_batches

    return epoch_losses, tumors_losses

def log_KD_losses_over_epochs(epoch, epoch_losses, tumors_losses, task):
    """Report the epoch's distillation losses and per-tumour training losses.

    Args:
        epoch: zero-based epoch index; scalars are reported at ``epoch + 1``.
        epoch_losses: mapping of loss name to its epoch value; each entry is
            reported as a "<name> loss" series under "KD Losses over Epochs".
        tumors_losses: mapping of tumour type to a list of recorded losses;
            the mean (0 for an empty list) is reported per tumour type.
        task: object providing ``get_logger().report_scalar(...)``.
    """
    for loss_name in epoch_losses:
        task.get_logger().report_scalar(
            title="KD Losses over Epochs",
            series=f"{loss_name} loss",
            iteration=epoch + 1,
            value=epoch_losses[loss_name],
        )

    for tumor_type, recorded in tumors_losses.items():
        if recorded:
            mean_train_loss = sum(recorded) / len(recorded)
        else:
            mean_train_loss = 0

        task.get_logger().report_scalar(
            title=f"{tumor_type} Losses over Epochs",
            series=f"{tumor_type} Epoch trainLoss",
            iteration=epoch + 1,
            value=mean_train_loss,
        )

def validate_and_save(epoch, valLoader, train_config, epoch_losses):
    """Validate the student, step the LR scheduler and write a checkpoint.

    Args:
        epoch: zero-based epoch index (stored as-is in the checkpoint; file
            names and logs use ``epoch + 1``).
        valLoader: validation loader passed to ``evaluate``.
        train_config: mapping with the keys 'student_model', 'scheduler',
            'optimizer', 'scaler', 'out_checkpoint_dir' and 'task'.
        epoch_losses: mapping whose 'total' entry is embedded in the
            checkpoint file name.
    """
    student_model, scheduler, optimizer, scaler, out_checkpoint_dir, task = (
        train_config[name]
        for name in (
            'student_model',
            'scheduler',
            'optimizer',
            'scaler',
            'out_checkpoint_dir',
            'task',
        )
    )

    val_loss = evaluate(student_model, valLoader, epoch, task)
    scheduler.step(val_loss)

    # Read the learning rate only after the scheduler had a chance to lower it.
    current_lr = optimizer.param_groups[0]['lr']
    task.get_logger().report_scalar("LR", "learning_rate", iteration=epoch+1, value=current_lr)
    print(f"Learning rate after epoch {epoch + 1}: {current_lr}")

    state = dict(
        epoch=epoch,
        student_model=student_model.student.state_dict(),
        optimizer_student=optimizer.state_dict(),
        lr=current_lr,
        grad_scaler_state=scaler.state_dict(),
        scheduler_state_dict=scheduler.state_dict(),
        val_dice_loss=val_loss,
    )

    file_name = f'Student_model_after_epoch_{epoch + 1}_trainLoss_{epoch_losses["total"]:.4f}_valLoss_{val_loss:.4f}.pth'
    torch.save(state, out_checkpoint_dir / file_name)
    print(f"Model saved after epoch {epoch + 1}")

def run_KD(trainLoader, valLoader, args):
    """Run the full knowledge-distillation training loop.

    Sets up the environment, builds the teacher/student models, optimizer,
    scheduler and gradient scaler, resumes from a student checkpoint when one
    is found, and then alternates ``train_epoch`` / ``validate_and_save`` for
    ``args['epochs']`` epochs while logging to a ClearML task.

    Args:
        trainLoader: training loader handed to ``train_epoch``.
        valLoader: validation loader handed to ``validate_and_save``.
        args: run configuration. Reads 'epochs', 'learning_rate', 'data_dirs',
            'out_checkpoint_dir' and, optionally, 'accumulation_steps'
            (defaults to 5); the helpers called here read further keys.
    """
    setup_environment(args)
    teacher_model, student_model = initialize_models()
    optimizer, scheduler = initialize_optimizer_scheduler(student_model, args)
    # GradScaler takes a device *type*; the scale tensor itself is created on
    # the device of the first loss passed to scaler.scale().
    scaler = amp.GradScaler('cuda')
    # Single source of truth for both the training loop and the banner below.
    accumulation_steps = args.get('accumulation_steps', 5)

    teacher_model_paths = {
        'GLI': '/kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth',
        'SSA': '/kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth',
        'PED': '/kaggle/input/pednewlabel/Teacher_model_after_epoch_99_trainLoss_1.4512_valLoss_1.0042.pth',
        'MEN': '/kaggle/input/meningiomateachernewlabels/Teacher_model_after_epoch_85_trainLoss_0.5824_valLoss_0.3318.pth',
        'MET': '/kaggle/input/met-teacher-new-labels/Teacher_model_after_epoch_100_trainLoss_1.6278_valLoss_0.7199.pth'
    }

    start_epoch = load_student_checkpoint(student_model, optimizer, scaler, scheduler, args)
    task = Task.init(project_name="Ablation Studies", task_name="Fairness Ablation Study KL and Seg NO CBAM", reuse_last_task_id=True)
    task.connect(args)
    task.add_tags(['KL+SEG', "Ahmed Pro"])

    print(f'''Starting Knowledge Distillation:
            Epochs:          From {start_epoch + 1} to {start_epoch + args['epochs']}
            Batch size:      {accumulation_steps} (effective through gradient accumulation)
            Learning rate:  {args['learning_rate']}
            Training data coming from: {args['data_dirs']}
    ''')

    train_config = {
        'teacher_model': teacher_model,
        'student_model': student_model,
        'optimizer': optimizer,
        'scheduler': scheduler,
        'scaler': scaler,
        'accumulation_steps': accumulation_steps,
        'teacher_model_paths': teacher_model_paths,
        'out_checkpoint_dir': args['out_checkpoint_dir'],
        'task': task,
        'epochs': args['epochs']
    }

    try:
        for epoch in range(start_epoch, start_epoch + args['epochs']):
            epoch_losses, tumors_losses = train_epoch(epoch, trainLoader, train_config, start_epoch)
            log_KD_losses_over_epochs(epoch, epoch_losses, tumors_losses, task)
            validate_and_save(epoch, valLoader, train_config, epoch_losses)

        print("Training completed.")
    finally:
        # Close the logging task even when training is interrupted or fails.
        task.close()

In [17]:
# Run configuration shared by data loading and the distillation loop.
args = {
    # NOTE(review): 'workers' and the three batch sizes are presumably
    # consumed by prepare_data_loaders - confirm against its definition.
    'workers': 2,
    # Number of epochs to train in this session, counted from the resumed epoch.
    'epochs': 2,
    'train_batch_size': 5,
    'val_batch_size': 2,
    'test_batch_size': 1,
    # AdamW hyper-parameters for the student optimizer.
    'learning_rate': 1e-3,
    'weight_decay': 1e-5,
    # NOTE(review): 'lambd' is not read by the training code in this cell - confirm it is still needed.
    'lambd': 0.0051,
    'data_dirs': [
        "/kaggle/input/bratsglioma/Training/",
        "/kaggle/input/bratsafrica24/",
        "/kaggle/input/bratsped/Training/",
        "/kaggle/input/bratsmen/",
        "/kaggle/input/bratsmet24/",
    ],
    # Where a previous student checkpoint is looked up / new ones are written.
    'in_checkpoint_dir': Path('/kaggle/input/data-abl-study-kl-seg-no-cbam/'),
    'out_checkpoint_dir': Path('/kaggle/working/'),
}

trainLoader, valLoader, testLoader = prepare_data_loaders(args)
run_KD(trainLoader, valLoader, args)

Number of training samples in bratsglioma DataSet: 888
Number of validation samples in bratsglioma DataSet: 112
Number of testing samples in bratsglioma DataSet: 251 
Number of training samples in bratsafrica24 DataSet: 102
Number of validation samples in bratsafrica24 DataSet: 13
Number of testing samples in bratsafrica24 DataSet: 30 
Number of training samples in bratsped DataSet: 70
Number of validation samples in bratsped DataSet: 8
Number of testing samples in bratsped DataSet: 21 
Number of training samples in bratsmen DataSet: 710
Number of validation samples in bratsmen DataSet: 90
Number of testing samples in bratsmen DataSet: 200 
Number of training samples in bratsmet24 DataSet: 232
Number of validation samples in bratsmet24 DataSet: 29
Number of testing samples in bratsmet24 DataSet: 67 
Number of combined training samples 4450
Number of combined validation samples 252
Number of combined testing samples 569
Found model /kaggle/input/data-abl-study-kl-seg-no-cbam/Student_mod

(Epoch 13/14):   0%|          | 0/890 [00:00<?, ?batch/s]

2025-06-17 02:03:40,015 - clearml.model - INFO - Selected model id: 8a2350af3bc84e35ad386c84abc504e3
Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0917, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3175, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0917, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3175, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4092, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
2025-06-17 02:03:49,430 - clearml.model - INFO - Selected model id: 20f814c13615424b941827d1db259433
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2900, device='cud

(Epoch 13/14):   0%|          | 1/890 [00:56<13:52:29, 56.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3432, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3432, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8540, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8511, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2743, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8511, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2743, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   0%|          | 2/890 [01:11<7:58:59, 32.36s/batch] 

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5355, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4337, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5355, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4337, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9692, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3417, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3417, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   0%|          | 3/890 [01:26<6:01:31, 24.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3450, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3450, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2935, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7557, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3406, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7557, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3406, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   0%|          | 4/890 [01:41<5:05:19, 20.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2789, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2789, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3570, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3035, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|          | 5/890 [01:56<4:34:42, 18.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7330, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7330, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3440, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2786, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2786, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|          | 6/890 [02:11<4:16:26, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4217, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4217, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0414, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7304, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2931, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7304, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2931, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|          | 7/890 [02:26<4:04:47, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3058, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3058, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3576, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2797, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2797, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|          | 8/890 [02:41<3:55:44, 16.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7616, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7616, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7289, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4905, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3398, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3398, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|          | 9/890 [02:56<3:50:18, 15.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4272, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4272, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1762, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3057, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|          | 10/890 [03:11<3:45:58, 15.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2904, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2830, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2904, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2830, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5734, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2788, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2788, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|          | 11/890 [03:26<3:42:41, 15.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.0626, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.0626, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.7655, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6857, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4205, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6857, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4205, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|▏         | 12/890 [03:41<3:41:45, 15.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8867, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2684, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8867, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2684, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1552, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9289, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9289, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   1%|▏         | 13/890 [03:56<3:41:03, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7220, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2811, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7220, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2811, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0031, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7479, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7479, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 14/890 [04:11<3:40:11, 15.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2648, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2892, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2648, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2892, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5540, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5629, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2963, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5629, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2963, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 15/890 [04:26<3:39:17, 15.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1868, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4582, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1868, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4582, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6450, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2596, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2596, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 16/890 [04:41<3:38:52, 15.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3743, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3743, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2640, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8225, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3447, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8225, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3447, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 17/890 [04:56<3:38:55, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3390, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3390, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4291, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2963, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2963, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 18/890 [05:11<3:38:41, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9313, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2945, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9313, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2945, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2257, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2977, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2977, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 19/890 [05:26<3:38:23, 15.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3453, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3052, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3453, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6505, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3782, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3400, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3782, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3400, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 20/890 [05:41<3:40:17, 15.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6854, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6854, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9832, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3265, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3265, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 21/890 [05:57<3:40:40, 15.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3217, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3217, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4515, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3093, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4515, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3093, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   2%|▏         | 22/890 [06:12<3:38:50, 15.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1891, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1891, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4745, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7207, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3715, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7207, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3715, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 23/890 [06:27<3:41:16, 15.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8854, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0716, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3681, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0716, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3681, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 24/890 [06:43<3:43:33, 15.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4538, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3790, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4538, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3790, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8327, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 25/890 [06:59<3:45:23, 15.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8993, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3367, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8993, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3367, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2359, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7437, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2766, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7437, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2766, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 26/890 [07:14<3:42:28, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3403, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3403, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9130, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0753, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3174, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0753, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3174, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 27/890 [07:29<3:41:44, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8510, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2843, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8510, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2843, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1353, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3113, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3113, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 28/890 [07:45<3:41:07, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4391, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4391, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0996, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9858, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3365, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9858, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3365, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 29/890 [08:00<3:38:52, 15.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3587, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3587, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9249, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6285, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3075, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6285, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3075, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 30/890 [08:15<3:37:30, 15.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4440, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4440, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0694, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3223, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3223, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   3%|▎         | 31/890 [08:30<3:36:58, 15.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6674, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6674, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.4858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3416, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3416, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▎         | 32/890 [08:45<3:36:10, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4715, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4993, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4715, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4993, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9708, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3517, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3517, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▎         | 33/890 [09:00<3:35:29, 15.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7420, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3148, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7420, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3148, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0568, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7105, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3214, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7105, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3214, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▍         | 34/890 [09:15<3:34:44, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3506, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1488, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3901, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1488, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3901, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▍         | 35/890 [09:30<3:34:11, 15.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2691, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2691, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8677, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3163, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3163, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▍         | 36/890 [09:45<3:34:02, 15.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4252, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4252, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7588, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2918, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2918, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▍         | 37/890 [10:00<3:33:49, 15.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2742, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2742, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9217, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6510, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3542, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6510, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3542, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▍         | 38/890 [10:15<3:33:37, 15.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2969, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2969, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8790, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3182, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8790, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3182, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▍         | 39/890 [10:30<3:33:23, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3410, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3410, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9356, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3114, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3114, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   4%|▍         | 40/890 [10:45<3:33:39, 15.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7708, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3646, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7708, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3646, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1354, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3100, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3100, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▍         | 41/890 [11:00<3:32:42, 15.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5716, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3563, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5716, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3563, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9280, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5726, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2644, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5726, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2644, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▍         | 42/890 [11:15<3:32:44, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2814, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2814, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8746, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3379, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3379, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▍         | 43/890 [11:30<3:32:25, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4219, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8246, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2819, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2819, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▍         | 44/890 [11:45<3:32:08, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4654, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4654, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0715, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4951, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3034, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4951, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▌         | 45/890 [12:00<3:31:47, 15.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8117, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8117, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2958, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1075, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3316, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3316, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▌         | 46/890 [12:15<3:31:10, 15.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2618, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2618, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3790, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5771, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3362, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5771, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3362, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▌         | 47/890 [12:30<3:31:44, 15.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5638, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3782, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5638, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3782, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9420, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3337, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3337, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   5%|▌         | 48/890 [12:46<3:31:17, 15.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3116, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3116, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7709, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5812, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2704, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5812, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2704, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▌         | 49/890 [13:01<3:31:01, 15.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5695, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2841, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5695, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2841, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8535, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3765, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▌         | 50/890 [13:16<3:30:35, 15.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5682, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0729, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3227, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3227, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▌         | 51/890 [13:31<3:30:53, 15.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4378, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3382, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4378, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3382, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1407, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3296, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1407, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3296, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▌         | 52/890 [13:46<3:30:12, 15.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3025, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8228, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3382, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3382, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▌         | 53/890 [14:01<3:30:35, 15.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7531, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4136, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7531, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4136, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1668, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6064, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2976, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6064, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2976, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▌         | 54/890 [14:16<3:30:47, 15.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3391, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3391, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7920, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7369, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2973, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7369, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2973, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▌         | 55/890 [14:31<3:30:26, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0295, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2621, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0295, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2621, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2916, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▋         | 56/890 [14:46<3:30:09, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7416, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3147, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7416, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3147, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0563, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5231, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3968, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5231, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3968, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   6%|▋         | 57/890 [15:01<3:29:49, 15.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7915, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1948, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3234, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1948, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3234, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 58/890 [15:17<3:30:06, 15.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6439, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3016, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6439, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9455, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3985, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3985, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 59/890 [15:32<3:29:18, 15.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3892, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3892, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8851, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3045, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 60/890 [15:47<3:29:51, 15.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5616, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3048, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5616, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8665, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3080, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3080, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 61/890 [16:02<3:28:51, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8547, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3226, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3226, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 62/890 [16:17<3:29:03, 15.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2750, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2750, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0819, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5923, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2753, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5923, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2753, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 63/890 [16:32<3:28:25, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3648, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3648, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8740, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2728, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2728, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 64/890 [16:48<3:28:44, 15.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3963, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3963, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8694, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3607, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3607, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 65/890 [17:03<3:27:56, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4322, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3503, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4322, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3503, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7824, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3121, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3121, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   7%|▋         | 66/890 [17:18<3:27:28, 15.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5310, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3023, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5310, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8332, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7180, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3222, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7180, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3222, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 67/890 [17:33<3:26:52, 15.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6309, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6309, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1281, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9006, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3034, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9006, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 68/890 [17:48<3:27:50, 15.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3474, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3382, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3474, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3382, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6856, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8595, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3559, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8595, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3559, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 69/890 [18:03<3:27:17, 15.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5364, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5364, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9672, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3239, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3239, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 70/890 [18:18<3:26:41, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2964, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2964, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0835, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3051, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3051, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 71/890 [18:33<3:26:18, 15.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3289, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7848, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7967, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3538, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7967, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3538, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 72/890 [18:48<3:25:50, 15.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7679, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7679, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0539, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4462, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3360, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4462, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3360, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 73/890 [19:03<3:25:29, 15.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3902, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3902, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1919, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5916, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3063, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5916, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3063, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 74/890 [19:19<3:25:50, 15.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6700, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3802, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6700, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3802, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0502, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6748, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2777, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6748, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2777, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   8%|▊         | 75/890 [19:34<3:25:14, 15.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3962, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3962, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8566, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2815, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2815, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▊         | 76/890 [19:49<3:24:44, 15.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6238, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6238, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7583, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5812, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3057, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5812, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▊         | 77/890 [20:04<3:24:11, 15.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5455, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5455, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7399, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2868, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7399, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2868, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▉         | 78/890 [20:19<3:24:04, 15.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3698, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.6037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3698, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.6037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9735, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2686, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2686, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▉         | 79/890 [20:34<3:23:35, 15.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4557, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4148, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4557, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4148, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8705, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3592, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3592, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▉         | 80/890 [20:49<3:23:44, 15.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6552, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7928, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6552, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7928, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4480, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5572, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3030, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5572, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▉         | 81/890 [21:04<3:23:39, 15.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5209, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5209, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8217, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2957, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2957, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▉         | 82/890 [21:19<3:23:09, 15.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3138, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3138, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6276, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7650, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3672, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7650, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3672, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▉         | 83/890 [21:34<3:23:24, 15.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8135, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8135, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2171, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3312, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3312, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):   9%|▉         | 84/890 [21:50<3:23:14, 15.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0133, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3222, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0133, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3222, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3355, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3607, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3607, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|▉         | 85/890 [22:05<3:22:26, 15.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3637, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3637, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1283, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3758, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3758, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|▉         | 86/890 [22:20<3:22:21, 15.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3383, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0312, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3809, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3809, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|▉         | 87/890 [22:35<3:22:28, 15.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3874, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3874, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9564, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9081, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2927, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9081, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2927, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|▉         | 88/890 [22:50<3:22:50, 15.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3144, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3144, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2393, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3287, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3287, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|█         | 89/890 [23:06<3:22:59, 15.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3510, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3510, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0805, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8131, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3556, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8131, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3556, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|█         | 90/890 [23:21<3:23:13, 15.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8816, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3189, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3189, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|█         | 91/890 [23:36<3:22:54, 15.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1378, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5290, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1378, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5290, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6668, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8399, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3201, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8399, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3201, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|█         | 92/890 [23:51<3:22:58, 15.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4663, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4663, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4295, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2841, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2841, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  10%|█         | 93/890 [24:07<3:22:43, 15.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2974, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2974, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7178, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9554, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3239, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9554, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3239, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█         | 94/890 [24:22<3:22:00, 15.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1777, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3105, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1777, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3105, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4883, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8121, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3645, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8121, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3645, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█         | 95/890 [24:37<3:21:49, 15.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0170, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8386, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0170, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8386, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8556, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3019, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█         | 96/890 [24:53<3:23:08, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8531, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3705, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8531, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3705, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2236, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2997, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2997, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█         | 97/890 [25:09<3:26:18, 15.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4305, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4521, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4305, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4521, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8826, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3618, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3618, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█         | 98/890 [25:24<3:25:02, 15.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2479, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4906, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2479, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4906, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9111, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3649, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9111, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3649, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█         | 99/890 [25:39<3:23:30, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1056, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3513, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1056, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3513, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4569, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█         | 100/890 [25:55<3:23:26, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9387, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3182, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9387, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3182, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2569, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0244, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3580, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0244, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3580, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█▏        | 101/890 [26:10<3:22:37, 15.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3208, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3208, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4331, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3917, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3917, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  11%|█▏        | 102/890 [26:26<3:24:25, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1732, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3049, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1732, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4781, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7611, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3271, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7611, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3271, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 103/890 [26:42<3:23:47, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4177, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4177, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8326, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8350, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2871, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8350, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2871, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 104/890 [26:58<3:25:21, 15.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7893, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2997, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7893, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2997, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0890, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7827, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3392, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7827, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3392, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 105/890 [27:13<3:23:35, 15.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6280, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3505, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6280, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3505, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9785, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3456, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3456, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 106/890 [27:28<3:21:07, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6152, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6152, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1260, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6759, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3113, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6759, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3113, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 107/890 [27:43<3:20:21, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3676, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2751, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2751, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 108/890 [27:58<3:19:39, 15.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2896, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2896, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3281, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2925, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2925, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 109/890 [28:13<3:18:24, 15.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3943, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3943, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1844, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3102, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3102, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 110/890 [28:29<3:18:17, 15.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4259, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4259, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1453, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7312, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3096, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7312, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3096, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  12%|█▏        | 111/890 [28:44<3:19:07, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4042, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1448, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3081, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3081, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 112/890 [29:01<3:23:53, 15.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5931, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3981, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5931, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3981, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9912, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8779, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2962, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8779, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2962, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 113/890 [29:17<3:23:28, 15.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4625, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3778, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4625, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3778, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8402, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4150, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4150, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 114/890 [29:32<3:21:04, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4226, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4226, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1598, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4405, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4405, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 115/890 [29:47<3:19:35, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5748, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3931, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5748, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3931, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9678, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9042, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3124, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9042, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3124, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 116/890 [30:02<3:18:45, 15.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5790, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2628, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5790, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2628, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8418, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3178, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3178, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 117/890 [30:17<3:17:34, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.2376, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.2376, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1425, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6350, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3373, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6350, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3373, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 118/890 [30:33<3:17:23, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3470, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3470, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1268, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3329, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3329, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 119/890 [30:48<3:17:17, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5821, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4044, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5821, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9865, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3097, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3145, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3097, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3145, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  13%|█▎        | 120/890 [31:03<3:16:39, 15.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3881, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3162, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3881, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3162, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7043, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4649, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3046, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4649, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▎        | 121/890 [31:19<3:16:06, 15.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2495, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2495, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9639, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3869, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3296, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3869, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3296, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▎        | 122/890 [31:34<3:15:48, 15.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7992, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3250, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7992, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3250, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1241, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3113, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3113, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▍        | 123/890 [31:49<3:15:23, 15.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2825, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2825, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3088, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4468, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3453, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4468, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3453, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▍        | 124/890 [32:05<3:16:14, 15.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5830, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5830, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2093, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4234, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3825, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4234, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3825, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▍        | 125/890 [32:20<3:16:46, 15.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.1784, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.1784, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8713, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3771, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▍        | 126/890 [32:36<3:16:02, 15.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8404, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3706, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8404, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3706, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2110, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3143, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3143, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▍        | 127/890 [32:51<3:15:42, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4204, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4204, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1782, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5916, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5916, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▍        | 128/890 [33:06<3:15:22, 15.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3337, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3337, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1983, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2200, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2200, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  14%|█▍        | 129/890 [33:22<3:13:55, 15.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3307, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3307, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8490, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3782, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3782, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▍        | 130/890 [33:37<3:12:57, 15.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4198, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4198, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2182, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9300, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3431, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9300, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3431, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▍        | 131/890 [33:52<3:12:08, 15.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4659, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.6285, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4659, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.6285, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0944, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6550, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3088, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6550, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3088, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▍        | 132/890 [34:08<3:14:22, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3680, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3680, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8405, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3219, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▍        | 133/890 [34:23<3:13:30, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.0391, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.0391, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9504, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1904, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3977, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1904, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3977, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▌        | 134/890 [34:38<3:12:38, 15.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3030, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0142, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6863, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3521, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6863, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3521, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▌        | 135/890 [34:53<3:11:50, 15.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3903, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3903, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1567, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6168, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3236, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6168, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3236, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▌        | 136/890 [35:09<3:12:17, 15.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3369, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3369, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5370, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3266, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4566, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3266, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4566, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  15%|█▌        | 137/890 [35:24<3:11:53, 15.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5867, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5867, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0903, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3375, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3375, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▌        | 138/890 [35:39<3:11:47, 15.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3033, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0359, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3847, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3847, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▌        | 139/890 [35:54<3:11:05, 15.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2184, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2748, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2748, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▌        | 140/890 [36:10<3:12:41, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4748, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4748, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0449, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3249, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3249, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▌        | 141/890 [36:25<3:11:24, 15.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2779, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3712, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2779, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3712, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6491, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3127, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3127, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▌        | 142/890 [36:40<3:10:53, 15.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6240, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6240, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2771, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9011, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3193, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3193, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▌        | 143/890 [36:56<3:10:16, 15.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7111, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7111, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4469, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5531, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2926, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5531, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2926, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▌        | 144/890 [37:11<3:09:20, 15.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6447, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6447, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2132, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7742, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3679, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7742, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3679, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▋        | 145/890 [37:26<3:09:00, 15.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4708, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4708, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7675, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7474, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7474, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  16%|█▋        | 146/890 [37:41<3:08:23, 15.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3225, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3787, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3225, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3787, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7012, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3279, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3279, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 147/890 [37:56<3:08:15, 15.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3403, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3403, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1655, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5993, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5993, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3002, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 148/890 [38:12<3:09:26, 15.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4628, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4628, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0060, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3536, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3536, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 149/890 [38:27<3:08:25, 15.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3980, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6899, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1014, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3515, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1014, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3515, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 150/890 [38:42<3:08:03, 15.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4924, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4924, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0189, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5455, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3167, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5455, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3167, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 151/890 [38:58<3:08:21, 15.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6534, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6534, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9528, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5814, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3303, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5814, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3303, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 152/890 [39:13<3:07:28, 15.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5914, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3093, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5914, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3093, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9007, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6992, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3659, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6992, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3659, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 153/890 [39:28<3:07:15, 15.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5670, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2826, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5670, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2826, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8496, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0459, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4055, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0459, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4055, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 154/890 [39:43<3:07:15, 15.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6425, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6425, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5018, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3287, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3287, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  17%|█▋        | 155/890 [39:59<3:07:09, 15.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7954, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7954, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6022, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3138, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6022, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3138, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 156/890 [40:14<3:07:42, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3202, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3202, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8995, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7269, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3151, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7269, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3151, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 157/890 [40:29<3:06:24, 15.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9822, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9024, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3339, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9024, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3339, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 158/890 [40:44<3:05:40, 15.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4849, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5407, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4849, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5407, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0256, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3794, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3794, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 159/890 [41:00<3:05:42, 15.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6398, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6398, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3562, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9960, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3685, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3685, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 160/890 [41:15<3:05:31, 15.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5439, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2632, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5439, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2632, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5242, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4046, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5242, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 161/890 [41:30<3:05:52, 15.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3017, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9055, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3277, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3277, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 162/890 [41:46<3:05:18, 15.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5734, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2989, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5734, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2989, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8723, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3635, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3635, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 163/890 [42:01<3:05:47, 15.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7355, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3780, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7355, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3780, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1135, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8106, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8106, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3171, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  18%|█▊        | 164/890 [42:17<3:08:40, 15.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7341, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2745, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7341, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2745, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0087, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4452, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4452, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▊        | 165/890 [42:33<3:07:55, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8348, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3220, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8348, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3220, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1568, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2726, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2726, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▊        | 166/890 [42:48<3:06:35, 15.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6735, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7991, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6735, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7991, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4726, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6435, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3086, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6435, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3086, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▉        | 167/890 [43:03<3:05:33, 15.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9636, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3672, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9636, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3672, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3307, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▉        | 168/890 [43:18<3:04:48, 15.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9765, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4154, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8146, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2951, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8146, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2951, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▉        | 169/890 [43:34<3:04:13, 15.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4711, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4711, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▉        | 170/890 [43:49<3:05:04, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4631, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3589, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3705, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3705, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▉        | 171/890 [44:05<3:04:45, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3492, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3492, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4126, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3287, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3287, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▉        | 172/890 [44:20<3:05:16, 15.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4338, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4338, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1620, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3539, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3539, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  19%|█▉        | 173/890 [44:36<3:05:14, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8322, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4554, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8322, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4554, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2877, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4325, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4325, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|█▉        | 174/890 [44:51<3:04:01, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8140, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3140, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8140, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3140, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1279, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4016, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3556, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4016, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3556, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|█▉        | 175/890 [45:07<3:03:30, 15.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6030, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3592, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6030, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3592, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9622, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3475, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3475, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|█▉        | 176/890 [45:22<3:02:13, 15.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1339, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.3416, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1339, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.3416, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4755, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3800, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3800, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|█▉        | 177/890 [45:37<3:01:42, 15.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9594, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2889, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9594, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2889, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2483, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6853, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2887, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6853, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2887, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|██        | 178/890 [45:53<3:02:56, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3316, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3316, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0306, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3885, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3885, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|██        | 179/890 [46:08<3:01:53, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2603, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2603, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3451, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3451, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|██        | 180/890 [46:23<3:02:08, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3982, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3982, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9408, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9316, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2875, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9316, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2875, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|██        | 181/890 [46:39<3:02:59, 15.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3709, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3709, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3293, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3293, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  20%|██        | 182/890 [46:54<3:01:42, 15.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3808, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3808, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9580, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5515, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3085, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5515, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3085, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██        | 183/890 [47:09<3:00:57, 15.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4221, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4221, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8746, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0059, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0059, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██        | 184/890 [47:25<3:01:47, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6392, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5411, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6392, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5411, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1803, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7978, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3653, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7978, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3653, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██        | 185/890 [47:41<3:02:47, 15.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2775, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2775, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0528, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3706, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3706, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██        | 186/890 [47:57<3:04:27, 15.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5965, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2791, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5965, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2791, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8756, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3755, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3755, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██        | 187/890 [48:12<3:02:56, 15.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3597, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3597, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3291, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3291, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██        | 188/890 [48:29<3:05:08, 15.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2682, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8653, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4615, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3792, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4615, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3792, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██        | 189/890 [48:44<3:04:18, 15.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3413, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3413, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5138, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8028, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2963, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8028, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2963, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██▏       | 190/890 [49:00<3:02:45, 15.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8868, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8868, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5753, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7606, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3150, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7606, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3150, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  21%|██▏       | 191/890 [49:15<3:01:12, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2682, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7537, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3392, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3392, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 192/890 [49:31<3:01:45, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3671, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3868, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3671, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3868, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7539, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4199, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4199, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 193/890 [49:46<3:00:32, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6611, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4059, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6611, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0670, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2993, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2993, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 194/890 [50:02<3:01:35, 15.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3085, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3085, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5015, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7522, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2780, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7522, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2780, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 195/890 [50:18<3:01:29, 15.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3747, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3747, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1027, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7931, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3675, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7931, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3675, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 196/890 [50:33<3:00:38, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3785, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3785, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0173, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6381, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3473, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6381, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3473, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 197/890 [50:49<2:59:48, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5816, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2720, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5816, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2720, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8536, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3019, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 198/890 [51:04<2:59:10, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8230, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3954, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8230, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3954, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2184, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3508, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3508, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 199/890 [51:20<2:58:21, 15.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8748, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8748, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3682, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2430, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7080, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7080, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3608, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  22%|██▏       | 200/890 [51:35<2:58:24, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8659, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9087, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8659, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9087, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7746, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1077, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3922, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1077, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3922, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 201/890 [51:51<2:57:44, 15.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5348, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4693, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5348, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4693, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0041, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3299, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3299, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 202/890 [52:06<2:57:23, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3445, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3445, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0474, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3511, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3511, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 203/890 [52:21<2:56:38, 15.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9391, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3134, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9391, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3134, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2525, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7510, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3484, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7510, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3484, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 204/890 [52:37<2:57:41, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2645, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2645, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8171, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3083, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3083, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 205/890 [52:52<2:56:35, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3705, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3705, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0202, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2827, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2827, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 206/890 [53:08<2:56:01, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9409, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3414, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9409, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3414, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2823, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7702, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3643, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7702, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3643, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 207/890 [53:23<2:55:12, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5796, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5716, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5796, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5716, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1513, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3138, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3138, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 208/890 [53:39<2:55:01, 15.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5369, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3618, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5369, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3618, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8987, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3183, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3183, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  23%|██▎       | 209/890 [53:54<2:54:21, 15.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3870, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3870, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4749, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2924, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2924, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▎       | 210/890 [54:10<2:55:18, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5650, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5650, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8787, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3665, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3665, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▎       | 211/890 [54:25<2:55:33, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3272, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3272, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0005, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2983, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2983, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▍       | 212/890 [54:41<2:56:19, 15.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3224, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3224, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7461, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5586, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4022, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5586, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▍       | 213/890 [54:56<2:55:30, 15.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4623, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4623, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4800, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0686, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3089, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0686, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3089, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▍       | 214/890 [55:12<2:54:41, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0192, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3910, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0192, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3910, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4102, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7218, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2932, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7218, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2932, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▍       | 215/890 [55:27<2:53:51, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6159, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2859, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6159, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2859, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9018, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4220, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4220, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▍       | 216/890 [55:43<2:54:26, 15.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7611, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7611, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0626, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4053, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4053, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▍       | 217/890 [55:58<2:53:57, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9242, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4605, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9242, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4605, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3847, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7710, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3638, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7710, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3638, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  24%|██▍       | 218/890 [56:14<2:53:52, 15.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2676, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2676, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1791, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8314, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3542, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8314, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3542, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▍       | 219/890 [56:29<2:52:54, 15.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3255, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3255, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3543, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3543, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▍       | 220/890 [56:45<2:52:24, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3766, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3766, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7681, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▍       | 221/890 [57:00<2:51:15, 15.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8687, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3326, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8687, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3326, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2013, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7438, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3263, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7438, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3263, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▍       | 222/890 [57:15<2:51:07, 15.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2064, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3193, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2064, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3193, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5257, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3160, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3160, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▌       | 223/890 [57:31<2:50:56, 15.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4539, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4539, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8643, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3266, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3266, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▌       | 224/890 [57:46<2:50:26, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9638, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3707, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3707, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▌       | 225/890 [58:01<2:50:23, 15.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4285, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4285, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9537, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4361, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3444, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4361, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3444, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  25%|██▌       | 226/890 [58:16<2:49:41, 15.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4545, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4545, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2400, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3546, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3546, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▌       | 227/890 [58:32<2:49:33, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4722, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4722, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2229, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8437, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3903, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8437, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3903, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▌       | 228/890 [58:47<2:49:56, 15.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4874, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4874, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0079, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0338, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3135, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0338, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3135, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▌       | 229/890 [59:03<2:49:53, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0004, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3200, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0004, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3200, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3203, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5361, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3814, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5361, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3814, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▌       | 230/890 [59:18<2:49:08, 15.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3243, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3243, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7957, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0185, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3452, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0185, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3452, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▌       | 231/890 [59:33<2:48:34, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6167, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6167, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3913, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0080, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3959, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3959, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▌       | 232/890 [59:49<2:47:56, 15.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6668, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3230, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6668, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3230, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9899, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3459, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3459, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▌       | 233/890 [1:00:04<2:48:20, 15.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5007, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2546, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5007, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2546, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7553, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4040, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4040, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▋       | 234/890 [1:00:19<2:47:35, 15.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8916, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3401, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8916, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3401, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2317, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7692, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3150, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7692, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3150, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  26%|██▋       | 235/890 [1:00:35<2:48:04, 15.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3630, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3390, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3630, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3390, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7020, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0146, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3640, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0146, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3640, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 236/890 [1:00:50<2:48:07, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8031, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3393, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8031, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3393, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1424, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6824, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2975, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6824, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2975, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 237/890 [1:01:06<2:48:33, 15.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4181, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4181, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8732, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3332, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3332, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 238/890 [1:01:21<2:47:43, 15.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9861, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4774, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9861, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4774, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4634, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3506, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3506, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 239/890 [1:01:37<2:47:28, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4671, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3784, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4671, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3784, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8456, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8557, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3762, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8557, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3762, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 240/890 [1:01:53<2:48:22, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3268, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3268, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0480, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3729, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3729, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 241/890 [1:02:08<2:47:29, 15.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4134, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4134, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8399, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2433, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2736, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2433, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2736, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 242/890 [1:02:24<2:47:47, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6642, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7024, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6642, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3665, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3622, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3622, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 243/890 [1:02:40<2:50:59, 15.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3795, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3795, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2140, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3487, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3487, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  27%|██▋       | 244/890 [1:02:56<2:50:48, 15.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4111, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4111, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6892, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2941, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2941, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 245/890 [1:03:12<2:49:49, 15.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8102, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3718, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3718, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 246/890 [1:03:27<2:47:42, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4445, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3046, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4445, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7491, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3600, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3600, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 247/890 [1:03:42<2:46:25, 15.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3155, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3026, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3879, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3879, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 248/890 [1:03:58<2:46:00, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4347, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4347, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8950, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5438, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3110, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5438, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3110, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 249/890 [1:04:13<2:45:08, 15.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3130, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3130, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2869, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2822, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2822, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 250/890 [1:04:28<2:44:08, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9551, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9551, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5801, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3343, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3343, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 251/890 [1:04:44<2:43:48, 15.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3409, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3409, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8445, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9479, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3273, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9479, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3273, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 252/890 [1:04:59<2:43:20, 15.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6076, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6076, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9054, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3049, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  28%|██▊       | 253/890 [1:05:15<2:43:42, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4555, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4597, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4555, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4597, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9151, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2739, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2739, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▊       | 254/890 [1:05:30<2:43:27, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7437, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4405, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7437, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4405, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1842, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3131, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3131, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▊       | 255/890 [1:05:45<2:42:44, 15.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3849, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3849, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3348, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3348, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▉       | 256/890 [1:06:01<2:42:25, 15.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3940, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1673, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2991, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2991, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▉       | 257/890 [1:06:16<2:43:31, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3161, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8916, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3243, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3243, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▉       | 258/890 [1:06:32<2:44:02, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4693, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4693, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0377, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3472, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3472, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▉       | 259/890 [1:06:48<2:43:32, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7161, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3635, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7161, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3635, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0796, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3193, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3193, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▉       | 260/890 [1:07:04<2:44:43, 15.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7224, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3482, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7224, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3482, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0706, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4658, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4658, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3860, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▉       | 261/890 [1:07:19<2:43:47, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8861, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3518, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8861, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3518, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2379, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3567, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3567, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  29%|██▉       | 262/890 [1:07:34<2:42:26, 15.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5076, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5076, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8223, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4271, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3916, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4271, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3916, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|██▉       | 263/890 [1:07:50<2:41:20, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8999, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2774, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2774, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|██▉       | 264/890 [1:08:05<2:40:56, 15.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8039, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2657, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8039, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2657, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0696, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3239, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3239, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|██▉       | 265/890 [1:08:20<2:40:13, 15.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5776, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2733, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5776, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2733, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8508, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6630, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3135, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6630, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3135, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|██▉       | 266/890 [1:08:36<2:40:18, 15.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0268, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2979, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0268, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2979, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3248, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2975, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2975, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|███       | 267/890 [1:08:51<2:40:34, 15.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2479, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3468, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2479, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3468, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5947, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6532, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2762, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6532, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2762, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|███       | 268/890 [1:09:07<2:41:45, 15.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2857, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2857, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9663, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3296, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3296, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|███       | 269/890 [1:09:23<2:40:24, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8404, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3263, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3263, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|███       | 270/890 [1:09:38<2:39:48, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4076, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4076, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9956, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3330, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3330, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  30%|███       | 271/890 [1:09:53<2:38:35, 15.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8258, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3069, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8258, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3069, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███       | 272/890 [1:10:09<2:39:21, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6369, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2776, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6369, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2776, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9146, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8182, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2705, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8182, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2705, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███       | 273/890 [1:10:24<2:39:31, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9380, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3735, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9380, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3735, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3115, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5148, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3800, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5148, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3800, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███       | 274/890 [1:10:40<2:38:40, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3636, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3636, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5084, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1712, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3404, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1712, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3404, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███       | 275/890 [1:10:55<2:38:53, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7912, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5199, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7912, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5199, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3110, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3653, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3653, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███       | 276/890 [1:11:11<2:39:21, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3224, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3224, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1928, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4086, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3579, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4086, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3579, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███       | 277/890 [1:11:27<2:38:47, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7015, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3466, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7015, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3466, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5804, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2963, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5804, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2963, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███       | 278/890 [1:11:42<2:37:44, 15.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6522, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6522, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5069, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3527, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3527, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███▏      | 279/890 [1:11:57<2:36:52, 15.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7981, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3585, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7981, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3585, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1566, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6586, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6586, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  31%|███▏      | 280/890 [1:12:12<2:36:21, 15.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6374, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2886, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6374, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2886, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9260, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5215, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3824, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5215, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3824, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 281/890 [1:12:28<2:36:00, 15.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5007, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3149, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5007, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3149, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8156, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2947, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2947, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 282/890 [1:12:43<2:36:25, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2970, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2970, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9949, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9510, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3347, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9510, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3347, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 283/890 [1:12:59<2:35:52, 15.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2771, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9522, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5268, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3220, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5268, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3220, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 284/890 [1:13:14<2:35:40, 15.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4031, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5389, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2889, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2889, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 285/890 [1:13:29<2:34:49, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5052, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2718, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5052, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2718, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7770, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8223, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3082, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8223, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3082, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 286/890 [1:13:45<2:34:30, 15.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3996, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3996, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3629, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3629, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 287/890 [1:14:00<2:33:55, 15.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2225, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3444, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2225, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3444, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5669, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3383, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 288/890 [1:14:16<2:34:40, 15.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5073, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2895, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5073, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2895, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7968, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9185, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3614, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9185, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3614, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  32%|███▏      | 289/890 [1:14:31<2:34:33, 15.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6655, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3049, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6655, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9704, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3173, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3173, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 290/890 [1:14:48<2:37:30, 15.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3135, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3135, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9640, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4557, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3708, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4557, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3708, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 291/890 [1:15:03<2:37:15, 15.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3451, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3451, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8967, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2950, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2950, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 292/890 [1:15:19<2:36:02, 15.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3649, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3649, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0842, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7433, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7433, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2958, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 293/890 [1:15:34<2:35:51, 15.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0771, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3125, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0771, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3125, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3896, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7473, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3275, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7473, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3275, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 294/890 [1:15:50<2:34:33, 15.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3145, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3145, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2269, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6924, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3355, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6924, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3355, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 295/890 [1:16:05<2:33:23, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2349, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6539, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2349, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6539, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8889, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3058, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3058, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 296/890 [1:16:22<2:36:30, 15.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2848, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2848, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4190, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2773, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2773, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 297/890 [1:16:37<2:36:28, 15.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3380, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3380, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1042, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7775, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3354, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7775, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3354, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  33%|███▎      | 298/890 [1:16:53<2:36:21, 15.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3252, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3252, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6058, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5747, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3487, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5747, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3487, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▎      | 299/890 [1:17:09<2:36:10, 15.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9550, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3405, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9550, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3405, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2884, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2884, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▎      | 300/890 [1:17:25<2:35:17, 15.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3441, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3441, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8064, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7443, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3345, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7443, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3345, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▍      | 301/890 [1:17:40<2:34:32, 15.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1007, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3061, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1007, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4069, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3191, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3191, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▍      | 302/890 [1:17:56<2:33:23, 15.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3603, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3603, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0445, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3364, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3364, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▍      | 303/890 [1:18:11<2:32:17, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3113, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3113, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0100, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3842, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3842, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▍      | 304/890 [1:18:27<2:32:30, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3322, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3322, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9711, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9590, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3437, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9590, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3437, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▍      | 305/890 [1:18:42<2:31:42, 15.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5056, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5579, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5056, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5579, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0635, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6639, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3465, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6639, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3465, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▍      | 306/890 [1:18:59<2:33:04, 15.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3208, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3208, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9793, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2900, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2900, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  34%|███▍      | 307/890 [1:19:14<2:32:01, 15.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3996, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3996, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8629, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7721, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2678, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7721, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2678, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▍      | 308/890 [1:19:30<2:33:18, 15.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2657, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2657, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9565, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3326, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3326, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▍      | 309/890 [1:19:46<2:32:19, 15.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7080, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2956, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7080, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2956, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0036, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3444, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3444, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▍      | 310/890 [1:20:01<2:30:53, 15.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2977, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2977, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9089, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4917, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2849, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4917, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2849, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▍      | 311/890 [1:20:16<2:29:35, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8343, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8343, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1347, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2651, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2651, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▌      | 312/890 [1:20:32<2:30:14, 15.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4276, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2982, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4276, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2982, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7258, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3271, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3271, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▌      | 313/890 [1:20:48<2:29:23, 15.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4025, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0718, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8747, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3735, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8747, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3735, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▌      | 314/890 [1:21:03<2:29:54, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9837, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6404, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9837, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6404, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6242, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6566, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3317, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6566, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3317, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  35%|███▌      | 315/890 [1:21:19<2:29:43, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3129, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3129, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3273, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2629, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2629, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▌      | 316/890 [1:21:34<2:28:47, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2798, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2695, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2695, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▌      | 317/890 [1:21:50<2:28:02, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5571, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3635, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5571, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3635, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9206, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3057, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▌      | 318/890 [1:22:05<2:27:22, 15.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6136, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3427, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6136, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3427, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9563, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3777, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3777, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▌      | 319/890 [1:22:21<2:26:53, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2759, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2759, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3315, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▌      | 320/890 [1:22:36<2:26:45, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4204, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4204, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5744, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▌      | 321/890 [1:22:52<2:26:45, 15.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5878, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4696, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5878, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4696, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0575, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2655, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2655, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▌      | 322/890 [1:23:08<2:28:00, 15.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5312, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3126, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5312, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3126, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8438, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2765, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▋      | 323/890 [1:23:23<2:28:29, 15.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4586, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4586, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7466, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2988, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7466, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2988, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  36%|███▋      | 324/890 [1:23:39<2:28:26, 15.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6001, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4133, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4439, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3457, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4439, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3457, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 325/890 [1:23:55<2:28:58, 15.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3100, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3100, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8462, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3322, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3322, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 326/890 [1:24:11<2:29:05, 15.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3545, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3545, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8491, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3230, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3230, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 327/890 [1:24:26<2:27:09, 15.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5227, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5227, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1497, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3171, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 328/890 [1:24:43<2:27:57, 15.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8584, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8584, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3821, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2406, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3323, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3323, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 329/890 [1:24:58<2:27:54, 15.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5052, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2922, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5052, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2922, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7974, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8594, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3258, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8594, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3258, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 330/890 [1:25:14<2:26:36, 15.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5285, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3311, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5285, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3311, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8596, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2923, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2923, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 331/890 [1:25:30<2:26:42, 15.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2862, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2862, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1624, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3562, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 332/890 [1:25:45<2:26:10, 15.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3519, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3519, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9632, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0226, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3093, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0226, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3093, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  37%|███▋      | 333/890 [1:26:01<2:25:04, 15.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3871, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3871, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0396, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2969, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2969, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 334/890 [1:26:16<2:23:47, 15.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3180, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3180, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7535, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3506, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3506, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 335/890 [1:26:31<2:23:03, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6229, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4458, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6229, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4458, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0687, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3242, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3242, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 336/890 [1:26:47<2:23:35, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3712, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3712, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9449, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7752, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3092, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7752, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3092, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 337/890 [1:27:02<2:22:35, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3191, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3191, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8916, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3440, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3440, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 338/890 [1:27:18<2:22:48, 15.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2926, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2926, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0081, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3311, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3311, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 339/890 [1:27:34<2:23:35, 15.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8548, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8548, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4179, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5206, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2618, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5206, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2618, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 340/890 [1:27:50<2:23:32, 15.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6016, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6016, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3821, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9837, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1669, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3322, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1669, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3322, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 341/890 [1:28:05<2:22:33, 15.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0976, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3330, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0976, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3330, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4305, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3025, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  38%|███▊      | 342/890 [1:28:20<2:21:32, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5391, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4281, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5391, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4281, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9672, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3295, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3295, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▊      | 343/890 [1:28:36<2:20:53, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3969, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3969, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9581, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2509, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2545, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2509, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2545, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▊      | 344/890 [1:28:52<2:21:38, 15.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6804, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3309, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6804, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3309, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0113, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8025, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2959, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8025, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2959, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▉      | 345/890 [1:29:07<2:22:15, 15.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3248, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3248, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9390, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7279, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3060, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7279, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3060, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▉      | 346/890 [1:29:23<2:22:57, 15.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5629, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3229, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5629, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3229, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8126, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2783, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8126, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2783, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▉      | 347/890 [1:29:39<2:22:22, 15.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3634, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3634, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1121, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2622, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2622, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▉      | 348/890 [1:29:55<2:22:30, 15.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8274, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3566, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8274, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3566, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1840, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3165, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3165, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▉      | 349/890 [1:30:10<2:20:56, 15.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6542, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3531, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6542, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3531, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0074, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7366, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3585, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7366, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3585, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▉      | 350/890 [1:30:25<2:19:34, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8493, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3079, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8493, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3079, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1573, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2865, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2865, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  39%|███▉      | 351/890 [1:30:41<2:18:43, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5860, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3184, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5860, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3184, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9044, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3299, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3299, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|███▉      | 352/890 [1:30:56<2:19:18, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8086, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3194, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8086, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3194, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1281, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6912, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3067, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6912, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|███▉      | 353/890 [1:31:12<2:19:33, 15.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3960, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3960, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5894, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9256, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3195, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9256, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3195, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|███▉      | 354/890 [1:31:28<2:21:02, 15.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4738, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4738, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5934, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2878, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2878, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|███▉      | 355/890 [1:31:44<2:19:34, 15.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7821, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2699, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7821, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2699, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0520, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4478, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2603, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4478, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2603, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|████      | 356/890 [1:31:59<2:18:28, 15.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3227, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3227, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7371, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3078, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3078, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|████      | 357/890 [1:32:15<2:19:25, 15.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7878, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3483, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7878, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3483, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1362, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3106, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3106, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|████      | 358/890 [1:32:31<2:18:21, 15.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.9938, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.0425, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9938, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.0425, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(6.0363, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5462, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3357, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5462, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3357, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|████      | 359/890 [1:32:46<2:17:30, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4116, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4116, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3350, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5300, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2962, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5300, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2962, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  40%|████      | 360/890 [1:33:01<2:17:01, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5542, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.0860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5542, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.0860, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6402, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3156, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3156, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████      | 361/890 [1:33:17<2:16:30, 15.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7995, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.6217, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7995, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.6217, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4212, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2918, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2918, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████      | 362/890 [1:33:33<2:16:52, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5775, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4111, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5775, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4111, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9887, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4916, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3490, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4916, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3490, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████      | 363/890 [1:33:48<2:16:13, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4349, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4349, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5398, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4435, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4435, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████      | 364/890 [1:34:04<2:16:28, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7241, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3427, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7241, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3427, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0668, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3645, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3645, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████      | 365/890 [1:34:19<2:15:42, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2898, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2898, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9558, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4638, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4638, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2608, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████      | 366/890 [1:34:35<2:15:34, 15.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2763, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2763, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0368, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3384, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3384, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████      | 367/890 [1:34:50<2:15:16, 15.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3635, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3635, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1922, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6468, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3077, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6468, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3077, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████▏     | 368/890 [1:35:06<2:14:50, 15.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2958, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8545, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5867, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3813, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5867, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3813, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  41%|████▏     | 369/890 [1:35:21<2:15:46, 15.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3046, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9030, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6111, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3362, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6111, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3362, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 370/890 [1:35:37<2:16:08, 15.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3164, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3164, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0500, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3508, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3508, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 371/890 [1:35:53<2:15:54, 15.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4824, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4824, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2771, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7595, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4889, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3042, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4889, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 372/890 [1:36:09<2:15:57, 15.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0087, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3053, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0087, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3140, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3172, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3172, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 373/890 [1:36:24<2:14:33, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6998, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5310, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6998, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5310, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2308, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3188, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3188, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 374/890 [1:36:39<2:13:05, 15.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2897, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2897, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7634, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2677, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2677, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 375/890 [1:36:55<2:12:37, 15.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4992, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3503, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4992, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3503, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8495, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3242, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3242, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 376/890 [1:37:10<2:12:35, 15.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3897, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3897, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9373, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3300, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3300, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 377/890 [1:37:26<2:12:03, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5127, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5127, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2908, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8035, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3768, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3768, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  42%|████▏     | 378/890 [1:37:41<2:12:00, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4321, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4321, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9464, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4159, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3107, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4159, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3107, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 379/890 [1:37:57<2:12:58, 15.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4542, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5119, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4542, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5119, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9661, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5492, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2976, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5492, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2976, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 380/890 [1:38:13<2:13:18, 15.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3265, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3265, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1845, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2771, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 381/890 [1:38:29<2:13:07, 15.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8238, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8238, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5310, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3658, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3658, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 382/890 [1:38:44<2:11:40, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4992, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3327, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4992, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3327, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8320, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3559, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3559, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 383/890 [1:38:59<2:10:44, 15.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3446, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3446, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8100, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6867, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2896, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6867, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2896, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 384/890 [1:39:15<2:11:19, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3443, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3443, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0614, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 385/890 [1:39:31<2:11:22, 15.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6053, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3113, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6053, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3113, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3209, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3209, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 386/890 [1:39:46<2:10:14, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5635, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5408, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5635, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5408, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1044, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  43%|████▎     | 387/890 [1:40:02<2:10:05, 15.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6803, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6642, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6803, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6642, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3445, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8993, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2847, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8993, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2847, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▎     | 388/890 [1:40:17<2:09:44, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2927, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2927, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3938, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3515, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3515, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▎     | 389/890 [1:40:32<2:08:53, 15.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3652, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3652, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0954, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3158, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3158, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▍     | 390/890 [1:40:48<2:08:14, 15.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3090, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3090, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8701, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8863, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8863, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3155, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▍     | 391/890 [1:41:03<2:07:32, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3167, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3167, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8870, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3189, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3189, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▍     | 392/890 [1:41:18<2:07:30, 15.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6923, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3945, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6923, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3945, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0869, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4899, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3713, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4899, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3713, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▍     | 393/890 [1:41:33<2:07:05, 15.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3285, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3285, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7116, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2838, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2838, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▍     | 394/890 [1:41:49<2:08:14, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5273, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3377, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5273, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3377, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8650, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7550, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2985, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7550, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2985, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▍     | 395/890 [1:42:05<2:08:17, 15.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3661, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3661, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8142, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3286, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3286, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  44%|████▍     | 396/890 [1:42:21<2:08:14, 15.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9589, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2648, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9589, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2648, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2237, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3024, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▍     | 397/890 [1:42:37<2:09:09, 15.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2723, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2723, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8847, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6273, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3056, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6273, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3056, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▍     | 398/890 [1:42:52<2:07:52, 15.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.2857, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4040, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.2857, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.6897, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2834, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2834, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▍     | 399/890 [1:43:07<2:07:08, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7638, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2686, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7638, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2686, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0324, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8531, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3108, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8531, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3108, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▍     | 400/890 [1:43:24<2:08:36, 15.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3367, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3367, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8844, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3624, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3624, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▌     | 401/890 [1:43:39<2:08:07, 15.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7055, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7055, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9915, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2913, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▌     | 402/890 [1:43:55<2:07:03, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4907, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3625, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4907, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3625, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8532, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3047, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3047, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▌     | 403/890 [1:44:10<2:06:59, 15.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3459, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3459, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5506, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6076, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3775, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6076, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3775, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  45%|████▌     | 404/890 [1:44:26<2:06:52, 15.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1321, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2806, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1321, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2806, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4127, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0457, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3942, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0457, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3942, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▌     | 405/890 [1:44:42<2:06:53, 15.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3352, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3352, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2308, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5872, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3751, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5872, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3751, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▌     | 406/890 [1:44:57<2:06:06, 15.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6416, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2671, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6416, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2671, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9087, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0031, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3379, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0031, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3379, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▌     | 407/890 [1:45:13<2:05:01, 15.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6471, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2989, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6471, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2989, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9460, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6617, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3480, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6617, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3480, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▌     | 408/890 [1:45:28<2:05:04, 15.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4948, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3365, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4948, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3365, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8313, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5116, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2923, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5116, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2923, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▌     | 409/890 [1:45:44<2:04:33, 15.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2873, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2873, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9482, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2699, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2699, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▌     | 410/890 [1:45:59<2:04:02, 15.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6361, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2820, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6361, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2820, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9182, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3321, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3321, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▌     | 411/890 [1:46:15<2:03:12, 15.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2696, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2696, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1085, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2526, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2526, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▋     | 412/890 [1:46:31<2:04:21, 15.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0729, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2560, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0729, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2560, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3290, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4058, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2749, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4058, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2749, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  46%|████▋     | 413/890 [1:46:46<2:03:54, 15.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3028, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5630, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2619, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2619, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 414/890 [1:47:01<2:03:13, 15.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2186, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2841, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2186, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2841, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5027, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3285, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3285, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 415/890 [1:47:17<2:02:56, 15.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4815, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2695, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4815, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2695, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7510, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3091, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3091, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 416/890 [1:47:33<2:04:06, 15.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3096, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3096, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3882, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2992, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2992, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 417/890 [1:47:49<2:03:30, 15.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3289, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7848, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2607, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2607, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 418/890 [1:48:05<2:04:52, 15.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6075, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2863, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6075, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2863, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8937, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7853, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3793, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7853, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3793, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 419/890 [1:48:21<2:04:25, 15.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3412, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3412, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6362, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 420/890 [1:48:37<2:04:29, 15.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2875, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2875, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0599, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3685, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3685, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 421/890 [1:48:52<2:03:15, 15.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8569, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2747, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2747, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  47%|████▋     | 422/890 [1:49:08<2:02:03, 15.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2995, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2995, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9881, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3289, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 423/890 [1:49:23<2:01:25, 15.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2878, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2878, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7632, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3315, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3315, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 424/890 [1:49:39<2:00:58, 15.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4398, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4398, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8695, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2126, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2724, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2126, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2724, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 425/890 [1:49:54<2:01:10, 15.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6581, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3170, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6581, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3170, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9751, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8037, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8037, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 426/890 [1:50:10<2:00:47, 15.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2426, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2426, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8309, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2982, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2982, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 427/890 [1:50:26<2:01:05, 15.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6013, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2598, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6013, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2598, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8611, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3652, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3042, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3652, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 428/890 [1:50:42<2:01:57, 15.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4530, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4530, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4621, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3105, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3105, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 429/890 [1:50:58<2:01:24, 15.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0789, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3082, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0789, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3082, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3872, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3138, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3138, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 430/890 [1:51:13<2:00:47, 15.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2987, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2987, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1773, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3162, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3162, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  48%|████▊     | 431/890 [1:51:29<1:59:50, 15.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5367, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2954, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5367, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2954, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8321, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9456, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3122, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9456, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3122, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▊     | 432/890 [1:51:44<1:59:17, 15.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4714, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4714, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2163, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2878, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2878, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▊     | 433/890 [1:52:00<1:58:38, 15.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9705, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4396, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9705, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4396, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4102, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3343, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3343, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▉     | 434/890 [1:52:16<1:59:25, 15.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.8336, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.8336, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.0699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3095, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3095, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▉     | 435/890 [1:52:32<1:59:45, 15.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5721, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3859, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5721, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3859, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9580, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9470, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3245, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9470, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3245, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▉     | 436/890 [1:52:48<1:59:59, 15.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7718, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7718, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8900, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3070, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3070, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▉     | 437/890 [1:53:04<1:59:55, 15.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3669, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3669, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7555, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3130, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3130, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▉     | 438/890 [1:53:20<1:59:08, 15.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.0609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6056, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.6665, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3005, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▉     | 439/890 [1:53:35<1:57:43, 15.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2814, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2814, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4698, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9599, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4337, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9599, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4337, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  49%|████▉     | 440/890 [1:53:51<1:58:48, 15.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8866, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6084, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2824, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6084, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2824, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|████▉     | 441/890 [1:54:07<1:58:05, 15.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8779, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8779, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2562, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1341, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5206, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2672, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5206, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2672, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|████▉     | 442/890 [1:54:23<1:58:06, 15.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2958, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9558, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1032, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3936, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1032, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3936, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|████▉     | 443/890 [1:54:38<1:57:56, 15.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2917, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2917, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9403, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7190, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3287, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7190, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3287, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|████▉     | 444/890 [1:54:55<1:58:16, 15.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9851, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7522, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3209, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7522, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3209, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|█████     | 445/890 [1:55:10<1:57:15, 15.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4595, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4595, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2154, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4250, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4250, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|█████     | 446/890 [1:55:26<1:56:25, 15.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2933, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2933, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8065, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3486, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3486, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|█████     | 447/890 [1:55:41<1:55:31, 15.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2904, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2904, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4275, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3182, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3182, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|█████     | 448/890 [1:55:57<1:56:18, 15.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3508, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3508, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9070, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7641, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2842, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7641, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2842, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  50%|█████     | 449/890 [1:56:13<1:56:42, 15.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7759, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2587, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7759, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2587, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0346, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5860, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3365, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5860, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3365, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████     | 450/890 [1:56:29<1:56:10, 15.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3289, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5246, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3257, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3257, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████     | 451/890 [1:56:45<1:56:08, 15.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9019, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4557, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9019, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4557, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3575, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8015, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4192, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8015, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4192, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████     | 452/890 [1:57:01<1:56:49, 16.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2887, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2887, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5934, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3316, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3316, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████     | 453/890 [1:57:30<2:23:54, 19.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5456, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2776, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5456, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2776, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8232, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3295, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3295, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████     | 454/890 [1:57:46<2:16:09, 18.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4924, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3041, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4924, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7965, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6583, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3153, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6583, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3153, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████     | 455/890 [1:58:03<2:11:12, 18.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4979, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4979, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3196, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3216, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3216, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████     | 456/890 [1:58:20<2:07:50, 17.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1134, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4999, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2924, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4999, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2924, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████▏    | 457/890 [1:58:36<2:04:48, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7686, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3223, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7686, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3223, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0909, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2668, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2668, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  51%|█████▏    | 458/890 [1:58:53<2:03:11, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6644, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3515, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6644, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3515, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0158, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2652, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2652, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 459/890 [1:59:09<2:02:10, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4004, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3799, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4004, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3799, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7802, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3166, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3166, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 460/890 [1:59:26<2:01:12, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8525, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2944, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2944, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 461/890 [1:59:42<1:59:35, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2822, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2822, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7362, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3076, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3076, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 462/890 [1:59:59<1:58:08, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4377, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4148, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4377, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4148, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8525, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3153, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3153, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 463/890 [2:00:15<1:57:18, 16.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7343, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3299, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7343, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3299, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0642, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3136, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3136, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 464/890 [2:00:31<1:56:39, 16.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3837, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3837, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3663, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4195, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4195, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 465/890 [2:00:47<1:56:04, 16.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2806, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2806, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6901, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1815, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3346, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1815, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3346, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 466/890 [2:01:04<1:56:12, 16.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2961, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2961, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7085, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3773, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3773, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  52%|█████▏    | 467/890 [2:01:20<1:55:54, 16.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2744, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2744, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9428, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4376, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3102, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4376, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3102, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 468/890 [2:01:37<1:55:53, 16.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3456, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3456, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1312, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3268, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3268, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 469/890 [2:01:53<1:55:35, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4310, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3907, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4310, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3907, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8217, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3022, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 470/890 [2:02:11<1:56:50, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3331, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3331, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9123, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5458, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2654, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5458, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2654, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 471/890 [2:02:27<1:55:34, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4168, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4168, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8771, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7549, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3337, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7549, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3337, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 472/890 [2:02:43<1:54:39, 16.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5373, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9225, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5373, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9225, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4598, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6349, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3396, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6349, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3396, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 473/890 [2:03:00<1:54:21, 16.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5120, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3088, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5120, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3088, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8207, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3177, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3177, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 474/890 [2:03:16<1:54:33, 16.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4682, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1094, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7815, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3147, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7815, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3147, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 475/890 [2:03:33<1:54:17, 16.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3907, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3063, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3907, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6970, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2948, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2948, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  53%|█████▎    | 476/890 [2:03:49<1:53:47, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3101, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3101, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8410, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6673, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3402, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6673, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3402, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▎    | 477/890 [2:04:06<1:53:50, 16.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3717, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5525, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3717, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5525, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9242, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3219, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▎    | 478/890 [2:04:22<1:52:57, 16.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5900, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4452, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5900, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4452, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0353, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3095, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3095, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▍    | 479/890 [2:04:38<1:52:03, 16.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6074, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2870, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6074, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2870, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8943, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3485, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3485, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▍    | 480/890 [2:04:55<1:52:31, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3360, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3360, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7344, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3896, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3896, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3383, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▍    | 481/890 [2:05:11<1:51:57, 16.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2541, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2541, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7295, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2984, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2984, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▍    | 482/890 [2:05:28<1:52:03, 16.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2610, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2610, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7085, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2692, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2692, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▍    | 483/890 [2:05:44<1:51:26, 16.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8231, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2720, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8231, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2720, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0951, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2795, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2795, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▍    | 484/890 [2:06:01<1:51:15, 16.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7836, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7836, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3171, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1007, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5411, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3364, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5411, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3364, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  54%|█████▍    | 485/890 [2:06:17<1:51:33, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4966, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3186, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4966, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3186, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8152, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6905, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2893, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6905, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2893, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▍    | 486/890 [2:06:34<1:50:29, 16.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3300, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3300, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1357, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9541, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2869, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9541, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2869, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▍    | 487/890 [2:06:50<1:49:50, 16.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2988, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2988, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2325, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4836, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4836, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3682, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▍    | 488/890 [2:07:06<1:49:46, 16.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8893, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3673, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8893, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3673, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2566, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3673, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3673, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▍    | 489/890 [2:07:22<1:48:57, 16.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1726, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2681, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1726, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2681, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3446, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3446, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▌    | 490/890 [2:07:39<1:49:46, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6482, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3360, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6482, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3360, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9843, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4021, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▌    | 491/890 [2:07:56<1:49:10, 16.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3418, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3418, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9584, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3218, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3218, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▌    | 492/890 [2:08:12<1:49:36, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3840, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3840, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0804, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2929, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2929, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  55%|█████▌    | 493/890 [2:08:29<1:49:09, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6530, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3553, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6530, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3553, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0083, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3674, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3674, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▌    | 494/890 [2:08:45<1:48:31, 16.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0248, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5093, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0248, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5093, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5341, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3503, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3503, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▌    | 495/890 [2:09:01<1:47:51, 16.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2909, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2909, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8856, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2976, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2976, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▌    | 496/890 [2:09:18<1:49:14, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7535, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4272, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7535, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4272, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1808, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3003, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▌    | 497/890 [2:09:35<1:48:36, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3312, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3312, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0484, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0325, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3189, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0325, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3189, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▌    | 498/890 [2:09:51<1:47:48, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6209, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6209, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2521, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3085, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3085, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▌    | 499/890 [2:10:08<1:48:31, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3605, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3605, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9359, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3163, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3163, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▌    | 500/890 [2:10:25<1:47:50, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3808, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3808, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9106, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2765, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▋    | 501/890 [2:10:41<1:47:03, 16.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6074, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4859, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6074, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4859, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0933, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3186, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3186, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  56%|█████▋    | 502/890 [2:10:57<1:46:35, 16.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5042, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2882, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5042, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2882, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7924, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6368, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3381, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6368, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3381, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 503/890 [2:11:14<1:46:21, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5778, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3423, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5778, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3423, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9201, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2985, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2985, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 504/890 [2:11:30<1:45:54, 16.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3517, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3517, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8432, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7409, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3408, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7409, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3408, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 505/890 [2:11:47<1:45:29, 16.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4770, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3351, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4770, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3351, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8121, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6504, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2910, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6504, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2910, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 506/890 [2:12:03<1:45:43, 16.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8442, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8442, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3064, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1261, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3343, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1261, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3343, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 507/890 [2:12:20<1:45:38, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4295, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4295, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8647, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3105, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3105, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 508/890 [2:12:37<1:46:42, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4721, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.2255, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4721, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.2255, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6976, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.1651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3358, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.1651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3358, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 509/890 [2:12:54<1:45:56, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4788, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2674, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4788, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2674, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7462, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3694, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3694, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 510/890 [2:13:10<1:44:56, 16.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3114, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3114, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9512, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6058, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2901, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6058, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2901, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  57%|█████▋    | 511/890 [2:13:27<1:44:17, 16.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2792, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2792, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9941, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2577, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2577, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 512/890 [2:13:43<1:44:03, 16.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4421, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3215, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4421, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3215, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6702, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3232, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6702, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3232, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 513/890 [2:13:59<1:43:37, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3017, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6023, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2469, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6023, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2469, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 514/890 [2:14:16<1:43:55, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.0158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9066, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.0158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9066, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.9224, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4785, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2704, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4785, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2704, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 515/890 [2:14:33<1:43:53, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3239, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3239, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4329, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3769, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3769, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 516/890 [2:14:49<1:43:10, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3631, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0909, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2838, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2838, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 517/890 [2:15:06<1:42:44, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4935, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4935, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2540, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2950, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2950, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 518/890 [2:15:22<1:42:20, 16.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6953, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6953, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4908, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1862, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3009, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 519/890 [2:15:39<1:41:45, 16.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5620, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4661, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5620, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4661, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0282, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1164, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2606, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1164, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2606, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  58%|█████▊    | 520/890 [2:15:55<1:42:00, 16.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1055, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3582, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1055, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3582, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4637, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0236, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2735, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0236, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2735, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▊    | 521/890 [2:16:12<1:41:24, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2801, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2801, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7537, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.3339, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3293, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3339, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3293, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▊    | 522/890 [2:16:28<1:40:59, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3162, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3162, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3736, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7623, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5263, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7623, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5263, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▉    | 523/890 [2:16:45<1:41:38, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5688, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3086, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5688, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3086, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8774, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4243, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4243, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▉    | 524/890 [2:17:02<1:41:41, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6568, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2716, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6568, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2716, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9284, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▉    | 525/890 [2:17:18<1:41:01, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9866, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3164, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9866, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3164, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3029, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2567, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2567, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▉    | 526/890 [2:17:35<1:40:23, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3192, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3192, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9720, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3376, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3376, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▉    | 527/890 [2:17:51<1:39:39, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2622, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2622, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8445, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7914, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2949, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7914, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2949, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▉    | 528/890 [2:18:09<1:41:11, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8831, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3038, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8831, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1869, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4756, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3601, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4756, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3601, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  59%|█████▉    | 529/890 [2:18:25<1:40:48, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9060, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2839, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9060, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2839, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1899, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4138, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3574, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4138, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3574, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|█████▉    | 530/890 [2:18:42<1:40:24, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5455, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3297, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5455, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3297, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8752, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3153, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3086, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3153, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3086, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|█████▉    | 531/890 [2:18:59<1:40:06, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6717, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3288, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6717, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3288, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0005, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3229, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3229, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|█████▉    | 532/890 [2:19:16<1:40:22, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2608, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.1057, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3697, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3697, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|█████▉    | 533/890 [2:19:32<1:39:41, 16.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7472, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3585, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7472, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3585, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1057, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3401, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3401, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|██████    | 534/890 [2:19:49<1:38:42, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3370, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3370, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2289, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2832, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2832, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|██████    | 535/890 [2:20:05<1:38:05, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6865, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6504, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6865, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6504, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3369, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3117, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3117, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|██████    | 536/890 [2:20:22<1:37:47, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0907, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5668, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0907, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5668, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6574, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3172, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3172, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|██████    | 537/890 [2:20:38<1:37:19, 16.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5435, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9099, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5435, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9099, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4534, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3505, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3505, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  60%|██████    | 538/890 [2:20:55<1:37:21, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2880, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2880, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0021, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.2205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3180, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3180, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████    | 539/890 [2:21:11<1:37:01, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6569, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5577, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6569, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5577, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2146, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8417, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3419, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8417, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3419, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████    | 540/890 [2:21:29<1:37:39, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3181, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3181, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9717, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3119, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3119, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████    | 541/890 [2:21:45<1:37:30, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3473, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3473, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3940, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7413, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3479, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3479, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████    | 542/890 [2:22:02<1:36:48, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5799, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3861, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5799, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3861, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9661, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0696, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3533, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0696, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3533, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████    | 543/890 [2:22:18<1:36:02, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1906, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3192, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1906, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3192, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5098, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4361, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2691, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4361, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2691, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████    | 544/890 [2:22:35<1:35:32, 16.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2872, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7751, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2872, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7751, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0623, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5948, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4457, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5948, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4457, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████    | 545/890 [2:22:52<1:35:39, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4314, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2703, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4314, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2703, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7017, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1473, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3152, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1473, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3152, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████▏   | 546/890 [2:23:08<1:35:37, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5729, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3030, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5729, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8759, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0589, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0589, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3161, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  61%|██████▏   | 547/890 [2:23:25<1:35:58, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6076, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6076, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1608, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3843, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1608, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3843, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 548/890 [2:23:42<1:35:09, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6312, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4371, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6312, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4371, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0683, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8261, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3559, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8261, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3559, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 549/890 [2:23:58<1:34:19, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4328, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4328, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9754, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3645, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3645, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 550/890 [2:24:14<1:33:28, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6319, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5467, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6319, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5467, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1786, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8776, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3271, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8776, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3271, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 551/890 [2:24:31<1:33:31, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3130, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3130, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2157, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6355, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3410, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6355, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3410, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 552/890 [2:24:48<1:33:15, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3023, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5302, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3155, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 553/890 [2:25:04<1:32:41, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5084, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3069, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2771, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 554/890 [2:25:21<1:32:32, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8543, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4571, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8543, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4571, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3114, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3088, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3088, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 555/890 [2:25:37<1:32:22, 16.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7881, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2988, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7881, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2988, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0869, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2808, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2808, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  62%|██████▏   | 556/890 [2:25:54<1:32:04, 16.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3887, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2762, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3887, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2762, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6649, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7817, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3599, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7817, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3599, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 557/890 [2:26:11<1:32:37, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3215, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3215, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9597, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2961, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2961, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 558/890 [2:26:28<1:32:59, 16.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4001, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0212, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8738, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8738, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2980, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 559/890 [2:26:44<1:31:56, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4389, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4389, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9245, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2749, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2749, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 560/890 [2:27:01<1:31:26, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4306, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4306, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0145, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8018, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3281, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8018, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3281, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 561/890 [2:27:17<1:31:12, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7921, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7921, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3155, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1076, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3189, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3189, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 562/890 [2:27:35<1:31:41, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6154, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3159, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6154, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3159, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9314, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7498, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3512, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7498, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3512, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 563/890 [2:27:51<1:30:55, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3087, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3087, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7518, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3105, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3105, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 564/890 [2:28:08<1:30:51, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7601, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3820, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7601, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3820, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1422, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8425, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3065, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8425, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3065, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  63%|██████▎   | 565/890 [2:28:25<1:31:27, 16.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3619, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3619, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1788, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9554, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2904, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9554, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2904, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▎   | 566/890 [2:28:42<1:30:30, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1375, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3931, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1375, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3931, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5307, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4853, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4028, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4853, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▎   | 567/890 [2:28:58<1:29:38, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9615, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3661, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9615, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3661, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3276, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1539, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3431, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1539, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3431, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▍   | 568/890 [2:29:15<1:29:11, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5004, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4789, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5004, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4789, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9792, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4404, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4280, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4404, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4280, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▍   | 569/890 [2:29:31<1:28:36, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3616, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3616, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9183, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7493, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3028, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7493, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▍   | 570/890 [2:29:48<1:28:47, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6896, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5512, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6896, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5512, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2409, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3629, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3629, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▍   | 571/890 [2:30:05<1:28:43, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3728, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3728, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0911, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3003, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▍   | 572/890 [2:30:21<1:28:35, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0715, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3157, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0715, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3157, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3872, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5234, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5234, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3631, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▍   | 573/890 [2:30:38<1:28:18, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6458, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6458, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5765, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2223, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2724, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2724, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  64%|██████▍   | 574/890 [2:30:55<1:27:37, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4131, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4131, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3125, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3125, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▍   | 575/890 [2:31:11<1:26:51, 16.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1137, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1137, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5171, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6308, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6759, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3106, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6759, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3106, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▍   | 576/890 [2:31:27<1:26:33, 16.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2699, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2699, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1055, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3131, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3131, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▍   | 577/890 [2:31:44<1:25:52, 16.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8390, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8390, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5317, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3061, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3061, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▍   | 578/890 [2:32:01<1:26:32, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5083, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5652, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5083, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5652, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0735, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8300, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2707, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8300, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2707, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▌   | 579/890 [2:32:17<1:26:23, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9486, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3733, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3733, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▌   | 580/890 [2:32:34<1:26:29, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7051, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2789, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7051, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2789, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9840, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2966, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2966, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3009, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▌   | 581/890 [2:32:51<1:25:49, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.9196, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.9196, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.3664, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3962, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3962, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  65%|██████▌   | 582/890 [2:33:07<1:25:12, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5961, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4642, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5961, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4642, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0603, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3203, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3203, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▌   | 583/890 [2:33:24<1:24:51, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5986, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2954, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5986, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2954, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8940, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3133, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3133, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▌   | 584/890 [2:33:40<1:24:12, 16.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6648, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3078, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6648, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3078, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9726, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2755, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2755, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▌   | 585/890 [2:33:57<1:23:49, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8626, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3877, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8626, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3877, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2502, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3520, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3520, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▌   | 586/890 [2:34:13<1:24:03, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4223, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4223, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3456, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3442, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3442, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▌   | 587/890 [2:34:30<1:23:48, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0077, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6407, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2857, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6407, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2857, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▌   | 588/890 [2:34:47<1:23:37, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1876, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2754, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1876, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2754, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4630, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2761, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2761, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▌   | 589/890 [2:35:03<1:23:15, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4067, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4067, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0988, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5022, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3762, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5022, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3762, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▋   | 590/890 [2:35:20<1:23:08, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4171, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1781, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5846, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3097, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5846, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3097, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  66%|██████▋   | 591/890 [2:35:37<1:23:10, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3123, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3123, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0682, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8866, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3431, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8866, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3431, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 592/890 [2:35:53<1:22:38, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3051, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8388, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3846, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3846, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 593/890 [2:36:10<1:22:28, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3178, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3178, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4834, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8257, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3118, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8257, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3118, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 594/890 [2:36:27<1:22:30, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6807, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3552, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6807, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3552, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0359, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3476, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3476, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 595/890 [2:36:44<1:22:24, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4401, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3316, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4401, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3316, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7717, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3470, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3470, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 596/890 [2:37:00<1:21:51, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4213, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4213, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9214, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3664, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3664, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 597/890 [2:37:17<1:22:00, 16.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3138, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3138, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2784, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2821, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 598/890 [2:37:34<1:21:28, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8981, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2949, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2949, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 599/890 [2:37:50<1:20:49, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6790, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6790, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3690, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3690, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  67%|██████▋   | 600/890 [2:38:08<1:21:14, 16.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4238, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4238, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9282, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3867, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3867, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 601/890 [2:38:24<1:20:40, 16.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3066, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3066, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9900, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7415, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2999, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7415, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2999, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 602/890 [2:38:41<1:20:18, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3388, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3388, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1781, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2780, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2780, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 603/890 [2:38:58<1:20:03, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3145, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3145, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9624, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2964, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2964, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 604/890 [2:39:15<1:20:39, 16.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1052, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2791, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1052, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2791, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3843, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3396, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3396, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 605/890 [2:39:32<1:20:09, 16.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3477, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3477, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4276, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3029, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 606/890 [2:39:48<1:19:21, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3801, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3801, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6633, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3406, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3406, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 607/890 [2:40:05<1:18:49, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3032, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7179, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3864, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3864, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 608/890 [2:40:21<1:18:17, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3339, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3339, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2456, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3216, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3216, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  68%|██████▊   | 609/890 [2:40:38<1:17:59, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4912, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2881, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4912, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2881, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7793, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3757, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3757, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▊   | 610/890 [2:40:55<1:18:06, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2784, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2784, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7616, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0351, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3372, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0351, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3372, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▊   | 611/890 [2:41:12<1:18:07, 16.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5812, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3168, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5812, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3168, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8980, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7051, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3414, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7051, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3414, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▉   | 612/890 [2:41:28<1:17:24, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8231, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7643, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8231, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7643, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5874, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2931, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2931, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▉   | 613/890 [2:41:45<1:17:02, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4124, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4124, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0267, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8853, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2624, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8853, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2624, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▉   | 614/890 [2:42:02<1:16:42, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3600, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3600, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.0045, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3335, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3335, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▉   | 615/890 [2:42:18<1:16:14, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8815, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4981, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8815, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4981, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3795, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2481, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2481, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▉   | 616/890 [2:42:35<1:15:42, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3063, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8048, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5133, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2685, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5133, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2685, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▉   | 617/890 [2:42:52<1:15:48, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6140, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4500, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6140, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4500, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0641, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3592, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3592, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  69%|██████▉   | 618/890 [2:43:08<1:15:42, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5015, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4680, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5015, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4680, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9695, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7215, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3183, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7215, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3183, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|██████▉   | 619/890 [2:43:25<1:15:24, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2961, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3311, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2961, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3311, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6272, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7558, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2808, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7558, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2808, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|██████▉   | 620/890 [2:43:42<1:15:44, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4816, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4816, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3928, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3074, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|██████▉   | 621/890 [2:43:59<1:15:08, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5279, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2923, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5279, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2923, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8202, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9415, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2633, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9415, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2633, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|██████▉   | 622/890 [2:44:15<1:14:25, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.9910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3755, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3755, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.3665, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2997, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2997, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|███████   | 623/890 [2:44:32<1:13:50, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7084, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7084, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0079, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|███████   | 624/890 [2:44:48<1:13:31, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3938, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4324, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3938, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4324, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8262, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3900, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3900, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|███████   | 625/890 [2:45:05<1:13:25, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7699, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4252, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7699, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4252, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1950, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7219, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2743, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7219, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2743, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|███████   | 626/890 [2:45:21<1:12:51, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5069, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5069, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4072, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2990, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2990, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  70%|███████   | 627/890 [2:45:39<1:13:35, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5470, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5413, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5470, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5413, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0883, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2571, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2571, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████   | 628/890 [2:45:55<1:13:14, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5788, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3106, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5788, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3106, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8894, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2837, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2837, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████   | 629/890 [2:46:12<1:12:47, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3457, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3454, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3457, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3454, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6911, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2981, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2981, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████   | 630/890 [2:46:28<1:12:03, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2788, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2788, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7105, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3099, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3099, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████   | 631/890 [2:46:45<1:11:36, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8557, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3493, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8557, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3493, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3257, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3257, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████   | 632/890 [2:47:02<1:11:50, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0542, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3834, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0542, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3834, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4376, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2983, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2983, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████   | 633/890 [2:47:19<1:11:34, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4197, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4197, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1713, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3775, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3197, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3775, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3197, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████   | 634/890 [2:47:35<1:11:25, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3575, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3575, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0939, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9549, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3312, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9549, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3312, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████▏  | 635/890 [2:47:53<1:11:37, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8076, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2776, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8076, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2776, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0852, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8545, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3566, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8545, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3566, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  71%|███████▏  | 636/890 [2:48:09<1:11:31, 16.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8398, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2869, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2869, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 637/890 [2:48:26<1:11:14, 16.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3178, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3064, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3178, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6242, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5167, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2661, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5167, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2661, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 638/890 [2:48:43<1:10:22, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6140, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3259, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6140, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3259, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9399, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3052, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 639/890 [2:48:59<1:09:45, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3998, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3998, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9691, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3317, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3317, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 640/890 [2:49:16<1:09:32, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3125, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3125, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1298, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6229, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3526, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6229, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3526, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 641/890 [2:49:33<1:09:04, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6353, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6353, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7028, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6116, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3160, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6116, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3160, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 642/890 [2:49:50<1:09:51, 16.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2799, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2799, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4776, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2916, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2916, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 643/890 [2:50:07<1:09:29, 16.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0438, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0438, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3161, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3599, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3385, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3385, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 644/890 [2:50:23<1:08:45, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4426, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4426, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6680, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2611, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6680, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2611, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  72%|███████▏  | 645/890 [2:50:40<1:08:21, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8658, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3606, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8658, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3606, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2264, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5606, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5606, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 646/890 [2:50:57<1:07:55, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3768, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3768, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7479, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4061, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4061, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 647/890 [2:51:13<1:07:31, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7565, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7565, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3017, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 648/890 [2:51:30<1:07:32, 16.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9417, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3259, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9417, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3259, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2676, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7965, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2795, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7965, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2795, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 649/890 [2:51:47<1:07:36, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2475, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2475, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5151, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3474, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3474, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 650/890 [2:52:04<1:07:12, 16.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5028, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6265, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7611, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3174, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7611, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3174, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 651/890 [2:52:21<1:07:08, 16.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2877, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2877, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5075, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3198, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3198, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 652/890 [2:52:38<1:07:33, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3468, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3468, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2409, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2779, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2779, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 653/890 [2:52:55<1:07:08, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4581, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3548, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4581, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3548, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8130, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3294, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3294, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  73%|███████▎  | 654/890 [2:53:12<1:06:14, 16.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3418, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3418, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3220, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3220, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▎  | 655/890 [2:53:28<1:05:44, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7652, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3038, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7652, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0690, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2868, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2868, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▎  | 656/890 [2:53:45<1:05:28, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3253, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3253, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0363, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2862, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2862, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▍  | 657/890 [2:54:02<1:05:12, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3489, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3489, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5156, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3330, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3330, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▍  | 658/890 [2:54:19<1:05:09, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5367, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5367, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8979, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3116, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3116, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▍  | 659/890 [2:54:36<1:05:03, 16.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3375, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3025, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3375, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6400, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3184, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3184, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▍  | 660/890 [2:54:53<1:04:59, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2488, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3083, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2488, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3083, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5571, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2730, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2730, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▍  | 661/890 [2:55:10<1:04:45, 16.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8214, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3901, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8214, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3901, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2116, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4390, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2571, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4390, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2571, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▍  | 662/890 [2:55:27<1:04:09, 16.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3197, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3197, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3209, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3107, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3107, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  74%|███████▍  | 663/890 [2:55:44<1:03:39, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3061, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8789, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3023, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▍  | 664/890 [2:56:00<1:03:09, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3205, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2848, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2848, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▍  | 665/890 [2:56:17<1:02:58, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2930, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2930, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0495, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3017, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▍  | 666/890 [2:56:34<1:03:12, 16.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.0227, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.0227, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.9155, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3127, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3127, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▍  | 667/890 [2:56:51<1:03:01, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3084, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1895, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9095, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3384, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9095, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3384, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▌  | 668/890 [2:57:08<1:02:35, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2910, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2910, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0599, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4659, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3778, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4659, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3778, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▌  | 669/890 [2:57:25<1:02:06, 16.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2689, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2689, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1678, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3238, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3238, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▌  | 670/890 [2:57:41<1:01:33, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5483, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2763, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5483, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2763, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8246, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3629, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3629, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  75%|███████▌  | 671/890 [2:57:58<1:01:08, 16.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3428, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3428, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1981, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4384, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2762, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4384, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2762, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▌  | 672/890 [2:58:15<1:01:07, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3043, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3744, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3498, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3498, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▌  | 673/890 [2:58:32<1:00:47, 16.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2630, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3537, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2630, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3537, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2963, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2963, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▌  | 674/890 [2:58:49<1:00:38, 16.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1677, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6083, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1677, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6083, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7761, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2608, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▌  | 675/890 [2:59:06<1:00:15, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5992, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6706, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5992, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6706, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2697, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6213, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3280, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6213, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3280, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▌  | 676/890 [2:59:23<1:00:15, 16.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2936, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2936, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1271, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3447, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3447, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▌  | 677/890 [2:59:40<1:00:15, 16.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5853, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2581, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3075, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2581, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3075, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▌  | 678/890 [2:59:56<59:25, 16.82s/batch]  

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2772, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2772, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1524, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6824, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3110, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6824, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3110, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▋  | 679/890 [3:00:13<58:35, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9269, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3770, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9269, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3770, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3039, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2957, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2957, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  76%|███████▋  | 680/890 [3:00:29<58:12, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3221, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3221, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7257, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3323, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3323, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 681/890 [3:00:46<57:55, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4187, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4187, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4155, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5577, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3565, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5577, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3565, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 682/890 [3:01:02<57:31, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3552, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3552, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3344, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1965, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2889, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1965, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2889, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 683/890 [3:01:19<57:08, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4058, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5446, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4058, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5446, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9504, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3578, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3578, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 684/890 [3:01:36<57:15, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0187, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0187, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3098, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 685/890 [3:01:52<56:50, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3171, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2692, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2999, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2999, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 686/890 [3:02:09<56:15, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2974, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2974, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8535, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3230, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3230, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 687/890 [3:02:25<55:55, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3732, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3732, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8627, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3395, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3395, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 688/890 [3:02:42<56:30, 16.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6135, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3150, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6135, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3150, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9286, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5285, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5285, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3009, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  77%|███████▋  | 689/890 [3:02:59<55:55, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3193, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3193, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8172, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3267, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3267, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 690/890 [3:03:15<55:32, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3580, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3580, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2631, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 691/890 [3:03:32<55:31, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6816, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3692, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6816, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3692, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0508, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4401, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3541, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4401, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3541, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 692/890 [3:03:49<54:56, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3580, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3580, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0166, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2737, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2737, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 693/890 [3:04:06<54:53, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5363, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5363, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1222, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3378, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3378, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 694/890 [3:04:22<54:22, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2866, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2866, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9135, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3491, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3491, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 695/890 [3:04:39<53:54, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0103, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0103, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4118, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7791, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3051, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7791, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3051, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 696/890 [3:04:56<54:24, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5756, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3215, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5756, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3215, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8971, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3263, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3263, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 697/890 [3:05:13<53:52, 16.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4819, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3515, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4819, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3515, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8334, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3313, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3313, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  78%|███████▊  | 698/890 [3:05:29<53:27, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0785, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0785, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3219, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4004, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4694, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3419, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4694, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3419, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▊  | 699/890 [3:05:46<53:21, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7161, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9663, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7552, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2815, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7552, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2815, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▊  | 700/890 [3:06:03<53:00, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4901, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4901, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4388, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2895, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2895, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▉  | 701/890 [3:06:19<52:32, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7545, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2622, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7545, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2622, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3004, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▉  | 702/890 [3:06:36<52:17, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5162, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5162, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0649, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7023, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3125, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7023, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3125, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▉  | 703/890 [3:06:53<51:49, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7059, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3992, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7059, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3992, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1051, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2866, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2866, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▉  | 704/890 [3:07:09<51:40, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2753, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2753, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7123, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2723, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2723, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▉  | 705/890 [3:07:26<51:13, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5768, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3874, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5768, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3874, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9642, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8532, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3175, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8532, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3175, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▉  | 706/890 [3:07:43<51:08, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3629, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5583, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3629, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5583, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9212, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3283, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3283, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  79%|███████▉  | 707/890 [3:07:59<50:54, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2948, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2948, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9301, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5865, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2959, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5865, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2959, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|███████▉  | 708/890 [3:08:16<50:58, 16.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4879, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4879, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2251, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3288, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3288, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|███████▉  | 709/890 [3:08:33<50:33, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6030, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3061, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6030, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9090, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2913, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|███████▉  | 710/890 [3:08:49<49:59, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4134, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3107, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4134, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3107, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7242, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3039, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|███████▉  | 711/890 [3:09:06<49:35, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5936, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5936, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1377, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5569, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3581, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5569, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3581, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|████████  | 712/890 [3:09:23<49:19, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4890, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3654, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4890, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3654, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8544, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9468, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3168, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9468, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3168, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|████████  | 713/890 [3:09:39<49:01, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3339, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3339, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8171, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3272, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3272, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|████████  | 714/890 [3:09:56<49:08, 16.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3342, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3342, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6436, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3211, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3211, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|████████  | 715/890 [3:10:13<48:46, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2890, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2890, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8441, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8945, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2968, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8945, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2968, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  80%|████████  | 716/890 [3:10:29<48:15, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3200, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3200, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2210, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7831, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3184, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7831, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3184, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████  | 717/890 [3:10:46<48:00, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3320, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3320, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7449, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6637, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2450, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6637, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2450, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████  | 718/890 [3:11:03<47:38, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5941, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5941, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2647, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3996, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2688, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3996, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2688, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████  | 719/890 [3:11:19<47:07, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6688, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3878, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6688, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3878, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0566, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2613, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2613, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████  | 720/890 [3:11:36<47:12, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5337, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5337, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4184, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████  | 721/890 [3:11:52<46:45, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3230, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3230, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8127, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7522, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2723, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7522, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2723, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████  | 722/890 [3:12:10<46:58, 16.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4140, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4140, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1840, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5953, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2888, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5953, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2888, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████  | 723/890 [3:12:26<46:30, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7425, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3717, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7425, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3717, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3039, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████▏ | 724/890 [3:12:43<45:59, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3249, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3249, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2617, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2617, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  81%|████████▏ | 725/890 [3:12:59<45:38, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4402, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4402, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0869, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5410, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2755, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5410, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2755, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 726/890 [3:13:15<45:05, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3741, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3741, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8430, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2953, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2953, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 727/890 [3:13:32<44:47, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3395, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3395, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9345, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2529, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2529, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 728/890 [3:13:48<44:30, 16.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5515, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3153, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5515, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3153, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8668, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2852, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2852, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 729/890 [3:14:05<44:13, 16.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5401, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3032, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5401, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8433, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3310, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3310, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 730/890 [3:14:21<44:04, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4316, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4316, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1350, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2631, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 731/890 [3:14:38<43:44, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3216, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3216, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0494, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7384, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2855, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7384, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2855, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 732/890 [3:14:54<43:25, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2896, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2896, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6997, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4416, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3397, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4416, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3397, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 733/890 [3:15:11<43:13, 16.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4531, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4531, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0663, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3163, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3163, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  82%|████████▏ | 734/890 [3:15:27<42:48, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5827, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3729, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5827, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3729, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9556, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3347, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2865, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3347, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2865, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 735/890 [3:15:44<42:30, 16.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6182, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2889, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6182, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2889, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9070, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3128, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3128, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 736/890 [3:16:01<42:32, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8870, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8870, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3979, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5625, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3962, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5625, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3962, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 737/890 [3:16:17<42:14, 16.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.1962, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.1962, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.9403, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3139, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3139, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 738/890 [3:16:33<41:48, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7577, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3768, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7577, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3768, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1345, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0223, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2764, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0223, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2764, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 739/890 [3:16:50<41:55, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9131, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9131, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3219, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2349, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2626, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2626, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 740/890 [3:17:07<41:28, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7196, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3138, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7196, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3138, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0334, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3009, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 741/890 [3:17:23<41:09, 16.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3302, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3302, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7059, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3173, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3173, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 742/890 [3:17:40<40:39, 16.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4326, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4326, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0123, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7225, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3502, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7225, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3502, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  83%|████████▎ | 743/890 [3:17:56<40:21, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6387, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2910, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6387, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2910, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9297, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6013, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3420, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6013, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3420, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▎ | 744/890 [3:18:13<40:03, 16.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5186, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2757, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5186, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2757, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7943, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7577, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3135, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7577, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3135, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▎ | 745/890 [3:18:29<39:52, 16.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3849, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3849, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9088, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3313, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3313, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▍ | 746/890 [3:18:46<39:47, 16.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2658, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2658, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7831, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7590, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3227, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7590, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3227, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▍ | 747/890 [3:19:03<39:32, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5455, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2858, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5455, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2858, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8313, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5288, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3067, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5288, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▍ | 748/890 [3:19:19<39:15, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.0907, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.0907, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6084, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1568, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3810, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1568, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3810, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▍ | 749/890 [3:19:36<38:55, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3523, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3523, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9971, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3039, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▍ | 750/890 [3:19:52<38:34, 16.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5771, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5750, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5771, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5750, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1520, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9945, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2766, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9945, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2766, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▍ | 751/890 [3:20:08<38:11, 16.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3388, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3388, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8984, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1608, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2614, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1608, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2614, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  84%|████████▍ | 752/890 [3:20:25<38:04, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3889, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3889, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9301, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2889, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2889, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▍ | 753/890 [3:20:42<37:43, 16.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9365, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7880, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9365, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7880, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7244, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3033, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▍ | 754/890 [3:20:58<37:34, 16.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3052, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8785, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3258, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3258, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▍ | 755/890 [3:21:15<37:20, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5137, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3602, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5137, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3602, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8738, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2616, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2616, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▍ | 756/890 [3:21:31<36:57, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3598, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3598, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7712, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6161, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3142, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6161, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3142, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▌ | 757/890 [3:21:48<36:50, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3059, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2232, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2617, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2617, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▌ | 758/890 [3:22:05<36:33, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5230, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3359, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5230, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3359, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8589, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2840, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2840, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▌ | 759/890 [3:22:21<36:07, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5219, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3006, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6752, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2624, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6752, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2624, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  85%|████████▌ | 760/890 [3:22:38<36:03, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0344, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5086, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0344, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5086, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5430, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3068, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3068, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▌ | 761/890 [3:22:55<35:54, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5329, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5329, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8962, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3117, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3117, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▌ | 762/890 [3:23:12<35:34, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3142, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3142, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9414, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2610, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2610, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▌ | 763/890 [3:23:28<35:16, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6139, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2802, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6139, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2802, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8941, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5146, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3359, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5146, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3359, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▌ | 764/890 [3:23:45<35:15, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3620, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3620, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8776, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3081, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3081, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▌ | 765/890 [3:24:02<34:53, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5615, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3031, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5615, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8647, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3330, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3330, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▌ | 766/890 [3:24:18<34:25, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4586, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3067, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4586, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3067, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7653, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5014, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5014, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2821, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▌ | 767/890 [3:24:35<34:02, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3733, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3733, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9966, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2980, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▋ | 768/890 [3:24:52<34:02, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2959, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2959, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7169, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3158, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3158, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  86%|████████▋ | 769/890 [3:25:09<33:45, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2914, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2914, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3028, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 770/890 [3:25:25<33:32, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7998, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5899, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7998, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5899, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3898, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4003, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 771/890 [3:25:42<33:18, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.6015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.6015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8157, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3633, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3633, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 772/890 [3:25:59<32:54, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4261, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4261, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0604, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3616, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3616, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 773/890 [3:26:15<32:28, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4530, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4530, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8976, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2818, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2818, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 774/890 [3:26:32<32:04, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2788, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2788, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0849, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6924, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3415, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6924, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3415, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 775/890 [3:26:48<31:43, 16.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2784, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2784, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8771, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3088, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3088, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 776/890 [3:27:05<31:47, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6195, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6051, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6195, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2246, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6160, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2855, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6160, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2855, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 777/890 [3:27:22<31:25, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.7714, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.7714, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3593, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3447, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3447, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  87%|████████▋ | 778/890 [3:27:38<30:59, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5256, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3767, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5256, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3767, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9023, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3173, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3173, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 779/890 [3:27:55<30:55, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6100, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3134, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6100, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3134, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3090, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3090, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 780/890 [3:28:12<30:36, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1629, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2916, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2916, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 781/890 [3:28:29<30:15, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3608, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2189, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3100, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3100, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 782/890 [3:28:45<29:58, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2758, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2758, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8718, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7976, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2662, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7976, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2662, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 783/890 [3:29:02<29:38, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2792, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2792, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3597, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4978, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3234, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4978, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3234, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 784/890 [3:29:19<29:32, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0067, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3708, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0067, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3708, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3775, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3997, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3612, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3997, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3612, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 785/890 [3:29:35<29:12, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3181, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3181, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2747, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8743, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3735, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8743, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3735, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 786/890 [3:29:52<29:02, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2999, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2999, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3404, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3404, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  88%|████████▊ | 787/890 [3:30:09<28:42, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3423, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3423, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0959, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3116, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3116, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▊ | 788/890 [3:30:26<28:35, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7854, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3521, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7854, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3521, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1375, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2858, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2858, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▊ | 789/890 [3:30:42<28:08, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3191, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3191, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9192, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3713, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3713, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▉ | 790/890 [3:30:59<27:44, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7348, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3208, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7348, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3208, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0556, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3161, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▉ | 791/890 [3:31:15<27:23, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3209, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3209, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2105, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3245, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3245, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▉ | 792/890 [3:31:32<27:07, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7018, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7952, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7018, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7952, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4970, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▉ | 793/890 [3:31:49<26:51, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3420, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3420, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0076, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4768, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3917, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4768, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3917, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▉ | 794/890 [3:32:06<26:41, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9201, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2869, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9201, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2869, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3871, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3871, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▉ | 795/890 [3:32:23<26:34, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4117, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4117, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2138, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8760, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3412, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8760, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3412, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  89%|████████▉ | 796/890 [3:32:40<26:22, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4082, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4082, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6989, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3230, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3230, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|████████▉ | 797/890 [3:32:56<25:53, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7015, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5209, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7015, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5209, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2225, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3149, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3149, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|████████▉ | 798/890 [3:33:13<25:34, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6840, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2667, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6840, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2667, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9507, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9935, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3108, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9935, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3108, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|████████▉ | 799/890 [3:33:29<25:12, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3564, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3564, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0899, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3194, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3194, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|████████▉ | 800/890 [3:33:46<25:11, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6589, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8508, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6589, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8508, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5097, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3588, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3588, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|█████████ | 801/890 [3:34:03<24:58, 16.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3393, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3393, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7039, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7039, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2771, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|█████████ | 802/890 [3:34:20<24:41, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4046, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4863, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4046, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4863, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8909, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2986, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2986, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|█████████ | 803/890 [3:34:37<24:16, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0435, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3855, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0435, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3855, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4289, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3974, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3974, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|█████████ | 804/890 [3:34:53<24:02, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3702, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3702, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0895, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3815, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3815, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  90%|█████████ | 805/890 [3:35:10<23:50, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0154, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3097, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0154, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3097, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3250, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9138, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3083, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9138, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3083, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████ | 806/890 [3:35:27<23:24, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5388, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4917, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5388, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4917, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0304, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2965, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2965, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████ | 807/890 [3:35:43<23:02, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8379, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3488, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8379, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3488, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1867, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8355, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3129, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8355, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3129, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████ | 808/890 [3:36:00<22:45, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5514, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3814, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5514, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3814, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9328, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6878, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3452, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6878, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3452, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████ | 809/890 [3:36:16<22:23, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3773, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3773, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9743, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3054, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████ | 810/890 [3:36:33<22:12, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7174, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7174, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4857, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7349, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2973, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7349, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2973, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████ | 811/890 [3:36:50<22:02, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2695, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2695, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7814, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6664, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3604, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6664, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3604, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████ | 812/890 [3:37:07<21:48, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4397, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4397, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0266, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3210, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3210, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████▏| 813/890 [3:37:23<21:24, 16.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3594, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3594, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1666, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3263, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3263, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  91%|█████████▏| 814/890 [3:37:40<21:02, 16.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5158, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5158, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2646, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3351, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3351, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 815/890 [3:37:57<20:51, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6358, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6358, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6490, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7127, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7127, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4005, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 816/890 [3:38:14<20:42, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3642, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3642, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9240, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5443, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3160, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5443, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3160, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 817/890 [3:38:31<20:26, 16.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3929, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3929, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9528, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2936, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2936, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 818/890 [3:38:47<20:05, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4647, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5218, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4647, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5218, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9866, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7330, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3220, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7330, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3220, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 819/890 [3:39:04<19:43, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3723, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3723, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1152, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2527, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2527, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 820/890 [3:39:21<19:29, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9438, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3275, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9438, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3275, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2713, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2636, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2636, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 821/890 [3:39:37<19:10, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5253, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5253, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2046, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2608, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 822/890 [3:39:54<18:51, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3873, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3443, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3873, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3443, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7316, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3158, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3158, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  92%|█████████▏| 823/890 [3:40:10<18:35, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5849, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5849, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1410, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0970, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2968, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0970, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2968, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 824/890 [3:40:27<18:20, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7694, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3381, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7694, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3381, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1074, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3065, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3065, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 825/890 [3:40:44<18:01, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3958, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9930, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2635, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2635, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 826/890 [3:41:01<17:49, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5789, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5789, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3562, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8606, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8606, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2958, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 827/890 [3:41:17<17:28, 16.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3380, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3380, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8530, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 828/890 [3:41:34<17:12, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4908, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3209, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4001, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 829/890 [3:41:51<16:59, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0623, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3746, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0623, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3746, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4370, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2673, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2673, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 830/890 [3:42:07<16:40, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2806, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2806, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8786, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3310, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3310, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 831/890 [3:42:24<16:22, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2520, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8742, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2520, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8742, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1262, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5976, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2644, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5976, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2644, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  93%|█████████▎| 832/890 [3:42:40<16:05, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4962, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4549, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4962, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4549, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9511, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4595, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4595, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▎| 833/890 [3:42:57<15:50, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5073, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3412, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5073, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3412, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8486, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7926, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2845, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7926, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2845, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▎| 834/890 [3:43:14<15:37, 16.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.4703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6953, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.4703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6953, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(6.1656, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4394, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2856, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4394, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2856, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▍| 835/890 [3:43:31<15:26, 16.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5497, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4121, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3169, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4121, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3169, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▍| 836/890 [3:43:48<15:06, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.0490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3239, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3239, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.3729, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2974, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2974, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▍| 837/890 [3:44:05<14:52, 16.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4776, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4726, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4776, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4726, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9502, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4184, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3234, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4184, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3234, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▍| 838/890 [3:44:21<14:30, 16.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3160, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3160, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.1699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3734, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3734, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▍| 839/890 [3:44:38<14:09, 16.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0410, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3258, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0410, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3258, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3668, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9046, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3136, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9046, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3136, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▍| 840/890 [3:44:54<13:53, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8764, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3444, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8764, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3444, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2207, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  94%|█████████▍| 841/890 [3:45:11<13:36, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.3592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4891, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4891, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8483, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3537, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3537, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▍| 842/890 [3:45:28<13:25, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4260, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4260, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9105, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1647, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1647, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▍| 843/890 [3:45:45<13:08, 16.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5884, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5884, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5831, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3114, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3114, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▍| 844/890 [3:46:02<12:49, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4165, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4165, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8768, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3092, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3092, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▍| 845/890 [3:46:18<12:31, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0504, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3351, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0504, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3351, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3855, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3413, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3413, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▌| 846/890 [3:46:34<12:10, 16.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4752, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4752, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8298, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2996, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2996, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▌| 847/890 [3:46:51<11:54, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6514, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3587, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6514, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3587, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0101, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2875, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2875, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▌| 848/890 [3:47:08<11:39, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4311, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4311, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1857, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7541, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3248, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7541, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3248, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  95%|█████████▌| 849/890 [3:47:24<11:21, 16.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5028, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4564, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5028, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4564, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9592, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5386, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3400, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5386, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3400, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▌| 850/890 [3:47:41<11:07, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6975, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6975, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3078, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3078, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▌| 851/890 [3:47:58<10:52, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5214, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5214, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2709, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9028, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3188, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9028, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3188, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▌| 852/890 [3:48:15<10:37, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7250, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5754, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7250, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5754, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3004, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2779, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2779, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▌| 853/890 [3:48:32<10:23, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4166, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4166, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5999, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2571, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2571, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▌| 854/890 [3:48:49<10:03, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4894, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4894, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8883, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8264, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2851, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8264, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2851, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▌| 855/890 [3:49:05<09:43, 16.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8391, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5105, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8391, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5105, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3496, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3103, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3103, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▌| 856/890 [3:49:22<09:28, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2643, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2643, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3289, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5932, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3141, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3141, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▋| 857/890 [3:49:38<09:09, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5540, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5540, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3989, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5962, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2702, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5962, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2702, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  96%|█████████▋| 858/890 [3:49:55<08:54, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4324, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4324, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7301, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7328, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3243, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7328, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3243, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 859/890 [3:50:12<08:36, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3947, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3947, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1001, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3692, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3692, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 860/890 [3:50:29<08:24, 16.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4449, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4449, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6406, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2932, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2932, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 861/890 [3:50:45<08:04, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3387, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3387, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8712, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3318, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3318, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 862/890 [3:51:02<07:46, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7898, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3132, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7898, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3132, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1030, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3729, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3729, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 863/890 [3:51:18<07:28, 16.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6700, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6700, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0701, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0325, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0325, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 864/890 [3:51:36<07:15, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9876, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3573, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9876, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3573, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3449, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4909, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2752, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4909, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2752, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 865/890 [3:51:52<06:58, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5996, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4862, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5996, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4862, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3398, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3398, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 866/890 [3:52:09<06:43, 16.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3364, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3364, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3217, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6891, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2689, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6891, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2689, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  97%|█████████▋| 867/890 [3:52:26<06:25, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4375, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4375, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1037, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3240, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3240, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 868/890 [3:52:43<06:13, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4301, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4301, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7068, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5055, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3435, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5055, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3435, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 869/890 [3:53:00<05:53, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3164, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4649, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3164, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4649, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7813, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5493, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3065, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5493, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3065, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 870/890 [3:53:16<05:34, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4162, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4162, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8345, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3114, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3114, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 871/890 [3:53:33<05:17, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7877, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4459, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7877, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4459, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2336, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2815, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2815, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 872/890 [3:53:50<05:00, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.5124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5982, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5982, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.1106, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4826, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3489, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4826, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3489, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 873/890 [3:54:07<04:45, 16.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5240, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3210, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5240, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3210, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8450, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6822, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6822, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 874/890 [3:54:23<04:27, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9782, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3717, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9782, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3717, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3499, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3605, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3605, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 875/890 [3:54:40<04:10, 16.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4240, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4240, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8350, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2670, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2670, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  98%|█████████▊| 876/890 [3:54:57<03:54, 16.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3025, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3321, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3025, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3321, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6346, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6589, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3477, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6589, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3477, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▊| 877/890 [3:55:13<03:37, 16.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1159, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5587, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1159, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5587, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6746, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3182, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3182, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▊| 878/890 [3:55:30<03:20, 16.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4426, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4426, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9026, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6740, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3607, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6740, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3607, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▉| 879/890 [3:55:47<03:03, 16.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4578, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4578, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9137, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3308, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3308, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▉| 880/890 [3:56:04<02:48, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7508, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3403, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7508, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3403, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0910, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8813, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3943, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8813, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3943, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▉| 881/890 [3:56:21<02:31, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5836, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3252, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5836, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3252, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9088, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4872, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4872, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3003, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▉| 882/890 [3:56:38<02:14, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0225, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3789, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0225, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3789, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4014, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7387, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2740, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7387, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2740, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▉| 883/890 [3:56:55<01:58, 16.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5284, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3327, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5284, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3327, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8611, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9153, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3208, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9153, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3208, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▉| 884/890 [3:57:12<01:42, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6405, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6405, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6455, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7351, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3359, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7351, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3359, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14):  99%|█████████▉| 885/890 [3:57:29<01:24, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3200, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3200, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6720, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3585, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3585, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14): 100%|█████████▉| 886/890 [3:57:45<01:07, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2856, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2856, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9978, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3339, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3339, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14): 100%|█████████▉| 887/890 [3:58:01<00:49, 16.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2657, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2657, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7678, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3554, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3554, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14): 100%|█████████▉| 888/890 [3:58:18<00:33, 16.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2875, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2875, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7526, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3057, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14): 100%|█████████▉| 889/890 [3:58:34<00:16, 16.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5026, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3566, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5026, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3566, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8592, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9046, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2691, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9046, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2691, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 13/14): 100%|██████████| 890/890 [3:58:51<00:00, 16.10s/batch]
Validating:   1%|          | 1/126 [00:07<16:32,  7.94s/batch]

Validation dice loss per batch: 0.34712138772010803


Validating:   2%|▏         | 2/126 [00:08<07:16,  3.52s/batch]

Validation dice loss per batch: 0.25002047419548035


Validating:   2%|▏         | 3/126 [00:13<09:10,  4.48s/batch]

Validation dice loss per batch: 0.427791029214859


Validating:   3%|▎         | 4/126 [00:14<05:51,  2.88s/batch]

Validation dice loss per batch: 0.6613879203796387


Validating:   4%|▍         | 5/126 [00:21<08:47,  4.36s/batch]

Validation dice loss per batch: 0.2967168688774109


Validating:   5%|▍         | 6/126 [00:21<06:02,  3.02s/batch]

Validation dice loss per batch: 0.3053719997406006


Validating:   6%|▌         | 7/126 [00:27<07:52,  3.97s/batch]

Validation dice loss per batch: 0.3018183410167694


Validating:   6%|▋         | 8/126 [00:28<05:35,  2.84s/batch]

Validation dice loss per batch: 0.1691339910030365


Validating:   7%|▋         | 9/126 [00:34<07:33,  3.88s/batch]

Validation dice loss per batch: 0.30001306533813477


Validating:   8%|▊         | 10/126 [00:34<05:26,  2.81s/batch]

Validation dice loss per batch: 0.4330883026123047


Validating:   9%|▊         | 11/126 [00:40<07:05,  3.70s/batch]

Validation dice loss per batch: 0.4573334753513336


Validating:  10%|▉         | 12/126 [00:40<05:07,  2.70s/batch]

Validation dice loss per batch: 0.19768200814723969


Validating:  10%|█         | 13/126 [00:45<06:28,  3.43s/batch]

Validation dice loss per batch: 0.47478795051574707


Validating:  11%|█         | 14/126 [00:46<04:43,  2.53s/batch]

Validation dice loss per batch: 0.27733513712882996


Validating:  12%|█▏        | 15/126 [00:54<07:52,  4.26s/batch]

Validation dice loss per batch: 0.29721254110336304


Validating:  13%|█▎        | 16/126 [00:55<05:55,  3.23s/batch]

Validation dice loss per batch: 0.22125065326690674


Validating:  13%|█▎        | 17/126 [01:00<06:53,  3.79s/batch]

Validation dice loss per batch: 0.35596710443496704


Validating:  14%|█▍        | 18/126 [01:01<05:07,  2.84s/batch]

Validation dice loss per batch: 0.43262332677841187


Validating:  15%|█▌        | 19/126 [01:07<06:38,  3.73s/batch]

Validation dice loss per batch: 0.22808346152305603


Validating:  16%|█▌        | 20/126 [01:07<04:50,  2.74s/batch]

Validation dice loss per batch: 0.265410840511322


Validating:  17%|█▋        | 21/126 [01:15<07:37,  4.36s/batch]

Validation dice loss per batch: 0.33137786388397217


Validating:  17%|█▋        | 22/126 [01:16<05:30,  3.18s/batch]

Validation dice loss per batch: 0.24070021510124207


Validating:  18%|█▊        | 23/126 [01:23<07:24,  4.31s/batch]

Validation dice loss per batch: 0.7661788463592529


Validating:  19%|█▉        | 24/126 [01:23<05:21,  3.15s/batch]

Validation dice loss per batch: 0.9761703014373779


Validating:  20%|█▉        | 25/126 [01:30<07:05,  4.21s/batch]

Validation dice loss per batch: 0.6416186094284058


Validating:  21%|██        | 26/126 [01:30<05:07,  3.08s/batch]

Validation dice loss per batch: 0.3386062979698181


Validating:  21%|██▏       | 27/126 [01:36<06:20,  3.84s/batch]

Validation dice loss per batch: 0.3083985149860382


Validating:  22%|██▏       | 28/126 [01:36<04:36,  2.82s/batch]

Validation dice loss per batch: 0.28497380018234253


Validating:  23%|██▎       | 29/126 [01:43<06:29,  4.01s/batch]

Validation dice loss per batch: 0.2808569073677063


Validating:  24%|██▍       | 30/126 [01:43<04:41,  2.94s/batch]

Validation dice loss per batch: 0.3963504433631897


Validating:  25%|██▍       | 31/126 [01:49<05:48,  3.66s/batch]

Validation dice loss per batch: 0.3230865001678467


Validating:  25%|██▌       | 32/126 [01:49<04:13,  2.69s/batch]

Validation dice loss per batch: 0.26296764612197876


Validating:  26%|██▌       | 33/126 [01:55<05:52,  3.79s/batch]

Validation dice loss per batch: 0.3032415509223938


Validating:  27%|██▋       | 34/126 [01:56<04:15,  2.78s/batch]

Validation dice loss per batch: 0.31410735845565796


Validating:  28%|██▊       | 35/126 [02:02<05:47,  3.82s/batch]

Validation dice loss per batch: 0.4680105447769165


Validating:  29%|██▊       | 36/126 [02:03<04:12,  2.80s/batch]

Validation dice loss per batch: 0.5630292892456055


Validating:  29%|██▉       | 37/126 [02:09<05:42,  3.85s/batch]

Validation dice loss per batch: 0.30806681513786316


Validating:  30%|███       | 38/126 [02:09<04:08,  2.82s/batch]

Validation dice loss per batch: 0.45498931407928467


Validating:  31%|███       | 39/126 [02:15<05:10,  3.57s/batch]

Validation dice loss per batch: 0.36386698484420776


Validating:  32%|███▏      | 40/126 [02:15<03:45,  2.62s/batch]

Validation dice loss per batch: 0.5787748098373413


Validating:  33%|███▎      | 41/126 [02:21<04:58,  3.52s/batch]

Validation dice loss per batch: 0.382850706577301


Validating:  33%|███▎      | 42/126 [02:21<03:37,  2.59s/batch]

Validation dice loss per batch: 0.49131137132644653


Validating:  34%|███▍      | 43/126 [02:28<05:16,  3.81s/batch]

Validation dice loss per batch: 0.4505786597728729


Validating:  35%|███▍      | 44/126 [02:28<03:49,  2.80s/batch]

Validation dice loss per batch: 0.5316872000694275


Validating:  36%|███▌      | 45/126 [02:33<04:40,  3.46s/batch]

Validation dice loss per batch: 0.31401222944259644


Validating:  37%|███▋      | 46/126 [02:34<03:24,  2.55s/batch]

Validation dice loss per batch: 0.2470104694366455


Validating:  37%|███▋      | 47/126 [02:39<04:40,  3.55s/batch]

Validation dice loss per batch: 0.578492283821106


Validating:  38%|███▊      | 48/126 [02:40<03:23,  2.61s/batch]

Validation dice loss per batch: 0.5888606905937195


Validating:  39%|███▉      | 49/126 [02:46<04:36,  3.59s/batch]

Validation dice loss per batch: 0.39474785327911377


Validating:  40%|███▉      | 50/126 [02:46<03:20,  2.64s/batch]

Validation dice loss per batch: 0.23715505003929138


Validating:  40%|████      | 51/126 [02:51<04:14,  3.39s/batch]

Validation dice loss per batch: 0.35412052273750305


Validating:  41%|████▏     | 52/126 [02:52<03:05,  2.50s/batch]

Validation dice loss per batch: 0.31725460290908813


Validating:  42%|████▏     | 53/126 [02:57<04:01,  3.31s/batch]

Validation dice loss per batch: 0.5399022102355957


Validating:  43%|████▎     | 54/126 [02:57<02:56,  2.45s/batch]

Validation dice loss per batch: 1.2508769035339355


Validating:  44%|████▎     | 55/126 [03:03<03:51,  3.27s/batch]

Validation dice loss per batch: 1.0387884378433228


Validating:  44%|████▍     | 56/126 [03:03<02:48,  2.41s/batch]

Validation dice loss per batch: 0.2557026147842407


Validating:  45%|████▌     | 57/126 [03:09<04:09,  3.61s/batch]

Validation dice loss per batch: 0.3660300374031067


Validating:  46%|████▌     | 58/126 [03:10<03:00,  2.66s/batch]

Validation dice loss per batch: 0.5881040692329407


Validating:  47%|████▋     | 59/126 [03:16<04:13,  3.79s/batch]

Validation dice loss per batch: 0.20870500802993774


Validating:  48%|████▊     | 60/126 [03:17<03:03,  2.78s/batch]

Validation dice loss per batch: 0.15487727522850037


Validating:  48%|████▊     | 61/126 [03:24<04:25,  4.09s/batch]

Validation dice loss per batch: 0.2533623278141022


Validating:  49%|████▉     | 62/126 [03:24<03:11,  2.99s/batch]

Validation dice loss per batch: 0.5631284713745117


Validating:  50%|█████     | 63/126 [03:30<04:03,  3.87s/batch]

Validation dice loss per batch: 0.36483854055404663


Validating:  51%|█████     | 64/126 [03:31<02:56,  2.85s/batch]

Validation dice loss per batch: 1.1294286251068115


Validating:  52%|█████▏    | 65/126 [03:38<04:24,  4.34s/batch]

Validation dice loss per batch: 0.6488547325134277


Validating:  52%|█████▏    | 66/126 [03:39<03:09,  3.16s/batch]

Validation dice loss per batch: 0.6310757398605347


Validating:  53%|█████▎    | 67/126 [03:46<04:12,  4.28s/batch]

Validation dice loss per batch: 0.25636857748031616


Validating:  54%|█████▍    | 68/126 [03:46<03:00,  3.12s/batch]

Validation dice loss per batch: 0.26282763481140137


Validating:  55%|█████▍    | 69/126 [03:52<03:42,  3.91s/batch]

Validation dice loss per batch: 0.7353436946868896


Validating:  56%|█████▌    | 70/126 [03:52<02:40,  2.86s/batch]

Validation dice loss per batch: 1.3392095565795898


Validating:  56%|█████▋    | 71/126 [04:00<03:59,  4.36s/batch]

Validation dice loss per batch: 0.25707995891571045


Validating:  57%|█████▋    | 72/126 [04:01<02:51,  3.18s/batch]

Validation dice loss per batch: 0.16231802105903625


Validating:  58%|█████▊    | 73/126 [04:06<03:28,  3.93s/batch]

Validation dice loss per batch: 0.173713818192482


Validating:  59%|█████▊    | 74/126 [04:07<02:29,  2.87s/batch]

Validation dice loss per batch: 0.14788773655891418


Validating:  60%|█████▉    | 75/126 [04:14<03:34,  4.21s/batch]

Validation dice loss per batch: 0.13833114504814148


Validating:  60%|██████    | 76/126 [04:14<02:33,  3.07s/batch]

Validation dice loss per batch: 0.13289040327072144


Validating:  61%|██████    | 77/126 [04:21<03:24,  4.18s/batch]

Validation dice loss per batch: 0.14169184863567352


Validating:  62%|██████▏   | 78/126 [04:22<02:26,  3.05s/batch]

Validation dice loss per batch: 0.20462645590305328


Validating:  63%|██████▎   | 79/126 [04:28<03:12,  4.09s/batch]

Validation dice loss per batch: 0.16800084710121155


Validating:  63%|██████▎   | 80/126 [04:29<02:17,  2.99s/batch]

Validation dice loss per batch: 0.13510389626026154


Validating:  64%|██████▍   | 81/126 [04:35<02:55,  3.90s/batch]

Validation dice loss per batch: 0.8786967992782593


Validating:  65%|██████▌   | 82/126 [04:35<02:05,  2.86s/batch]

Validation dice loss per batch: 0.22486969828605652


Validating:  66%|██████▌   | 83/126 [04:41<02:38,  3.69s/batch]

Validation dice loss per batch: 0.3485780358314514


Validating:  67%|██████▋   | 84/126 [04:41<01:53,  2.71s/batch]

Validation dice loss per batch: 0.19166389107704163


Validating:  67%|██████▋   | 85/126 [04:49<02:53,  4.22s/batch]

Validation dice loss per batch: 0.22615250945091248


Validating:  68%|██████▊   | 86/126 [04:49<02:03,  3.08s/batch]

Validation dice loss per batch: 0.1381564438343048


Validating:  69%|██████▉   | 87/126 [04:55<02:31,  3.89s/batch]

Validation dice loss per batch: 0.132180854678154


Validating:  70%|██████▉   | 88/126 [04:55<01:48,  2.85s/batch]

Validation dice loss per batch: 0.4331871569156647


Validating:  71%|███████   | 89/126 [05:02<02:29,  4.05s/batch]

Validation dice loss per batch: 0.17359872162342072


Validating:  71%|███████▏  | 90/126 [05:03<01:46,  2.96s/batch]

Validation dice loss per batch: 0.22209401428699493


Validating:  72%|███████▏  | 91/126 [05:09<02:15,  3.87s/batch]

Validation dice loss per batch: 0.46556419134140015


Validating:  73%|███████▎  | 92/126 [05:09<01:36,  2.84s/batch]

Validation dice loss per batch: 0.20571592450141907


Validating:  74%|███████▍  | 93/126 [05:15<02:07,  3.85s/batch]

Validation dice loss per batch: 0.6152535676956177


Validating:  75%|███████▍  | 94/126 [05:16<01:30,  2.82s/batch]

Validation dice loss per batch: 0.20389173924922943


Validating:  75%|███████▌  | 95/126 [05:23<02:10,  4.21s/batch]

Validation dice loss per batch: 0.3569703996181488


Validating:  76%|███████▌  | 96/126 [05:24<01:32,  3.07s/batch]

Validation dice loss per batch: 0.30342331528663635


Validating:  77%|███████▋  | 97/126 [05:29<01:51,  3.86s/batch]

Validation dice loss per batch: 0.2500287890434265


Validating:  78%|███████▊  | 98/126 [05:30<01:19,  2.83s/batch]

Validation dice loss per batch: 0.27151039242744446


Validating:  79%|███████▊  | 99/126 [05:36<01:45,  3.90s/batch]

Validation dice loss per batch: 0.14009130001068115


Validating:  79%|███████▉  | 100/126 [05:37<01:14,  2.85s/batch]

Validation dice loss per batch: 0.13030466437339783


Validating:  80%|████████  | 101/126 [05:42<01:31,  3.64s/batch]

Validation dice loss per batch: 0.254503458738327


Validating:  81%|████████  | 102/126 [05:42<01:04,  2.68s/batch]

Validation dice loss per batch: 0.41595134139060974


Validating:  82%|████████▏ | 103/126 [05:50<01:32,  4.01s/batch]

Validation dice loss per batch: 0.19340819120407104


Validating:  83%|████████▎ | 104/126 [05:50<01:04,  2.94s/batch]

Validation dice loss per batch: 0.7942081093788147


Validating:  83%|████████▎ | 105/126 [05:56<01:19,  3.81s/batch]

Validation dice loss per batch: 0.48012790083885193


Validating:  84%|████████▍ | 106/126 [05:56<00:55,  2.79s/batch]

Validation dice loss per batch: 0.1509968638420105


Validating:  85%|████████▍ | 107/126 [06:03<01:14,  3.94s/batch]

Validation dice loss per batch: 0.16859707236289978


Validating:  86%|████████▌ | 108/126 [06:03<00:51,  2.88s/batch]

Validation dice loss per batch: 0.3438846170902252


Validating:  87%|████████▋ | 109/126 [06:09<01:02,  3.70s/batch]

Validation dice loss per batch: 0.17277270555496216


Validating:  87%|████████▋ | 110/126 [06:09<00:43,  2.72s/batch]

Validation dice loss per batch: 0.17885315418243408


Validating:  88%|████████▊ | 111/126 [06:16<00:59,  3.94s/batch]

Validation dice loss per batch: 0.14085376262664795


Validating:  89%|████████▉ | 112/126 [06:17<00:40,  2.89s/batch]

Validation dice loss per batch: 0.5093159675598145


Validating:  90%|████████▉ | 113/126 [06:23<00:52,  4.05s/batch]

Validation dice loss per batch: 0.7222484350204468


Validating:  90%|█████████ | 114/126 [06:24<00:35,  2.96s/batch]

Validation dice loss per batch: 0.7810804843902588


Validating:  91%|█████████▏| 115/126 [06:31<00:46,  4.25s/batch]

Validation dice loss per batch: 1.0786916017532349


Validating:  92%|█████████▏| 116/126 [06:31<00:31,  3.11s/batch]

Validation dice loss per batch: 0.6086962223052979


Validating:  93%|█████████▎| 117/126 [06:37<00:35,  3.96s/batch]

Validation dice loss per batch: 0.5406646132469177


Validating:  94%|█████████▎| 118/126 [06:38<00:23,  2.90s/batch]

Validation dice loss per batch: 0.7894283533096313


Validating:  94%|█████████▍| 119/126 [06:43<00:25,  3.65s/batch]

Validation dice loss per batch: 0.5163136124610901


Validating:  95%|█████████▌| 120/126 [06:44<00:16,  2.68s/batch]

Validation dice loss per batch: 0.2803182005882263


Validating:  96%|█████████▌| 121/126 [06:49<00:17,  3.57s/batch]

Validation dice loss per batch: 0.4724695086479187


Validating:  97%|█████████▋| 122/126 [06:50<00:10,  2.63s/batch]

Validation dice loss per batch: 0.4276188015937805


Validating:  98%|█████████▊| 123/126 [07:00<00:14,  4.77s/batch]

Validation dice loss per batch: 0.4932032823562622


Validating:  98%|█████████▊| 124/126 [07:00<00:06,  3.46s/batch]

Validation dice loss per batch: 0.3133651316165924


Validating:  99%|█████████▉| 125/126 [07:09<00:05,  5.08s/batch]

Validation dice loss per batch: 0.284226655960083


                                                                

Validation dice loss per batch: 0.5388457775115967
------Final validation dice loss after epoch 13: 0.396073579788208-------




Learning rate after epoch 13: 0.001
Model saved after epoch 13


(Epoch 14/14):   0%|          | 0/890 [00:00<?, ?batch/s]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3615, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2736, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3615, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2736, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6351, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   0%|          | 1/890 [00:32<7:54:37, 32.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4978, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4978, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3383, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8361, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2633, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2633, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   0%|          | 2/890 [00:50<5:51:38, 23.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5590, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3183, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5590, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3183, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8773, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3206, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3206, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   0%|          | 3/890 [01:06<5:04:13, 20.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3266, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3266, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0390, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7248, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2443, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7248, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2443, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   0%|          | 4/890 [01:23<4:40:58, 19.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3032, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4000, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2636, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2636, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|          | 5/890 [01:40<4:32:20, 18.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4083, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3579, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4083, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3579, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7662, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8780, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2588, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8780, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2588, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|          | 6/890 [01:58<4:25:50, 18.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4029, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9962, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6523, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2944, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6523, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2944, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|          | 7/890 [02:14<4:19:15, 17.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3003, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4006, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2872, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2872, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|          | 8/890 [02:32<4:16:52, 17.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9618, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3386, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9618, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3386, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3004, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6200, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6200, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|          | 9/890 [02:49<4:15:59, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4390, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4390, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1688, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2555, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2555, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|          | 10/890 [03:05<4:11:46, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2792, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2792, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4725, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8674, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2561, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8674, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2561, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|          | 11/890 [03:23<4:11:11, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7589, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6539, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7589, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6539, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.4128, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1127, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4245, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1127, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4245, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|▏         | 12/890 [03:40<4:10:35, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2534, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2534, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1340, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3951, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3951, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   1%|▏         | 13/890 [03:56<4:08:05, 16.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3086, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3086, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0288, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8679, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3899, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8679, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3899, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 14/890 [04:13<4:07:05, 16.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3814, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3814, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6065, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6103, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6103, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 15/890 [04:30<4:05:01, 16.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4267, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4267, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7093, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3240, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3240, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 16/890 [04:46<4:03:25, 16.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9045, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2910, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9045, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2910, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3346, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3102, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3346, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3102, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 17/890 [05:04<4:08:04, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1077, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3485, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1077, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3485, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4562, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2910, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2910, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 18/890 [05:21<4:07:55, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9965, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2673, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9965, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2673, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2638, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2783, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2783, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 19/890 [05:38<4:05:59, 16.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3196, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3196, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3504, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3101, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3101, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 20/890 [05:54<4:05:07, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8456, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2953, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8456, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2953, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1408, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2934, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2934, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 21/890 [06:11<4:03:45, 16.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2959, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2959, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2626, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4369, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2981, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4369, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2981, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   2%|▏         | 22/890 [06:28<4:05:23, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9473, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2742, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9473, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2742, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2215, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7926, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7926, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3631, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 23/890 [06:45<4:05:26, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2712, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2712, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9105, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3601, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3601, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 24/890 [07:02<4:04:01, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3896, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3896, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7965, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8748, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2887, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8748, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2887, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 25/890 [07:19<4:04:05, 16.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5696, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5696, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2908, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8604, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6378, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2693, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6378, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2693, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 26/890 [07:36<4:02:33, 16.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5572, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3643, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5572, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3643, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9214, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2964, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2964, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 27/890 [07:53<4:02:19, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2617, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2617, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1389, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9242, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2924, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9242, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2924, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 28/890 [08:10<4:03:00, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4063, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0528, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2738, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3505, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2738, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3505, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 29/890 [08:27<4:03:26, 16.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5368, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3430, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5368, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3430, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8797, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2906, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2906, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 30/890 [08:44<4:02:22, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5734, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3452, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5734, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3452, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9186, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8653, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2859, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8653, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2859, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   3%|▎         | 31/890 [09:00<4:01:20, 16.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.9239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4147, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.9239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4147, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.3386, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2675, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2675, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▎         | 32/890 [09:18<4:03:08, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5073, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4227, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5073, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4227, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9301, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7268, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2637, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7268, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2637, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▎         | 33/890 [09:35<4:02:30, 16.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7590, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2740, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7590, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2740, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0329, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3084, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3084, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▍         | 34/890 [09:51<4:01:35, 16.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8738, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3656, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8738, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3656, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2394, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2892, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2892, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▍         | 35/890 [10:08<4:00:57, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2861, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2861, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8803, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3065, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3065, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▍         | 36/890 [10:26<4:02:21, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3402, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3865, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3402, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3865, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7267, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7060, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2751, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7060, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2751, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▍         | 37/890 [10:42<4:00:42, 16.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2875, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2875, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9068, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3566, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2565, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3566, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2565, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▍         | 38/890 [10:59<3:58:28, 16.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2851, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2851, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9671, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8150, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2868, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8150, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2868, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▍         | 39/890 [11:16<4:00:42, 16.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6419, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5642, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6419, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5642, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2061, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3144, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3144, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   4%|▍         | 40/890 [11:33<3:58:45, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5436, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5436, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3591, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7404, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2951, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7404, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2951, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▍         | 41/890 [11:50<4:01:54, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3459, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3459, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8977, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2616, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2616, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▍         | 42/890 [12:07<4:00:54, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2575, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2575, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8705, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6024, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3325, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6024, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3325, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▍         | 43/890 [12:24<3:59:08, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7621, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3222, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7621, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3222, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0844, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3662, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3662, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▍         | 44/890 [12:42<4:01:30, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4079, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4079, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9082, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2791, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2791, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▌         | 45/890 [12:58<3:59:46, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3442, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3442, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3093, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5666, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3254, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5666, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3254, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▌         | 46/890 [13:15<3:58:21, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1168, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2576, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1168, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2576, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3744, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3704, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3704, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▌         | 47/890 [13:32<3:59:12, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5453, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5453, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3562, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9015, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   5%|▌         | 48/890 [13:49<3:59:00, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3152, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3152, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7566, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4862, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2951, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4862, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2951, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▌         | 49/890 [14:06<3:58:52, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2757, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2757, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8621, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4421, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3428, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4421, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3428, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▌         | 50/890 [14:23<3:56:55, 16.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5677, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5677, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0477, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6206, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2612, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6206, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2612, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▌         | 51/890 [14:40<3:55:43, 16.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3377, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3377, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7810, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3207, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3207, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▌         | 52/890 [14:57<3:57:33, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5120, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2849, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5120, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2849, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7970, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3319, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3319, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▌         | 53/890 [15:14<3:56:30, 16.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3728, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3728, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1063, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3000, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3000, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▌         | 54/890 [15:31<3:55:21, 16.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3241, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3241, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1781, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6002, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2917, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6002, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2917, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▌         | 55/890 [15:48<3:56:34, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8844, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2550, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8844, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2550, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1395, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2785, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2785, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▋         | 56/890 [16:05<3:55:24, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4481, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4481, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0633, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3790, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3790, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   6%|▋         | 57/890 [16:22<3:55:55, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4618, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2914, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4618, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2914, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7531, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2195, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3538, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2195, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3538, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 58/890 [16:39<3:55:40, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5732, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3097, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5732, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3097, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8829, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4143, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4143, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 59/890 [16:56<3:54:37, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4750, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3547, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4750, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3547, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8297, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3027, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 60/890 [17:13<3:54:42, 16.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3238, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3238, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8927, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3145, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3145, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 61/890 [17:30<3:53:48, 16.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3053, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8817, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2987, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2987, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 62/890 [17:46<3:52:47, 16.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2816, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2816, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1066, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5160, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3469, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5160, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3469, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 63/890 [18:04<3:54:09, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3611, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3611, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8304, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3260, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3260, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 64/890 [18:20<3:53:15, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4657, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3500, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4657, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3500, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8157, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6283, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2802, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6283, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2802, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 65/890 [18:38<3:54:44, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4339, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4141, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4339, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4141, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8480, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3021, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   7%|▋         | 66/890 [18:55<3:53:38, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3182, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3182, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8658, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6591, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2596, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6591, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2596, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 67/890 [19:12<3:53:19, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4522, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6317, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4522, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6317, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0839, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8182, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3072, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8182, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3072, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 68/890 [19:29<3:55:46, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2956, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2956, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6410, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7500, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3341, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7500, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3341, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 69/890 [19:46<3:54:02, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3905, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3905, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8108, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5365, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3253, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5365, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3253, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 70/890 [20:03<3:51:49, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6995, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3341, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6995, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3341, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0336, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2046, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3167, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2046, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3167, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 71/890 [20:20<3:52:59, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4558, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3486, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4558, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3486, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8044, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3702, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3702, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 72/890 [20:37<3:52:03, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7111, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3276, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7111, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3276, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0387, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3366, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3366, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 73/890 [20:54<3:51:43, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3834, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3834, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2218, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3073, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3073, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 74/890 [21:11<3:49:58, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6924, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3016, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6924, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9940, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3213, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3213, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   8%|▊         | 75/890 [21:27<3:48:49, 16.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4402, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4402, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3631, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8033, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2804, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2804, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▊         | 76/890 [21:46<3:54:50, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5696, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5696, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7489, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3400, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3400, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▊         | 77/890 [22:03<3:52:24, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0025, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5417, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0025, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5417, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5442, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3100, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3100, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▉         | 78/890 [22:20<3:51:10, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3568, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2853, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3568, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2853, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6421, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2675, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2675, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▉         | 79/890 [22:37<3:51:43, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3712, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3939, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3712, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3939, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7651, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3541, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3541, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▉         | 80/890 [22:54<3:50:34, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6635, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6635, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1228, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5620, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3674, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5620, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3674, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▉         | 81/890 [23:11<3:50:04, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5067, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2865, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5067, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2865, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7932, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8514, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2849, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8514, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2849, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▉         | 82/890 [23:28<3:50:00, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2878, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2878, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5563, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7018, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7018, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3908, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▉         | 83/890 [23:45<3:48:36, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3538, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3538, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7525, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7891, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2934, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7891, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2934, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):   9%|▉         | 84/890 [24:02<3:51:23, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7150, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3051, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7150, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0201, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3111, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3111, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|▉         | 85/890 [24:20<3:53:12, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3852, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3852, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1842, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5312, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3187, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5312, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3187, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|▉         | 86/890 [24:37<3:49:56, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5670, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3378, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5670, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3378, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9048, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4398, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3389, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4398, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3389, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|▉         | 87/890 [24:54<3:49:13, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6035, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4732, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6035, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4732, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0767, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7595, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2829, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7595, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2829, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|▉         | 88/890 [25:11<3:49:17, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9184, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2773, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9184, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2773, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1957, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|█         | 89/890 [25:29<3:51:43, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3245, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3245, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0494, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7223, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3315, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7223, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3315, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|█         | 90/890 [25:46<3:49:23, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5678, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3126, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5678, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3126, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8804, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2913, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|█         | 91/890 [26:03<3:47:31, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4094, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4094, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4102, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7016, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2873, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7016, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2873, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|█         | 92/890 [26:20<3:48:28, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3453, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3453, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1733, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2659, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2659, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  10%|█         | 93/890 [26:37<3:47:54, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3877, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2785, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3877, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2785, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6662, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3040, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3040, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█         | 94/890 [26:54<3:45:52, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3853, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3853, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4944, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6321, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3339, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6321, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3339, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█         | 95/890 [27:11<3:46:57, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7219, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7737, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2870, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2870, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█         | 96/890 [27:28<3:45:12, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3556, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3556, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1000, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2739, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2739, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█         | 97/890 [27:45<3:46:46, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4673, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3893, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4673, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3893, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8567, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2914, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2914, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█         | 98/890 [28:02<3:44:59, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5273, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5273, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9712, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2972, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9712, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2972, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█         | 99/890 [28:19<3:42:57, 16.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3312, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3312, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1752, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3628, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3628, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█         | 100/890 [28:36<3:44:08, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3523, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3523, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2645, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9652, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3442, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9652, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3442, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█▏        | 101/890 [28:53<3:42:38, 16.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1175, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3024, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1175, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4199, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6911, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5068, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6911, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5068, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  11%|█▏        | 102/890 [29:09<3:40:57, 16.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9261, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3522, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9261, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3522, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2783, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6923, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3026, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6923, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 103/890 [29:27<3:43:23, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3579, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4503, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3579, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4503, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8083, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7332, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2931, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7332, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2931, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 104/890 [29:44<3:41:57, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6973, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2884, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6973, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2884, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9857, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3433, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3433, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 105/890 [30:01<3:44:12, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2892, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2892, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9298, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3418, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3418, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 106/890 [30:18<3:43:39, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5389, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5389, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5791, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2984, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5791, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2984, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 107/890 [30:35<3:42:18, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1133, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2802, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1133, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2802, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3935, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9386, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3597, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9386, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3597, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 108/890 [30:53<3:44:40, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2933, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2933, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3966, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3029, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 109/890 [31:10<3:42:35, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4821, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2644, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2833, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2833, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 110/890 [31:27<3:41:25, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3487, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3487, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0427, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5579, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2856, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5579, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2856, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  12%|█▏        | 111/890 [31:44<3:42:02, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6087, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3320, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6087, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3320, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2557, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2557, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 112/890 [32:00<3:40:01, 16.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3392, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3392, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8756, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8474, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3717, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8474, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3717, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 113/890 [32:17<3:39:20, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4162, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3152, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4162, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3152, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7314, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3967, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 114/890 [32:34<3:39:44, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6060, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6060, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9070, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3290, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3290, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 115/890 [32:51<3:38:13, 16.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4312, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4312, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9529, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8107, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2609, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8107, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2609, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 116/890 [33:08<3:39:53, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5160, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2834, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5160, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2834, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7994, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6235, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2909, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6235, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2909, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 117/890 [33:26<3:39:38, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9433, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5154, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9433, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5154, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6294, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3269, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6294, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3269, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 118/890 [33:42<3:38:07, 16.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3268, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3268, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1228, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3457, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3457, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 119/890 [34:00<3:39:20, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2592, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2592, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7797, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7791, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2888, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7791, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2888, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  13%|█▎        | 120/890 [34:16<3:37:39, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2451, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2451, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6366, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4221, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2794, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4221, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2794, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▎        | 121/890 [34:33<3:37:45, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2518, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2518, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9317, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3863, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3863, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▎        | 122/890 [34:50<3:36:06, 16.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3085, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3085, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9949, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3170, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3170, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▍        | 123/890 [35:07<3:35:45, 16.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3396, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3396, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3297, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4184, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4184, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▍        | 124/890 [35:24<3:38:18, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4890, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4890, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2252, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3612, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3612, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▍        | 125/890 [35:41<3:36:36, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3268, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3268, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9746, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3120, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3120, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▍        | 126/890 [35:58<3:34:38, 16.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5846, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3370, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5846, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3370, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9216, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9037, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2689, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9037, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2689, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▍        | 127/890 [36:15<3:37:03, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3350, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3350, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1250, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5566, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2606, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5566, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2606, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▍        | 128/890 [36:32<3:35:47, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3545, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3545, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0525, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2874, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2874, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  14%|█▍        | 129/890 [36:50<3:37:09, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5307, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3342, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5307, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3342, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8649, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8658, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8658, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2835, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▍        | 130/890 [37:06<3:35:33, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2907, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4105, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2907, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4105, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7012, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2975, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2975, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▍        | 131/890 [37:23<3:34:40, 16.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3726, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2688, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3726, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2688, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6414, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6430, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3567, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6430, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3567, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▍        | 132/890 [37:41<3:37:11, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4840, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4840, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3161, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8001, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2831, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2831, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▍        | 133/890 [37:58<3:35:23, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8218, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8218, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5927, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0504, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2594, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0504, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2594, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▌        | 134/890 [38:14<3:33:57, 16.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7327, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2767, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7327, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2767, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0094, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2481, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2481, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▌        | 135/890 [38:32<3:37:16, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3653, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3653, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1528, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5186, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3039, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5186, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▌        | 136/890 [38:49<3:34:56, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3962, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3116, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3962, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3116, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7078, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1150, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3732, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1150, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3732, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  15%|█▌        | 137/890 [39:06<3:34:48, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4799, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3815, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4799, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3815, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8614, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7953, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2871, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7953, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2871, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▌        | 138/890 [39:23<3:34:35, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2914, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2914, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9957, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7150, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3385, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7150, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3385, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▌        | 139/890 [39:40<3:33:00, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8938, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2939, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8938, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2939, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1876, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3556, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3556, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▌        | 140/890 [39:58<3:33:54, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5078, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3073, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5078, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3073, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8150, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3381, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3381, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▌        | 141/890 [40:15<3:33:31, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1258, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3630, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1258, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3630, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4889, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3053, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3053, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▌        | 142/890 [40:31<3:32:16, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5581, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2906, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5581, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2906, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8487, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2928, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2928, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▌        | 143/890 [40:49<3:33:56, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7626, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7103, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7626, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7103, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4729, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6023, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2536, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6023, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2536, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▌        | 144/890 [41:06<3:34:05, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1064, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2987, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2987, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▋        | 145/890 [41:23<3:32:21, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4394, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3410, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4394, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3410, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7805, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7360, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3459, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7360, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3459, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  16%|█▋        | 146/890 [41:40<3:31:24, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3715, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3715, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5689, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2797, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2797, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 147/890 [41:57<3:30:44, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2926, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2926, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9961, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3168, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3168, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 148/890 [42:14<3:30:56, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4492, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4492, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0055, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3407, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3407, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 149/890 [42:31<3:30:41, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1065, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3646, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1065, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3646, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4711, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9218, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3062, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9218, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3062, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 150/890 [42:48<3:29:43, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4311, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4311, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3670, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2966, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2966, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 151/890 [43:05<3:28:54, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6627, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2852, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6627, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2852, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9479, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4704, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3287, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4704, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3287, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 152/890 [43:22<3:30:14, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5520, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2716, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5520, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2716, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8235, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6387, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2479, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6387, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2479, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 153/890 [43:40<3:30:37, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5657, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2745, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5657, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2745, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8402, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3003, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 154/890 [43:57<3:29:36, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3489, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3489, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1478, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2858, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2858, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  17%|█▋        | 155/890 [44:14<3:29:38, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6668, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6668, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8988, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2923, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2923, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 156/890 [44:31<3:30:40, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3346, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3346, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3258, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3258, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 157/890 [44:48<3:30:32, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4511, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4511, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0230, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2999, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2999, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 158/890 [45:05<3:29:02, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4387, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4387, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4952, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3564, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4952, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3564, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 159/890 [45:22<3:27:53, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3341, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3341, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0361, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3180, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3401, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3180, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3401, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 160/890 [45:39<3:26:59, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2707, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2707, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8698, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4358, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4358, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 161/890 [45:56<3:26:32, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6679, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2812, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6679, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2812, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9491, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4591, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3142, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4591, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3142, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 162/890 [46:13<3:27:10, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3083, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3083, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8607, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6862, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3288, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6862, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3288, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 163/890 [46:31<3:28:03, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7164, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7164, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6698, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3048, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6698, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  18%|█▊        | 164/890 [46:48<3:26:59, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7283, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2481, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7283, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2481, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9764, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5080, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3996, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5080, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3996, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▊        | 165/890 [47:05<3:26:40, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2795, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2795, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0595, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2682, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▊        | 166/890 [47:22<3:25:32, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7415, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7415, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2995, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3447, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3447, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▉        | 167/890 [47:39<3:25:32, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0789, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3614, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0789, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3614, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4403, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4221, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4221, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▉        | 168/890 [47:56<3:26:36, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4242, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3841, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4242, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3841, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8083, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3148, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3148, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▉        | 169/890 [48:13<3:25:54, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8506, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8506, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5835, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4341, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8837, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2972, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8837, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2972, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▉        | 170/890 [48:30<3:25:35, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6322, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3919, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6322, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3919, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0241, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7538, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3274, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7538, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3274, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▉        | 171/890 [48:48<3:26:39, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0271, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3151, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0271, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3151, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3422, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5744, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3133, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5744, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3133, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▉        | 172/890 [49:05<3:26:26, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7418, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7418, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4155, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1573, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2921, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2921, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  19%|█▉        | 173/890 [49:23<3:27:09, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6790, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6790, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5557, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8673, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3750, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8673, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3750, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|█▉        | 174/890 [49:40<3:25:32, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8688, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3559, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8688, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3559, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2248, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4053, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4053, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|█▉        | 175/890 [49:57<3:24:13, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4089, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4089, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5872, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5872, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|█▉        | 176/890 [50:14<3:24:21, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3443, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3443, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1897, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6771, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2606, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6771, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2606, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|█▉        | 177/890 [50:31<3:24:35, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3300, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3300, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1617, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6153, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2712, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6153, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2712, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|██        | 178/890 [50:49<3:27:15, 17.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7058, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2915, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7058, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2915, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9972, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3440, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3440, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|██        | 179/890 [51:07<3:26:57, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2648, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2648, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4841, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5773, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5773, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|██        | 180/890 [51:24<3:27:03, 17.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5775, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5177, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5775, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5177, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0951, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8591, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2802, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8591, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2802, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|██        | 181/890 [51:42<3:27:06, 17.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1244, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4647, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1244, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4647, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5891, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3450, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3450, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  20%|██        | 182/890 [51:59<3:25:35, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3170, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3170, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9375, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3542, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3542, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██        | 183/890 [52:16<3:24:02, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4684, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4684, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9297, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2705, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9297, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2705, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██        | 184/890 [52:34<3:24:34, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3583, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3583, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0147, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3720, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3720, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██        | 185/890 [52:51<3:23:31, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7732, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2880, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7732, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2880, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0613, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2877, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2877, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██        | 186/890 [53:08<3:22:18, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5692, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2946, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5692, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2946, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8639, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9380, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2792, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9380, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2792, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██        | 187/890 [53:25<3:22:13, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6074, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4464, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6074, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4464, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0538, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5229, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3354, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5229, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3354, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██        | 188/890 [53:43<3:22:30, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2866, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2866, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3480, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3480, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██        | 189/890 [54:00<3:22:50, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1469, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3851, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1469, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3851, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5319, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2680, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2680, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██▏       | 190/890 [54:17<3:21:00, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5045, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1956, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2945, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2945, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  21%|██▏       | 191/890 [54:34<3:20:56, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4860, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4860, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7704, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2929, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2929, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 192/890 [54:51<3:19:44, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3110, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3110, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6702, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6018, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2808, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6018, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2808, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 193/890 [55:08<3:18:17, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6572, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3305, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6572, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3305, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9877, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3007, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 194/890 [55:26<3:19:09, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2881, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2850, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2881, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2850, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5731, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2711, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2711, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 195/890 [55:43<3:19:12, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2993, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2993, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0255, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2972, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2972, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 196/890 [56:00<3:20:21, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5704, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5704, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8718, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3291, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3291, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 197/890 [56:17<3:18:45, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6133, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2540, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6133, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2540, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8674, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6064, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2942, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6064, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2942, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 198/890 [56:34<3:17:38, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3392, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3392, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1969, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7366, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2683, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7366, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2683, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 199/890 [56:51<3:16:29, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7621, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3890, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7621, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3890, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1510, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3436, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3436, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  22%|██▏       | 200/890 [57:08<3:15:44, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7674, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8467, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7674, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8467, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3579, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3579, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 201/890 [57:25<3:15:12, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3928, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3928, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9487, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3534, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3534, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 202/890 [57:42<3:15:18, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3681, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3681, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0035, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3236, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3236, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 203/890 [57:59<3:14:22, 16.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2913, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1219, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7186, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3381, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7186, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3381, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 204/890 [58:16<3:15:10, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2777, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2777, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8463, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5649, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2820, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5649, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2820, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 205/890 [58:33<3:14:26, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6392, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3464, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6392, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3464, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9856, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2612, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2612, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 206/890 [58:50<3:13:03, 16.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9390, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3106, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9390, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3106, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2495, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3159, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3159, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 207/890 [59:07<3:12:59, 16.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4898, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4898, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0373, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3332, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3332, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 208/890 [59:25<3:14:44, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3161, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8332, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2905, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2905, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  23%|██▎       | 209/890 [59:42<3:14:09, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0644, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4094, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0644, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4094, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4738, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2594, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2594, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▎       | 210/890 [59:59<3:13:31, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5457, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2553, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5457, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2553, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8010, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2821, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▎       | 211/890 [1:00:16<3:13:31, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2966, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2966, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9888, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6325, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2696, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6325, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2696, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▍       | 212/890 [1:00:33<3:12:56, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4035, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2792, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4035, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2792, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6828, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2803, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2803, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▍       | 213/890 [1:00:50<3:12:00, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3394, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3394, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2631, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3073, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3073, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▍       | 214/890 [1:01:07<3:11:47, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9386, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4064, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9386, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3450, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7478, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2525, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7478, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2525, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▍       | 215/890 [1:01:24<3:11:32, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5575, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3079, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5575, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3079, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8653, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9981, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3988, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9981, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3988, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▍       | 216/890 [1:01:41<3:12:08, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3092, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3092, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7678, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7678, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▍       | 217/890 [1:01:58<3:11:47, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5999, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5999, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3612, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3612, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  24%|██▍       | 218/890 [1:02:15<3:12:00, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2966, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2966, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1275, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6637, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2867, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6637, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2867, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▍       | 219/890 [1:02:32<3:11:07, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0257, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2960, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0257, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2960, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3217, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3153, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3153, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▍       | 220/890 [1:02:49<3:11:04, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3594, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3594, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6099, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7421, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3260, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7421, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3260, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▍       | 221/890 [1:03:07<3:11:13, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3019, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1504, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2998, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2998, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▍       | 222/890 [1:03:24<3:10:44, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2942, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2942, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3036, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4857, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3338, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4857, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3338, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▌       | 223/890 [1:03:41<3:09:46, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4250, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6677, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4250, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6677, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0928, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3031, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▌       | 224/890 [1:03:58<3:10:33, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7955, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2984, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7955, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2984, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0939, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4813, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4813, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▌       | 225/890 [1:04:15<3:10:13, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5328, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3377, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5328, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3377, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8705, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2910, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2910, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  25%|██▌       | 226/890 [1:04:33<3:10:21, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5918, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5918, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4434, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3239, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3239, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▌       | 227/890 [1:04:50<3:10:40, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7606, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3875, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7606, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3875, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3439, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3439, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▌       | 228/890 [1:05:07<3:10:37, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5715, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3800, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5715, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3800, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9515, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2726, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2726, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▌       | 229/890 [1:05:24<3:09:33, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8978, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3072, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8978, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3072, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5599, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3422, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5599, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3422, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▌       | 230/890 [1:05:41<3:08:08, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3713, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3713, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7959, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9463, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3370, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9463, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3370, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▌       | 231/890 [1:05:58<3:07:23, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3478, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3478, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9366, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2895, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2895, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▌       | 232/890 [1:06:15<3:07:39, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2692, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2692, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9294, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2848, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2848, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▌       | 233/890 [1:06:32<3:06:55, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2405, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2405, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7227, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5818, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2874, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5818, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2874, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▋       | 234/890 [1:06:50<3:07:06, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7804, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2851, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7804, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2851, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0655, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7594, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2935, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7594, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2935, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  26%|██▋       | 235/890 [1:07:07<3:06:57, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3513, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2720, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3513, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2720, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6233, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3331, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3331, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 236/890 [1:07:24<3:07:38, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8422, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3478, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8422, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3478, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1900, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2457, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2457, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 237/890 [1:07:42<3:08:14, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2702, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2702, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7586, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5975, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2580, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5975, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2580, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 238/890 [1:07:58<3:06:18, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3061, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2830, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0641, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3745, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0641, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3745, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 239/890 [1:08:16<3:05:51, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2609, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2609, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7138, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3348, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3348, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 240/890 [1:08:33<3:06:49, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3300, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3300, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9713, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2839, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2839, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 241/890 [1:08:50<3:05:40, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4040, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2623, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4040, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2623, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6664, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3459, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3459, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 242/890 [1:09:07<3:04:33, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4831, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4831, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9802, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8162, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2855, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8162, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2855, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 243/890 [1:09:25<3:05:50, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8377, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3435, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8377, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3435, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1812, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2990, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2990, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  27%|██▋       | 244/890 [1:09:42<3:05:10, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9493, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4346, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9493, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4346, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3839, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7918, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7918, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2765, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 245/890 [1:09:59<3:05:08, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2900, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2900, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7864, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2504, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2504, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 246/890 [1:10:16<3:04:03, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3070, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3070, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2926, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2926, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 247/890 [1:10:33<3:03:40, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9184, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9184, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6584, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3447, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6584, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3447, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 248/890 [1:10:50<3:03:11, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2980, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7565, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5186, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3230, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5186, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3230, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 249/890 [1:11:07<3:02:42, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3211, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3211, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2896, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5775, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3317, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5775, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3317, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 250/890 [1:11:24<3:02:06, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3574, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3574, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9374, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6472, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3321, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6472, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3321, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 251/890 [1:11:42<3:03:06, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3527, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3527, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8372, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8686, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3139, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8686, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3139, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 252/890 [1:11:59<3:02:03, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3088, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3088, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8635, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4775, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2924, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4775, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2924, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  28%|██▊       | 253/890 [1:12:16<3:01:25, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4768, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4635, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4768, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4635, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9403, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2583, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2583, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▊       | 254/890 [1:12:33<3:00:43, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5058, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3774, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5058, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3774, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8832, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7195, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2456, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7195, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2456, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▊       | 255/890 [1:12:49<2:59:59, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4970, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3969, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4970, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3969, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8939, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3114, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3114, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▉       | 256/890 [1:13:07<3:00:40, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7402, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7402, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1409, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5059, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2743, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5059, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2743, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▉       | 257/890 [1:13:24<2:59:33, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5639, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3142, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5639, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3142, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8781, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2879, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2879, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▉       | 258/890 [1:13:41<3:00:04, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5967, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4257, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5967, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4257, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0224, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▉       | 259/890 [1:13:58<2:59:34, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7176, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7176, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0180, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2669, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2669, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▉       | 260/890 [1:14:15<2:59:21, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3412, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3412, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0935, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3269, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3269, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▉       | 261/890 [1:14:32<2:59:18, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3290, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3290, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2785, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3170, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3170, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  29%|██▉       | 262/890 [1:14:49<2:58:34, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4996, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2774, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4996, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2774, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7771, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4111, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3491, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4111, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3491, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|██▉       | 263/890 [1:15:06<2:58:06, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5305, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2836, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5305, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2836, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4481, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4481, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2980, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|██▉       | 264/890 [1:15:24<2:59:05, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3185, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3185, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0842, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6016, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3200, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6016, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3200, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|██▉       | 265/890 [1:15:41<2:58:33, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5206, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2611, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5206, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2611, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7817, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7558, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2956, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7558, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2956, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|██▉       | 266/890 [1:15:58<2:58:35, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9997, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4324, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9997, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4324, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4322, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|███       | 267/890 [1:16:15<2:58:01, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9639, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2934, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9639, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2934, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2573, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6512, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3245, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6512, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3245, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|███       | 268/890 [1:16:32<2:58:01, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3104, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3104, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0356, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7107, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3955, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7107, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3955, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|███       | 269/890 [1:16:49<2:57:09, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2805, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2805, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8591, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2993, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2947, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2993, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2947, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|███       | 270/890 [1:17:06<2:56:08, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3594, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3594, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0046, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2550, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2550, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  30%|███       | 271/890 [1:17:23<2:55:55, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3164, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2803, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3164, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2803, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5968, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8785, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3317, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8785, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3317, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███       | 272/890 [1:17:40<2:55:47, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2762, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2762, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0703, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2685, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2685, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███       | 273/890 [1:17:57<2:55:34, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3782, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8192, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3782, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8192, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1973, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5803, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3143, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5803, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3143, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███       | 274/890 [1:18:15<2:56:49, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3528, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3528, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2694, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7201, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3349, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7201, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3349, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███       | 275/890 [1:18:32<2:56:10, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4244, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4244, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0724, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3545, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3545, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███       | 276/890 [1:18:49<2:55:50, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9215, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9215, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2075, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3269, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3269, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███       | 277/890 [1:19:06<2:54:48, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3355, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3355, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1593, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6114, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2893, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6114, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2893, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███       | 278/890 [1:19:23<2:53:59, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9007, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6444, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9007, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6444, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5451, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7053, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2654, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7053, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2654, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███▏      | 279/890 [1:19:40<2:53:29, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0015, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3018, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0015, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3033, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6607, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3493, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6607, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3493, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  31%|███▏      | 280/890 [1:19:58<2:54:30, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9073, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2563, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9073, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2563, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6153, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6153, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3383, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 281/890 [1:20:15<2:53:48, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5386, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3228, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5386, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3228, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8614, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6893, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3023, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6893, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 282/890 [1:20:32<2:54:27, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7455, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3376, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7455, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3376, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0830, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9075, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2665, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9075, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2665, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 283/890 [1:20:49<2:52:46, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2596, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2596, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0179, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3283, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3283, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 284/890 [1:21:06<2:53:12, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8221, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2851, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8221, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2851, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1073, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3471, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3471, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 285/890 [1:21:23<2:51:51, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2704, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2704, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9241, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2804, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2804, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 286/890 [1:21:40<2:51:34, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4065, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3385, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4065, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3385, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7449, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7355, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3675, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7355, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3675, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 287/890 [1:21:57<2:51:09, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3236, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3236, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0538, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3255, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3255, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 288/890 [1:22:14<2:52:25, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3112, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3112, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8206, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2900, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2900, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  32%|███▏      | 289/890 [1:22:31<2:51:42, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2769, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2769, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9710, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3074, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 290/890 [1:22:48<2:50:47, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7329, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2797, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7329, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2797, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0127, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5431, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3445, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5431, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3445, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 291/890 [1:23:06<2:51:06, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3125, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3125, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7970, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2870, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2870, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 292/890 [1:23:23<2:51:12, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8232, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4327, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8232, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4327, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2559, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8479, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8479, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 293/890 [1:23:40<2:50:35, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7163, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3228, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7163, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3228, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0391, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4752, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4752, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 294/890 [1:23:57<2:49:43, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1900, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3022, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1900, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4922, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7019, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3803, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7019, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3803, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 295/890 [1:24:14<2:50:32, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4921, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4921, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6910, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2915, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2915, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 296/890 [1:24:32<2:51:18, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3293, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3293, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5745, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2672, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2672, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 297/890 [1:24:49<2:49:49, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3473, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3473, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6810, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2987, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2987, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  33%|███▎      | 298/890 [1:25:06<2:48:25, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5018, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6824, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7089, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3421, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7089, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3421, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▎      | 299/890 [1:25:23<2:48:25, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3219, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3219, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1159, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2820, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2820, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▎      | 300/890 [1:25:40<2:49:00, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5635, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3123, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5635, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3123, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8758, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7614, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3353, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7614, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3353, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▍      | 301/890 [1:25:57<2:48:05, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3328, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3328, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▍      | 302/890 [1:26:14<2:47:08, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2899, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2899, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0230, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8931, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8931, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▍      | 303/890 [1:26:31<2:46:18, 17.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6695, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4461, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6695, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4461, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1156, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3532, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3532, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▍      | 304/890 [1:26:49<2:48:32, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6753, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2988, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6753, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2988, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9741, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3292, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3292, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▍      | 305/890 [1:27:06<2:47:11, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4930, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4930, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9859, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6949, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3426, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6949, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3426, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▍      | 306/890 [1:27:23<2:47:09, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6771, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3027, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6771, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9798, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3076, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3076, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  34%|███▍      | 307/890 [1:27:40<2:47:34, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5117, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3472, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5117, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3472, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8589, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2920, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2920, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▍      | 308/890 [1:27:57<2:46:46, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6702, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2504, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6702, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2504, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9207, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7664, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3405, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7664, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3405, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▍      | 309/890 [1:28:14<2:45:27, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2917, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2917, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9934, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3464, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3464, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▍      | 310/890 [1:28:31<2:44:27, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3119, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3119, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9106, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5299, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2685, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5299, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2685, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▍      | 311/890 [1:28:48<2:43:51, 16.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8100, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2737, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8100, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2737, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0837, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2589, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2589, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▌      | 312/890 [1:29:05<2:44:16, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4232, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3319, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4232, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3319, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7551, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7492, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2841, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7492, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2841, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▌      | 313/890 [1:29:22<2:43:33, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3029, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9680, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7881, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2775, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7881, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2775, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▌      | 314/890 [1:29:39<2:43:59, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3880, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3880, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5173, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6844, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3411, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6844, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3411, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  35%|███▌      | 315/890 [1:29:56<2:43:33, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9933, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2739, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9933, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2739, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2672, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2539, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2539, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▌      | 316/890 [1:30:13<2:43:02, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6961, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6073, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6961, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6073, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3033, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2078, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3656, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2078, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3656, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▌      | 317/890 [1:30:30<2:41:47, 16.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2979, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2979, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9077, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3953, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3953, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▌      | 318/890 [1:30:47<2:41:58, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6182, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3098, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6182, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3098, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9280, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8283, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3737, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8283, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3737, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▌      | 319/890 [1:31:04<2:41:23, 16.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2656, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2656, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3218, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2610, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2610, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▌      | 320/890 [1:31:22<2:42:31, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2427, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3250, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2427, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3250, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5677, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2835, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▌      | 321/890 [1:31:39<2:42:43, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5917, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3696, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5917, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3696, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9613, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2604, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2604, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▌      | 322/890 [1:31:56<2:42:47, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8237, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7837, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3212, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7837, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3212, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▋      | 323/890 [1:32:13<2:42:31, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3537, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3537, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8493, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  36%|███▋      | 324/890 [1:32:31<2:42:20, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8627, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8627, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5967, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4594, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3293, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3293, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 325/890 [1:32:48<2:41:37, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3016, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8948, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2988, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2988, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 326/890 [1:33:05<2:40:35, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3385, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3385, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8278, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3390, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2766, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3390, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2766, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 327/890 [1:33:22<2:40:19, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6445, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3593, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6445, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3593, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0038, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2926, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2926, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 328/890 [1:33:39<2:40:56, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8744, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4501, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8744, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4501, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3246, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3245, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3245, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 329/890 [1:33:56<2:40:52, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5148, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2776, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5148, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2776, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7924, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3144, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3144, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 330/890 [1:34:13<2:39:58, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4788, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3093, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4788, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3093, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7881, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3002, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 331/890 [1:34:31<2:40:58, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3970, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3970, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1598, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3347, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3347, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 332/890 [1:34:48<2:41:38, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3282, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3282, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9639, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2879, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2879, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  37%|███▋      | 333/890 [1:35:05<2:40:15, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8506, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8506, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4765, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8821, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3250, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8821, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3250, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 334/890 [1:35:22<2:38:56, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3291, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3291, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7740, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2864, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2864, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 335/890 [1:35:39<2:38:02, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6185, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3840, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6185, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3840, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0025, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5962, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2834, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5962, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2834, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 336/890 [1:35:57<2:38:20, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5699, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3691, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5699, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3691, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9391, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7377, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2916, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7377, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2916, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 337/890 [1:36:14<2:37:58, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6031, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4233, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6031, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4233, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0263, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9539, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2579, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9539, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2579, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 338/890 [1:36:31<2:37:33, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2741, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2741, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9027, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3264, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3264, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 339/890 [1:36:48<2:36:49, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9258, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9258, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5346, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5492, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2536, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5492, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2536, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 340/890 [1:37:05<2:37:59, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5927, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3428, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5927, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3428, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9356, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0727, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2945, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0727, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2945, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 341/890 [1:37:22<2:37:00, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4204, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4204, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5254, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3054, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  38%|███▊      | 342/890 [1:37:40<2:36:41, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3280, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3280, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8809, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3367, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3367, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▊      | 343/890 [1:37:57<2:36:00, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5874, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3876, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5874, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3876, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9751, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0407, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0407, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2631, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▊      | 344/890 [1:38:13<2:34:56, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7254, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3275, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7254, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3275, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0529, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8170, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3497, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8170, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3497, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▉      | 345/890 [1:38:30<2:34:20, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6176, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3965, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6176, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3965, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▉      | 346/890 [1:38:47<2:34:03, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5349, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3000, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5349, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3000, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8348, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2993, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2993, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▉      | 347/890 [1:39:04<2:33:59, 17.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3479, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3479, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1527, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3250, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3250, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▉      | 348/890 [1:39:22<2:34:51, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3247, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3247, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2070, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3328, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3328, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▉      | 349/890 [1:39:39<2:34:10, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6821, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3354, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6821, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3354, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0174, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7780, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3606, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7780, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3606, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▉      | 350/890 [1:39:56<2:33:43, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8058, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3148, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8058, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3148, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1207, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2845, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2845, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  39%|███▉      | 351/890 [1:40:13<2:33:45, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6279, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2861, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6279, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2861, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9140, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2647, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2647, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|███▉      | 352/890 [1:40:30<2:33:22, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2756, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2756, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2089, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3176, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3176, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|███▉      | 353/890 [1:40:47<2:33:49, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2753, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3193, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2753, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3193, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5945, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3066, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3066, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|███▉      | 354/890 [1:41:05<2:33:52, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4706, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4706, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6088, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2818, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2818, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|███▉      | 355/890 [1:41:22<2:33:44, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2700, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2700, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9679, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3294, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3294, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|████      | 356/890 [1:41:39<2:32:52, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4138, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2927, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4138, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2927, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7066, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3083, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3083, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|████      | 357/890 [1:41:56<2:32:49, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3073, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3073, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3173, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3173, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|████      | 358/890 [1:42:14<2:32:29, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.9332, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5142, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9332, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5142, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.4474, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5077, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3336, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5077, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3336, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|████      | 359/890 [1:42:30<2:31:32, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0349, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3914, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0349, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3914, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4263, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3002, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  40%|████      | 360/890 [1:42:48<2:31:33, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3145, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3145, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2500, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5906, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5906, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2835, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████      | 361/890 [1:43:05<2:31:02, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3767, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3767, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8209, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2985, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2985, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████      | 362/890 [1:43:22<2:32:08, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4320, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4320, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0748, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3213, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3213, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████      | 363/890 [1:43:40<2:31:34, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3897, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3897, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5008, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3104, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3104, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████      | 364/890 [1:43:57<2:31:28, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3056, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9364, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4196, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3454, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4196, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3454, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████      | 365/890 [1:44:14<2:31:32, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3106, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3106, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9654, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4577, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2617, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4577, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2617, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████      | 366/890 [1:44:31<2:30:24, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7804, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3347, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7804, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3347, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1152, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3164, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3164, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████      | 367/890 [1:44:49<2:30:02, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3317, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3317, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2570, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████▏     | 368/890 [1:45:06<2:30:48, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2930, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2930, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8584, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2843, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2843, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  41%|████▏     | 369/890 [1:45:23<2:29:49, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2912, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2912, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8568, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3265, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3265, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 370/890 [1:45:41<2:29:55, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6911, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3527, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6911, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3527, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0438, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2685, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2685, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 371/890 [1:45:58<2:29:42, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2712, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2712, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9675, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3121, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3121, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 372/890 [1:46:15<2:28:42, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0422, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0422, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3416, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2493, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2493, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 373/890 [1:46:32<2:27:55, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8255, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8255, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5432, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8482, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2853, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8482, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2853, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 374/890 [1:46:49<2:27:12, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3266, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3266, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5603, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7373, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3496, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7373, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3496, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 375/890 [1:47:06<2:26:47, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5865, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3045, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5865, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8910, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6116, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3087, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6116, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3087, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 376/890 [1:47:23<2:27:00, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0509, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3107, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0509, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3107, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3617, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 377/890 [1:47:41<2:27:14, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3406, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3406, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8397, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8716, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3600, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8716, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3600, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  42%|████▏     | 378/890 [1:47:58<2:27:21, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5844, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3667, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5844, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3667, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9510, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4569, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3374, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4569, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3374, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 379/890 [1:48:15<2:26:34, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4674, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4674, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6883, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2810, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2810, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 380/890 [1:48:33<2:26:56, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2960, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2960, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1854, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3241, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3241, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 381/890 [1:48:50<2:25:58, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7002, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7002, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3289, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0290, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7066, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3460, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7066, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3460, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 382/890 [1:49:07<2:25:19, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5752, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2820, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5752, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2820, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8572, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5283, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3019, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5283, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 383/890 [1:49:24<2:24:41, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4638, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3605, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4638, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3605, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8243, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2857, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2857, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 384/890 [1:49:42<2:25:54, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3082, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3082, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0391, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2730, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2730, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 385/890 [1:49:59<2:24:54, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5014, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2996, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5014, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2996, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8010, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3027, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 386/890 [1:50:16<2:24:46, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6188, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4039, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6188, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0227, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5500, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2828, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5500, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2828, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  43%|████▎     | 387/890 [1:50:34<2:25:47, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7566, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7566, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3859, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2660, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2660, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▎     | 388/890 [1:50:51<2:25:18, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1620, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2936, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1620, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2936, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4556, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3343, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3343, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▎     | 389/890 [1:51:08<2:24:16, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7418, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2881, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7418, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2881, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0299, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3140, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3140, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▍     | 390/890 [1:51:25<2:23:16, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5715, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3730, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5715, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3730, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9444, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2746, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2746, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▍     | 391/890 [1:51:42<2:22:30, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5111, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2600, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5111, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2600, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7711, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3370, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3370, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▍     | 392/890 [1:52:00<2:23:59, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3059, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0146, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3538, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3538, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▍     | 393/890 [1:52:17<2:23:31, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3606, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3606, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7018, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7488, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2831, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7488, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2831, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▍     | 394/890 [1:52:35<2:23:30, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2957, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2957, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8390, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2968, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2968, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▍     | 395/890 [1:52:52<2:23:11, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4468, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3167, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4468, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3167, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7635, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7500, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3021, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7500, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  44%|████▍     | 396/890 [1:53:09<2:22:12, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2703, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2703, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2591, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6616, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3656, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6616, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3656, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▍     | 397/890 [1:53:26<2:21:22, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5838, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2691, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5838, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2691, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8529, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6570, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2934, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6570, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2934, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▍     | 398/890 [1:53:43<2:20:41, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.2944, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3668, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.2944, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3668, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.6611, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7826, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2839, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7826, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2839, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▍     | 399/890 [1:54:00<2:19:58, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7314, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2406, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7314, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2406, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9720, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7705, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3299, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7705, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3299, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▍     | 400/890 [1:54:17<2:19:51, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5650, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2960, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5650, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2960, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8609, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5506, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3274, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5506, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3274, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▌     | 401/890 [1:54:34<2:20:01, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0321, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0321, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3326, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6019, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2858, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6019, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2858, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▌     | 402/890 [1:54:51<2:19:11, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3505, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3505, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8181, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7607, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3286, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7607, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3286, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▌     | 403/890 [1:55:09<2:19:24, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2521, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2521, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1697, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6068, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3531, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6068, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3531, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  45%|████▌     | 404/890 [1:55:26<2:18:46, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1860, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2425, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1860, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2425, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4285, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1743, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3677, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1743, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3677, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▌     | 405/890 [1:55:43<2:18:13, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3137, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3137, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2239, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▌     | 406/890 [1:56:00<2:17:40, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6290, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3033, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6290, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9324, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3281, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3281, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▌     | 407/890 [1:56:17<2:17:15, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6357, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3238, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6357, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3238, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9595, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7212, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3327, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7212, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3327, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▌     | 408/890 [1:56:34<2:17:14, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4075, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4075, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9244, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3045, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▌     | 409/890 [1:56:51<2:17:28, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6937, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2831, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6937, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2831, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9768, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2842, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2842, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▌     | 410/890 [1:57:08<2:17:00, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2724, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2724, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9595, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3191, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3191, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▌     | 411/890 [1:57:26<2:17:08, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0572, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2702, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0572, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2702, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3274, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6698, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2469, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6698, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2469, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▋     | 412/890 [1:57:43<2:17:16, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2428, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2428, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3691, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4678, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2657, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4678, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2657, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  46%|████▋     | 413/890 [1:58:00<2:16:34, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3062, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3062, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7556, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0064, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2578, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0064, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2578, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 414/890 [1:58:17<2:15:58, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3136, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3136, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1265, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3124, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3124, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 415/890 [1:58:34<2:15:32, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2700, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2700, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7708, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2632, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2632, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 416/890 [1:58:52<2:17:06, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0819, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3018, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0819, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3837, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5653, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2778, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5653, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2778, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 417/890 [1:59:09<2:16:05, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3375, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3375, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8133, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9572, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2965, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9572, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2965, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 418/890 [1:59:26<2:15:12, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3093, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3093, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9342, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8191, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8191, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3682, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 419/890 [1:59:43<2:15:01, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3328, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3328, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5300, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1530, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4097, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1530, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4097, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 420/890 [2:00:01<2:15:56, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8032, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2564, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8032, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2564, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0596, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2857, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2857, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 421/890 [2:00:18<2:14:59, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5365, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2893, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5365, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2893, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8258, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3626, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3626, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  47%|████▋     | 422/890 [2:00:35<2:14:11, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3435, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3435, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9929, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7196, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2743, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7196, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2743, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 423/890 [2:00:52<2:13:25, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4635, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4635, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7645, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2514, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2514, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 424/890 [2:01:09<2:13:12, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3332, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3332, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2723, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2723, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 425/890 [2:01:27<2:13:06, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6154, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3626, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6154, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3626, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9780, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7664, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7664, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 426/890 [2:01:44<2:13:10, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6052, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2582, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6052, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2582, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8634, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3309, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3309, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 427/890 [2:02:01<2:12:37, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5966, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3211, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5966, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3211, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9177, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2965, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2965, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 428/890 [2:02:18<2:12:50, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3491, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3491, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7980, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5702, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5702, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 429/890 [2:02:36<2:13:35, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3044, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9828, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2798, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2798, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 430/890 [2:02:53<2:12:48, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8608, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3140, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8608, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3140, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1748, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1161, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3495, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1161, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3495, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  48%|████▊     | 431/890 [2:03:10<2:11:49, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3066, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3066, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8506, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9002, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3110, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9002, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3110, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▊     | 432/890 [2:03:28<2:11:56, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0743, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4600, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0743, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4600, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5343, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0031, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0031, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▊     | 433/890 [2:03:45<2:11:11, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9914, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3312, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9914, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3312, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3226, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3290, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3290, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▉     | 434/890 [2:04:03<2:12:10, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.3721, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(3.3721, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.3150, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3078, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3078, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▉     | 435/890 [2:04:20<2:11:44, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3109, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3109, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8672, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2784, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2784, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▉     | 436/890 [2:04:37<2:11:12, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6192, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6192, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4882, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3751, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3751, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▉     | 437/890 [2:04:54<2:10:02, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3621, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4103, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3621, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4103, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7724, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5571, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3307, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5571, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3307, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▉     | 438/890 [2:05:11<2:09:25, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.9928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3844, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.3772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8410, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3536, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8410, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3536, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▉     | 439/890 [2:05:28<2:08:26, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0514, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2654, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0514, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2654, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3168, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3377, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3377, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  49%|████▉     | 440/890 [2:05:46<2:10:36, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3262, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3262, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0439, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6221, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3214, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6221, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3214, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|████▉     | 441/890 [2:06:03<2:09:24, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2783, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2783, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1434, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3067, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|████▉     | 442/890 [2:06:20<2:08:35, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2517, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2517, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9275, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2878, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2878, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|████▉     | 443/890 [2:06:38<2:08:06, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2809, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2809, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8090, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3084, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3084, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|████▉     | 444/890 [2:06:55<2:08:00, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6275, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3451, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6275, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3451, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9725, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2765, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|█████     | 445/890 [2:07:12<2:07:08, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4334, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4334, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8219, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2399, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2399, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|█████     | 446/890 [2:07:29<2:06:50, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4421, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2695, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4421, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2695, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7115, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3506, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3506, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|█████     | 447/890 [2:07:46<2:06:32, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2621, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2621, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2713, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2941, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2941, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|█████     | 448/890 [2:08:04<2:07:58, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5625, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3430, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5625, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3430, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9054, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2975, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2975, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  50%|█████     | 449/890 [2:08:21<2:07:48, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8669, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5310, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2568, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5310, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2568, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████     | 450/890 [2:08:39<2:07:18, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3281, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3281, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3238, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2913, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████     | 451/890 [2:08:56<2:07:15, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9032, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9032, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1943, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3493, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3493, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████     | 452/890 [2:09:14<2:07:25, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8064, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2972, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8064, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2972, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1036, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5566, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2752, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5566, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2752, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████     | 453/890 [2:09:31<2:06:23, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5777, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2703, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5777, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2703, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8480, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3233, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3233, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████     | 454/890 [2:09:48<2:05:01, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5121, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3275, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5121, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3275, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8395, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3345, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3345, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████     | 455/890 [2:10:05<2:03:59, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5278, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5278, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0458, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2775, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0458, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2775, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████     | 456/890 [2:10:22<2:04:49, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3297, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3297, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0314, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2576, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2576, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████▏    | 457/890 [2:10:39<2:04:36, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8538, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4686, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8538, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4686, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3224, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3254, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3254, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  51%|█████▏    | 458/890 [2:10:56<2:03:46, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3944, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3944, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0196, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6858, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3034, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6858, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 459/890 [2:11:14<2:03:33, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4002, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2869, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4002, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2869, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6872, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9768, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3221, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9768, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3221, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 460/890 [2:11:31<2:02:56, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3451, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3451, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9154, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5973, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3242, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5973, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3242, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 461/890 [2:11:48<2:02:06, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2757, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2757, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7016, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2907, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2907, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 462/890 [2:12:05<2:01:48, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5255, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5255, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9571, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6643, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2736, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6643, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2736, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 463/890 [2:12:22<2:01:13, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6757, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3768, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6757, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3768, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0525, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0064, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2730, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0064, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2730, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 464/890 [2:12:39<2:02:30, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2346, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3272, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2346, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3272, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5618, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5664, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3724, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5664, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3724, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 465/890 [2:12:57<2:02:19, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2783, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2783, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6932, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3537, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3537, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 466/890 [2:13:14<2:01:44, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4207, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2729, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4207, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2729, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6935, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3924, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3548, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3924, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3548, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  52%|█████▏    | 467/890 [2:13:31<2:01:50, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8074, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2714, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8074, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2714, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0788, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4244, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2809, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4244, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2809, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 468/890 [2:13:49<2:02:27, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2925, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2925, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0650, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3236, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3236, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 469/890 [2:14:06<2:01:31, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4863, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2488, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4863, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2488, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7352, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 470/890 [2:14:23<2:00:49, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3123, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3123, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9055, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5955, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2607, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5955, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2607, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 471/890 [2:14:40<1:59:50, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5331, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5331, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9583, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6838, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3296, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6838, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3296, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 472/890 [2:14:58<2:00:27, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6705, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6705, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1438, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2724, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2724, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 473/890 [2:15:15<1:59:40, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4532, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3102, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4532, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3102, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7633, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2323, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2986, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2323, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2986, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 474/890 [2:15:32<1:59:26, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4555, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4555, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7918, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2601, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7918, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2601, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 475/890 [2:15:49<1:59:23, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3468, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3468, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7310, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7887, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2985, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7887, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2985, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  53%|█████▎    | 476/890 [2:16:07<1:59:39, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4541, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3311, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4541, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3311, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7852, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9881, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3338, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9881, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3338, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▎    | 477/890 [2:16:24<1:58:49, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4497, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4497, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8469, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6608, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3833, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6608, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3833, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▎    | 478/890 [2:16:41<1:58:05, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6392, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4370, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6392, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4370, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0762, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3258, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3258, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▍    | 479/890 [2:16:58<1:57:11, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2822, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2822, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8885, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3464, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3464, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▍    | 480/890 [2:17:15<1:56:28, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3873, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3178, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3873, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3178, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4075, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3473, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4075, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3473, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▍    | 481/890 [2:17:32<1:56:56, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4764, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2515, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4764, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2515, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7279, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3134, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3134, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▍    | 482/890 [2:17:49<1:56:01, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2908, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7293, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3156, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3156, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▍    | 483/890 [2:18:06<1:55:22, 17.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7474, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2680, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7474, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2680, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0154, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6954, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2776, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6954, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2776, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▍    | 484/890 [2:18:23<1:55:57, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7456, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2934, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7456, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2934, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0390, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5269, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3423, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5269, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3423, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  54%|█████▍    | 485/890 [2:18:40<1:55:21, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2932, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2932, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7940, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▍    | 486/890 [2:18:57<1:54:44, 17.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2803, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2803, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2639, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9366, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9366, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2821, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▍    | 487/890 [2:19:14<1:54:21, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9803, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4253, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9803, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4253, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4055, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4625, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3517, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4625, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3517, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▍    | 488/890 [2:19:32<1:54:24, 17.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3398, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3398, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1894, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2726, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2726, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▍    | 489/890 [2:19:49<1:54:39, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2965, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2653, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2965, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2653, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5618, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2653, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2653, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▌    | 490/890 [2:20:06<1:54:32, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3181, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3181, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9862, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6776, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3871, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6776, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3871, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▌    | 491/890 [2:20:23<1:54:03, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4312, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3733, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4312, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3733, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8044, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8801, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3154, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8801, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3154, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▌    | 492/890 [2:20:41<1:54:52, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6780, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2940, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6780, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2940, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9720, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3391, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3391, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  55%|█████▌    | 493/890 [2:20:58<1:53:51, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2984, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2984, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8985, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5498, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3297, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5498, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3297, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▌    | 494/890 [2:21:15<1:52:54, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5303, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5303, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4548, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3567, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3567, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▌    | 495/890 [2:21:32<1:52:16, 17.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4740, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5778, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4740, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5778, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0518, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8712, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2850, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8712, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2850, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▌    | 496/890 [2:21:49<1:53:05, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3719, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3719, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0727, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7570, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2959, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7570, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2959, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▌    | 497/890 [2:22:06<1:52:14, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7745, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4216, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7745, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4216, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1961, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3406, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3406, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▌    | 498/890 [2:22:23<1:52:04, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5483, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5483, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1672, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3850, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3850, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▌    | 499/890 [2:22:40<1:51:42, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3651, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3651, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6209, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3074, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6209, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▌    | 500/890 [2:22:58<1:51:14, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5307, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.2280, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5307, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.2280, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7082, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2671, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7082, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2671, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▋    | 501/890 [2:23:15<1:50:47, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6459, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4024, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6459, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0483, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8780, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2896, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8780, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2896, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  56%|█████▋    | 502/890 [2:23:31<1:50:07, 17.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5280, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3735, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5280, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3735, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9015, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3277, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3277, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 503/890 [2:23:48<1:49:29, 16.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2976, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2976, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8561, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2901, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2901, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 504/890 [2:24:05<1:49:18, 16.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3581, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3581, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8964, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7160, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3296, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7160, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3296, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 505/890 [2:24:26<1:56:58, 18.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5236, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3217, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5236, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3217, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8453, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6435, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3488, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6435, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3488, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 506/890 [2:24:44<1:54:40, 17.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4203, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2933, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4203, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2933, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3436, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3436, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 507/890 [2:25:01<1:53:43, 17.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4804, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4187, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4804, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4187, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8990, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6114, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3103, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6114, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3103, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 508/890 [2:25:18<1:51:41, 17.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4478, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4478, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9483, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3519, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3519, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 509/890 [2:25:35<1:50:07, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2884, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2884, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7432, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3561, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3561, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 510/890 [2:25:52<1:49:15, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6643, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3515, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6643, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3515, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0158, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6140, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3251, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6140, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3251, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  57%|█████▋    | 511/890 [2:26:09<1:48:22, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2607, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2607, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9542, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2690, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2690, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 512/890 [2:26:27<1:48:52, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4381, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4381, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8874, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3446, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3446, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 513/890 [2:26:44<1:49:26, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2808, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2808, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5094, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6318, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2472, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6318, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2472, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 514/890 [2:27:01<1:48:33, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9425, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.3508, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9425, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.3508, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2933, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5257, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2643, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5257, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2643, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 515/890 [2:27:19<1:48:06, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0727, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3399, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0727, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3399, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4126, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9073, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3640, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9073, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3640, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 516/890 [2:27:36<1:47:08, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4753, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4753, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2857, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7488, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3216, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7488, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3216, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 517/890 [2:27:53<1:46:43, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2953, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2953, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0095, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3318, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3318, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 518/890 [2:28:10<1:46:19, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6966, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4095, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6966, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4095, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1061, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5639, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3224, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5639, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3224, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 519/890 [2:28:27<1:45:38, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3388, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3388, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2972, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2972, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  58%|█████▊    | 520/890 [2:28:44<1:45:29, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1506, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2887, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1506, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2887, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4394, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3664, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3664, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▊    | 521/890 [2:29:01<1:45:04, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3488, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3488, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1471, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8223, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3948, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8223, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3948, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▊    | 522/890 [2:29:18<1:44:35, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2936, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2936, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4570, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2795, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2795, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▉    | 523/890 [2:29:35<1:45:12, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5381, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2895, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5381, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2895, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8276, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2688, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2688, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▉    | 524/890 [2:29:53<1:44:41, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2617, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2617, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7799, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2846, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2846, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▉    | 525/890 [2:30:10<1:44:06, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9973, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9973, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3002, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2976, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3273, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3273, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▉    | 526/890 [2:30:27<1:43:42, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6810, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2930, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6810, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2930, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9740, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3262, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3262, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▉    | 527/890 [2:30:44<1:43:29, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2655, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2655, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5375, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7680, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2838, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7680, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2838, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▉    | 528/890 [2:31:01<1:43:40, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8532, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2930, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8532, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2930, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1462, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4636, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2871, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4636, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2871, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  59%|█████▉    | 529/890 [2:31:18<1:43:18, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2960, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2960, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3061, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3302, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3302, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|█████▉    | 530/890 [2:31:35<1:42:42, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2607, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2607, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7765, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4415, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2979, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4415, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2979, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|█████▉    | 531/890 [2:31:52<1:42:39, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3022, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8828, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4601, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3267, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4601, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3267, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|█████▉    | 532/890 [2:32:10<1:42:26, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.0739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2638, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2638, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.3377, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3277, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3277, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|█████▉    | 533/890 [2:32:27<1:41:43, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6394, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2773, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6394, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2773, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3392, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3392, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|██████    | 534/890 [2:32:44<1:41:27, 17.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3095, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3095, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0980, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2810, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2810, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|██████    | 535/890 [2:33:01<1:40:58, 17.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6375, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4701, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6375, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4701, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1076, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2749, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2749, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|██████    | 536/890 [2:33:18<1:41:59, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4471, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4471, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7164, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3077, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3077, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|██████    | 537/890 [2:33:36<1:41:25, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8670, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8670, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2951, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2868, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2868, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  60%|██████    | 538/890 [2:33:53<1:40:48, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2935, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2935, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0349, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.5661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2823, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2823, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████    | 539/890 [2:34:10<1:40:47, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8323, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5060, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8323, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5060, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3383, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2869, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2869, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████    | 540/890 [2:34:27<1:40:26, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3230, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3230, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1389, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2815, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2815, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████    | 541/890 [2:34:44<1:40:01, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2810, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2810, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8987, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0116, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2710, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0116, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2710, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████    | 542/890 [2:35:01<1:39:26, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4225, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4225, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0196, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2650, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2650, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████    | 543/890 [2:35:19<1:39:11, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4543, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3030, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4543, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7573, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2760, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2760, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████    | 544/890 [2:35:36<1:39:08, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9374, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4774, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9374, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4774, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4148, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2894, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2894, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████    | 545/890 [2:35:53<1:38:54, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2525, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2525, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2668, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8378, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3178, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8378, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3178, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████▏   | 546/890 [2:36:10<1:38:48, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2621, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2621, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8382, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7949, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3274, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7949, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3274, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  61%|██████▏   | 547/890 [2:36:27<1:38:22, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6876, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3241, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6876, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3241, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0117, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3528, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3528, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 548/890 [2:36:45<1:37:54, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6103, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5174, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6103, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5174, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1277, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7650, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3406, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7650, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3406, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 549/890 [2:37:02<1:37:26, 17.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3310, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3310, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8429, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3155, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 550/890 [2:37:19<1:36:57, 17.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2988, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2988, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8240, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7900, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3017, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7900, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 551/890 [2:37:36<1:36:19, 17.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8826, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2840, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8826, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2840, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1666, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3250, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3250, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 552/890 [2:37:53<1:36:27, 17.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7384, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2934, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7384, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2934, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0318, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3297, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3297, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 553/890 [2:38:10<1:36:29, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2872, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2872, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0150, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2785, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2785, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 554/890 [2:38:27<1:36:15, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0254, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4063, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0254, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4317, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5458, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3350, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5458, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3350, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 555/890 [2:38:45<1:36:08, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2839, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2839, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0811, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3202, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3202, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  62%|██████▏   | 556/890 [2:39:02<1:35:49, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2859, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2859, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6766, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3404, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3404, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 557/890 [2:39:19<1:35:32, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4195, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4350, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4195, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4350, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8546, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3297, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3297, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 558/890 [2:39:36<1:35:02, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5785, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3627, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5785, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3627, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9412, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2905, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2905, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 559/890 [2:39:53<1:34:46, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3714, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3714, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8476, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4891, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3281, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4891, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3281, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 560/890 [2:40:11<1:35:18, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5549, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5549, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1112, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6200, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3098, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6200, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3098, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 561/890 [2:40:28<1:34:40, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7630, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2847, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7630, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2847, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0477, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8738, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3090, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8738, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3090, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 562/890 [2:40:45<1:34:23, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3214, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3214, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0260, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3242, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3242, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 563/890 [2:41:03<1:33:49, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2767, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2767, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5520, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2927, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5520, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2927, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 564/890 [2:41:20<1:33:44, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8704, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8704, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2682, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3110, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3110, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  63%|██████▎   | 565/890 [2:41:37<1:33:23, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9638, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9638, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2653, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9750, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2744, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9750, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2744, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▎   | 566/890 [2:41:54<1:32:43, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9357, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3712, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9357, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3712, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3069, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3726, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3726, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▎   | 567/890 [2:42:11<1:32:29, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8084, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3288, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8084, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3288, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1373, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3281, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3281, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▍   | 568/890 [2:42:29<1:32:26, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5728, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5728, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8522, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3557, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3557, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▍   | 569/890 [2:42:46<1:32:28, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.3107, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.3107, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5660, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4390, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2998, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4390, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2998, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▍   | 570/890 [2:43:03<1:32:08, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6258, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3188, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6258, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3188, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9447, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3128, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3128, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▍   | 571/890 [2:43:21<1:31:54, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3645, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3645, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0273, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2963, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2963, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▍   | 572/890 [2:43:38<1:31:34, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4274, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3096, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4274, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3096, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7371, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3562, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▍   | 573/890 [2:43:55<1:31:08, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4605, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4605, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1397, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2715, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2715, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  64%|██████▍   | 574/890 [2:44:12<1:30:32, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9390, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2921, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9390, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2921, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2311, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2582, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2582, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▍   | 575/890 [2:44:29<1:30:30, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0729, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3879, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0729, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3879, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4608, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▍   | 576/890 [2:44:47<1:30:51, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2855, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2855, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0655, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6757, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2610, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6757, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2610, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▍   | 577/890 [2:45:04<1:30:09, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6372, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6372, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2421, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2825, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2825, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▍   | 578/890 [2:45:21<1:29:47, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4958, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6828, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2760, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2760, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▌   | 579/890 [2:45:39<1:29:47, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3744, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3744, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8674, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9250, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3886, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9250, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3886, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▌   | 580/890 [2:45:57<1:30:09, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7073, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2771, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7073, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2771, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9843, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2868, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2868, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▌   | 581/890 [2:46:14<1:29:17, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0328, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.8384, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0328, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.8384, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8712, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9018, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3480, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9018, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3480, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  65%|██████▌   | 582/890 [2:46:31<1:28:52, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6258, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3531, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6258, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3531, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9790, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4244, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3338, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4244, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3338, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▌   | 583/890 [2:46:48<1:28:14, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5275, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2938, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5275, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2938, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8213, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2878, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2878, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▌   | 584/890 [2:47:05<1:28:08, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3036, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8845, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3112, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3112, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▌   | 585/890 [2:47:23<1:27:44, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3366, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3366, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0725, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3464, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3464, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▌   | 586/890 [2:47:40<1:27:34, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7760, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4290, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7760, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4290, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3049, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▌   | 587/890 [2:47:57<1:27:18, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2871, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2871, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0052, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2887, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2887, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▌   | 588/890 [2:48:15<1:27:04, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4064, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9161, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2831, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2831, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▌   | 589/890 [2:48:32<1:26:43, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6715, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2933, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6715, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2933, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9648, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3603, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3603, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▋   | 590/890 [2:48:49<1:26:07, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7068, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3568, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7068, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3568, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3654, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3654, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  66%|██████▋   | 591/890 [2:49:07<1:26:55, 17.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2922, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2922, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9582, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8810, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3416, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8810, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3416, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 592/890 [2:49:25<1:27:04, 17.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4648, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2883, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4648, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2883, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7531, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3893, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3893, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 593/890 [2:49:42<1:26:20, 17.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2717, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2717, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3382, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3318, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3318, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 594/890 [2:50:00<1:26:56, 17.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4301, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4301, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0566, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3066, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3066, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 595/890 [2:50:18<1:26:50, 17.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4422, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3457, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4422, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3457, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3144, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3144, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 596/890 [2:50:35<1:26:25, 17.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3948, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3948, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8851, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3908, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 597/890 [2:50:52<1:25:28, 17.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3041, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2828, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2686, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2686, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 598/890 [2:51:10<1:25:09, 17.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1140, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2741, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1140, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2741, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3882, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8949, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2818, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8949, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2818, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 599/890 [2:51:28<1:25:17, 17.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7506, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4925, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7506, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4925, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2430, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7538, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2920, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7538, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2920, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  67%|██████▋   | 600/890 [2:51:45<1:25:11, 17.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8635, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2927, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8635, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2927, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1562, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6692, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3716, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6692, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3716, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 601/890 [2:52:03<1:24:06, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2952, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2952, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9891, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2943, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2943, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 602/890 [2:52:20<1:23:49, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6508, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5300, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6508, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5300, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1808, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8058, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2763, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8058, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2763, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 603/890 [2:52:37<1:23:31, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2812, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2812, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9151, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3071, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3071, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 604/890 [2:52:55<1:22:40, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2992, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2992, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4585, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5323, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3391, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5323, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3391, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 605/890 [2:53:12<1:22:09, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0460, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3241, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0460, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3241, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3701, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7273, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2807, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7273, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2807, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 606/890 [2:53:29<1:21:38, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4825, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4825, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6977, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3338, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3338, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 607/890 [2:53:46<1:21:18, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4409, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2644, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4409, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2644, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7054, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6214, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3075, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6214, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3075, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 608/890 [2:54:03<1:21:01, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8770, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3160, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8770, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3160, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1929, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3825, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3825, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  68%|██████▊   | 609/890 [2:54:20<1:20:25, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7687, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5377, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2814, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5377, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2814, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▊   | 610/890 [2:54:38<1:20:56, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3610, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3610, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8465, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.2706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4229, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4229, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▊   | 611/890 [2:54:56<1:21:10, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3315, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3315, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0025, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7392, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4132, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7392, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4132, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▉   | 612/890 [2:55:13<1:20:44, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5133, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5133, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2975, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5457, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2848, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5457, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2848, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▉   | 613/890 [2:55:30<1:19:54, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5418, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4055, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5418, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9473, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8460, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2867, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8460, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2867, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▉   | 614/890 [2:55:47<1:19:19, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4256, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4161, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4256, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4161, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8417, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3333, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3333, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▉   | 615/890 [2:56:04<1:18:47, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5458, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5588, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5458, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5588, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1045, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6827, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3412, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6827, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3412, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▉   | 616/890 [2:56:22<1:19:06, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3182, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3182, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7906, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4710, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2714, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4710, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2714, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▉   | 617/890 [2:56:39<1:18:59, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2976, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2976, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0246, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9542, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9542, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3631, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  69%|██████▉   | 618/890 [2:56:57<1:19:07, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5407, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4591, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5407, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4591, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9999, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3155, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|██████▉   | 619/890 [2:57:14<1:18:22, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9286, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3228, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9286, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3228, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2515, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2811, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2811, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|██████▉   | 620/890 [2:57:31<1:17:54, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8771, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3932, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8771, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3932, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2703, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3168, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3168, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|██████▉   | 621/890 [2:57:48<1:17:13, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3125, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3125, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8810, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1778, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3078, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1778, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3078, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|██████▉   | 622/890 [2:58:06<1:17:05, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.0920, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3139, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0920, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3139, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.4059, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3722, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3722, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|███████   | 623/890 [2:58:23<1:16:38, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2945, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2945, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2299, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.5285, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3924, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5285, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3924, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|███████   | 624/890 [2:58:41<1:16:57, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4804, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4804, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9217, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3857, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3857, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|███████   | 625/890 [2:58:58<1:16:14, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4076, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4076, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7114, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7219, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3851, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7219, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3851, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|███████   | 626/890 [2:59:15<1:16:16, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3601, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3601, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1778, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3232, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3232, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  70%|███████   | 627/890 [2:59:33<1:16:18, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4360, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4360, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0015, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3290, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3290, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████   | 628/890 [2:59:50<1:16:09, 17.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3300, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3300, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6656, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3193, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3193, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████   | 629/890 [3:00:07<1:15:23, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4383, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1910, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7558, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2841, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7558, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2841, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████   | 630/890 [3:00:25<1:15:01, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2866, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2866, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7580, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2834, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2834, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████   | 631/890 [3:00:42<1:14:20, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3278, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3278, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3838, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4312, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3248, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4312, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3248, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████   | 632/890 [3:00:59<1:14:13, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6534, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6534, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████   | 633/890 [3:01:16<1:13:57, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8121, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4896, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8121, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4896, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3016, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3965, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2912, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3965, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2912, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████   | 634/890 [3:01:33<1:13:28, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1139, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3777, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3777, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████▏  | 635/890 [3:01:51<1:13:24, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7854, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3172, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7854, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3172, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1025, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0066, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3481, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0066, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3481, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  71%|███████▏  | 636/890 [3:02:08<1:13:29, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4468, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4551, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4468, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4551, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9019, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2913, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 637/890 [3:02:25<1:12:42, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4329, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3192, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4329, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3192, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7521, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3306, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3306, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 638/890 [3:02:42<1:12:09, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3178, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3178, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9221, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3018, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 639/890 [3:02:59<1:11:38, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3915, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3915, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9379, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2835, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 640/890 [3:03:17<1:12:15, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7459, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3282, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7459, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3282, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0742, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3503, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3503, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 641/890 [3:03:34<1:11:50, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0748, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5473, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0748, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5473, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6221, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5694, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3187, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5694, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3187, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 642/890 [3:03:52<1:11:21, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2726, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2726, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3575, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7157, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2842, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7157, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2842, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 643/890 [3:04:09<1:11:28, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2979, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2979, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2302, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0161, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3271, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0161, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3271, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 644/890 [3:04:27<1:11:16, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9955, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3938, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9955, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3938, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3893, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2584, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2584, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  72%|███████▏  | 645/890 [3:04:44<1:10:44, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3074, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3074, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1622, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2582, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2582, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 646/890 [3:05:01<1:10:08, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4136, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4136, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7551, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0812, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0812, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2908, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 647/890 [3:05:18<1:09:52, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7362, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7362, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6971, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0911, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3253, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0911, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3253, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 648/890 [3:05:36<1:10:12, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2977, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2977, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2215, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8018, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2872, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8018, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2872, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 649/890 [3:05:54<1:10:15, 17.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2721, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2990, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2721, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2990, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5711, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1727, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2551, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1727, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2551, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 650/890 [3:06:11<1:09:37, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0923, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4301, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0923, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4301, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5224, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3379, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3379, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 651/890 [3:06:28<1:09:26, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2222, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2613, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2222, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2613, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4835, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6966, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3318, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6966, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3318, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 652/890 [3:06:46<1:09:29, 17.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3000, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3000, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0451, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2951, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2951, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 653/890 [3:07:03<1:08:51, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3859, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3859, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5541, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3263, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5541, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3263, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  73%|███████▎  | 654/890 [3:07:20<1:08:02, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3472, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3472, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0718, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6415, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3518, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6415, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3518, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▎  | 655/890 [3:07:37<1:07:37, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7463, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2821, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7463, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2821, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0284, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3634, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3634, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▎  | 656/890 [3:07:55<1:07:25, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2981, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2981, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9909, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5100, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3256, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5100, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3256, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▍  | 657/890 [3:08:12<1:06:51, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1752, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3500, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1752, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3500, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5252, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5889, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2717, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5889, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2717, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▍  | 658/890 [3:08:29<1:06:34, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3219, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5274, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3219, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5274, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8493, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6478, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3565, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6478, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3565, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▍  | 659/890 [3:08:46<1:06:05, 17.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2350, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2928, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2350, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2928, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5278, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2766, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2766, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▍  | 660/890 [3:09:04<1:06:15, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2192, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3196, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2192, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3196, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5387, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4650, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2952, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4650, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2952, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▍  | 661/890 [3:09:21<1:05:35, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3217, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3217, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1622, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3569, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3569, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▍  | 662/890 [3:09:38<1:05:19, 17.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9357, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3045, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9357, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2401, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3121, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3121, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  74%|███████▍  | 663/890 [3:09:55<1:04:53, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5617, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3253, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5617, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3253, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8869, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3293, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3293, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▍  | 664/890 [3:10:12<1:05:01, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0591, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2904, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0591, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2904, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3496, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▍  | 665/890 [3:10:29<1:04:35, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7753, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3117, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7753, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3117, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0870, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2996, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2996, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▍  | 666/890 [3:10:47<1:04:13, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1512, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4300, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1512, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4300, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5812, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6433, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2913, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6433, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2913, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▍  | 667/890 [3:11:04<1:04:08, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1947, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9212, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2869, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9212, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2869, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▌  | 668/890 [3:11:22<1:04:40, 17.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7163, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3224, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7163, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3224, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0386, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4431, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4339, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4431, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4339, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▌  | 669/890 [3:11:39<1:03:52, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3373, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3373, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2429, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3761, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3761, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▌  | 670/890 [3:11:56<1:03:15, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2712, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2712, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3162, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4170, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4481, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4170, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4481, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  75%|███████▌  | 671/890 [3:12:13<1:02:46, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3075, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3075, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2257, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4133, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2728, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4133, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2728, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▌  | 672/890 [3:12:30<1:02:37, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2860, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2421, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3784, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3784, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▌  | 673/890 [3:12:48<1:02:18, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2555, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3359, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2555, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3359, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5914, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6439, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6439, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▌  | 674/890 [3:13:05<1:01:46, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4707, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4707, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7691, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3538, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3538, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▌  | 675/890 [3:13:22<1:01:39, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5267, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5267, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9169, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6419, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3361, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6419, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3361, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▌  | 676/890 [3:13:39<1:01:36, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2704, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2704, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2104, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3464, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3464, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▌  | 677/890 [3:13:57<1:01:17, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0159, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2779, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0159, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2779, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2938, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3041, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▌  | 678/890 [3:14:14<1:00:58, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2788, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2788, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1981, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6303, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3254, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6303, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3254, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▋  | 679/890 [3:14:31<1:00:31, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4306, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4306, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2826, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8126, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3149, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8126, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3149, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  76%|███████▋  | 680/890 [3:14:49<1:00:54, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3687, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2874, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3687, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2874, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6561, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3380, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3380, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 681/890 [3:15:06<1:00:14, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3812, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3812, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4024, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3485, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3485, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 682/890 [3:15:24<1:00:19, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2675, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2675, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2253, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3412, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3412, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 683/890 [3:15:41<59:52, 17.35s/batch]  

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5032, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9278, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3663, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3663, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 684/890 [3:15:58<59:45, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2658, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2658, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3086, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3814, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3376, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3814, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3376, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 685/890 [3:16:15<58:59, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3059, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2960, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3183, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3183, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 686/890 [3:16:32<58:36, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2735, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2735, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8474, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3097, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3097, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 687/890 [3:16:50<58:25, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2854, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2854, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8194, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6175, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2730, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6175, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2730, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 688/890 [3:17:07<58:14, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6187, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6187, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9031, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3409, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3409, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  77%|███████▋  | 689/890 [3:17:25<58:09, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5126, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2742, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5126, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2742, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7868, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6419, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2815, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6419, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2815, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 690/890 [3:17:42<57:43, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3832, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3832, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0893, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2706, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2706, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 691/890 [3:17:59<57:33, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5811, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5811, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2633, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3056, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3056, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 692/890 [3:18:17<57:10, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3951, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3028, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3951, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6980, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5668, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2793, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5668, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2793, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 693/890 [3:18:34<56:43, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6968, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6641, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6968, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6641, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3610, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8439, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3385, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8439, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3385, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 694/890 [3:18:51<56:13, 17.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6343, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2842, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6343, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2842, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9186, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3770, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3770, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 695/890 [3:19:08<56:13, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4363, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4363, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4857, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3046, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 696/890 [3:19:25<55:42, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7982, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3812, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7982, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3812, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1794, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8035, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3440, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8035, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3440, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 697/890 [3:19:43<55:28, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4017, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8786, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3265, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3265, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  78%|███████▊  | 698/890 [3:20:00<55:09, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1170, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3912, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1170, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3912, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5083, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3398, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3398, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▊  | 699/890 [3:20:18<55:20, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9670, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6037, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9670, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5707, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2796, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2796, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▊  | 700/890 [3:20:35<55:15, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4367, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4367, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1007, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7756, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2827, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7756, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2827, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▉  | 701/890 [3:20:52<54:42, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6944, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2943, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6944, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2943, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9887, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2835, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▉  | 702/890 [3:21:09<54:13, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5187, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5221, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5187, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5221, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0408, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3181, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3181, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▉  | 703/890 [3:21:27<53:42, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5341, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4214, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5341, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4214, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9555, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▉  | 704/890 [3:21:44<53:57, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3156, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3156, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6870, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2668, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2668, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▉  | 705/890 [3:22:01<53:19, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5539, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.6960, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5539, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.6960, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2499, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8167, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3080, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8167, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3080, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▉  | 706/890 [3:22:19<52:59, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3894, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3894, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7459, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3303, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3303, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  79%|███████▉  | 707/890 [3:22:36<52:51, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3211, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3211, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9260, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2889, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2889, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|███████▉  | 708/890 [3:22:53<52:23, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2862, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2862, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8808, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2740, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2740, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|███████▉  | 709/890 [3:23:10<51:46, 17.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2980, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8704, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3468, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3468, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|███████▉  | 710/890 [3:23:27<51:31, 17.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3935, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2938, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3935, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2938, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6873, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0065, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3527, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0065, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3527, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|███████▉  | 711/890 [3:23:44<51:06, 17.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4923, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4923, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0479, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2799, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2799, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|████████  | 712/890 [3:24:02<51:18, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4890, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3711, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4890, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3711, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8601, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3403, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3403, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|████████  | 713/890 [3:24:19<50:49, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4267, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4267, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9161, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3286, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3286, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|████████  | 714/890 [3:24:37<50:43, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3294, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3294, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6546, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2813, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2813, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|████████  | 715/890 [3:24:54<50:26, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5481, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5481, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8735, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2805, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8735, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2805, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  80%|████████  | 716/890 [3:25:11<50:19, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3034, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1643, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3204, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3204, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████  | 717/890 [3:25:29<50:14, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2978, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2978, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7223, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6168, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3481, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6168, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3481, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████  | 718/890 [3:25:46<49:43, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3178, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3178, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9191, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4026, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4026, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3007, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████  | 719/890 [3:26:03<49:18, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6694, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4566, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6694, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4566, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1261, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8292, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2737, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8292, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2737, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████  | 720/890 [3:26:21<49:10, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8899, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8899, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3980, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3048, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████  | 721/890 [3:26:38<48:45, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3466, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3466, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9031, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2645, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2645, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████  | 722/890 [3:26:55<48:24, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7492, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5659, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7492, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5659, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3151, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2882, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2882, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████  | 723/890 [3:27:12<47:59, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7575, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5501, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7575, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5501, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3076, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2981, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2981, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████▏ | 724/890 [3:27:30<47:59, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6420, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2980, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6420, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2980, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9399, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2593, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2593, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  81%|████████▏ | 725/890 [3:27:47<47:32, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3375, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3375, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0059, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7052, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7052, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4008, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 726/890 [3:28:04<47:10, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3958, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3958, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8689, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3752, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3752, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 727/890 [3:28:21<46:47, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6289, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3069, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6289, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3069, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9358, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3537, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3537, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 728/890 [3:28:39<46:48, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2839, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2839, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8390, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6599, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2869, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6599, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2869, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 729/890 [3:28:57<46:44, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3045, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8486, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3171, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 730/890 [3:29:14<46:31, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3729, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3729, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0817, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7273, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3267, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7273, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3267, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 731/890 [3:29:32<46:31, 17.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3365, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3365, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1951, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2822, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2822, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 732/890 [3:29:50<46:16, 17.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2789, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2789, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6932, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2779, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2779, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 733/890 [3:30:07<45:55, 17.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3418, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3418, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9590, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3098, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3098, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  82%|████████▏ | 734/890 [3:30:24<45:23, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3249, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3249, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9373, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2783, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2773, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2783, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2773, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 735/890 [3:30:41<44:51, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3378, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3378, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4195, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3229, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4195, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3229, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 736/890 [3:30:59<44:30, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5773, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(1.2682, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5773, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.2682, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8455, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3326, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3326, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 737/890 [3:31:16<44:05, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.9332, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.9332, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.6535, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3199, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3199, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 738/890 [3:31:33<43:55, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3247, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3247, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0670, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2985, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2985, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 739/890 [3:31:51<43:35, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3133, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3133, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4278, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5461, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3555, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5461, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3555, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 740/890 [3:32:08<43:15, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6753, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3064, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6753, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9817, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2903, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2903, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 741/890 [3:32:25<42:54, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6080, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6080, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9658, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2797, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2797, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 742/890 [3:32:42<42:29, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4547, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4547, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0150, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3486, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3486, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  83%|████████▎ | 743/890 [3:33:00<42:24, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6176, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2967, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6176, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2967, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9143, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2835, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▎ | 744/890 [3:33:17<42:03, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2648, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2648, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7718, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8307, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2820, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8307, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2820, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▎ | 745/890 [3:33:34<41:41, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3055, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7995, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9035, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2917, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9035, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2917, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▍ | 746/890 [3:33:51<41:24, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2532, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2532, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7874, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2527, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2527, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▍ | 747/890 [3:34:09<41:18, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2568, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2568, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8012, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3044, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3044, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▍ | 748/890 [3:34:26<41:06, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8926, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8926, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1975, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0299, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3761, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0299, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3761, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▍ | 749/890 [3:34:44<40:42, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6146, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3316, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6146, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3316, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9463, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3050, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3050, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▍ | 750/890 [3:35:01<40:14, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4289, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4289, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0460, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2766, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2766, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▍ | 751/890 [3:35:18<39:50, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3413, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3413, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5555, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3513, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3513, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  84%|████████▍ | 752/890 [3:35:35<39:40, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4298, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4298, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9702, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3880, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3880, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▍ | 753/890 [3:35:52<39:21, 17.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6260, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6260, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5765, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3127, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3127, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▍ | 754/890 [3:36:09<39:01, 17.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3909, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3909, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0506, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2969, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2969, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▍ | 755/890 [3:36:27<38:54, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3460, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3460, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8508, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3152, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3152, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▍ | 756/890 [3:36:45<38:53, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3110, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3110, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6995, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5920, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3222, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5920, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3222, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▌ | 757/890 [3:37:02<38:46, 17.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1707, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1707, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4716, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2604, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2604, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▌ | 758/890 [3:37:19<38:14, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5235, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3396, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5235, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3396, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8631, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2899, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2899, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▌ | 759/890 [3:37:37<37:48, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5082, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5082, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1116, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3170, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3170, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  85%|████████▌ | 760/890 [3:37:54<37:45, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3897, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3897, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3444, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3381, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3381, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▌ | 761/890 [3:38:12<37:23, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3554, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3908, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3554, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3908, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7463, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3428, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3428, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▌ | 762/890 [3:38:29<36:57, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3353, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3353, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9707, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2656, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2656, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▌ | 763/890 [3:38:46<36:33, 17.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6164, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3284, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6164, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3284, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9448, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3303, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3303, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▌ | 764/890 [3:39:04<36:29, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4961, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3693, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4961, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3693, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8654, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7223, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2521, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7223, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2521, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▌ | 765/890 [3:39:21<36:15, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5618, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2994, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5618, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2994, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8611, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3251, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3251, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▌ | 766/890 [3:39:38<35:51, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4611, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2578, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4611, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2578, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7189, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2785, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2785, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▌ | 767/890 [3:39:56<35:33, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6617, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6617, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0628, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5266, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2912, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5266, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2912, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▋ | 768/890 [3:40:13<35:26, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3563, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3563, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7747, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6203, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2718, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6203, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2718, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  86%|████████▋ | 769/890 [3:40:30<35:03, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3486, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3486, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7723, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8801, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3035, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8801, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 770/890 [3:40:48<34:46, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4781, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4781, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1573, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3842, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3842, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 771/890 [3:41:05<34:24, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1539, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6365, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1539, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6365, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7905, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1606, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3366, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1606, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3366, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 772/890 [3:41:22<34:04, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3608, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0020, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4036, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 773/890 [3:41:40<33:45, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5250, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5250, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9658, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3169, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3169, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 774/890 [3:41:57<33:29, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2772, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2772, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0339, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5080, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5080, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3383, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 775/890 [3:42:14<33:04, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2762, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2762, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8741, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2992, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2992, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 776/890 [3:42:32<32:51, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6438, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3217, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6438, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3217, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9655, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4961, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2561, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4961, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2561, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 777/890 [3:42:49<32:32, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2845, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2845, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2804, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6717, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6717, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  87%|████████▋ | 778/890 [3:43:06<32:23, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3447, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3447, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8605, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6348, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3278, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6348, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3278, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 779/890 [3:43:23<31:58, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8789, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3868, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3080, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3868, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3080, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 780/890 [3:43:41<31:45, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7555, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2678, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7555, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2678, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0233, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5911, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2506, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5911, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2506, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 781/890 [3:43:58<31:21, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3545, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3545, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2350, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2844, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2844, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 782/890 [3:44:15<31:02, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2848, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2848, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8678, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2591, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2591, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 783/890 [3:44:32<30:43, 17.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3133, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3133, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4827, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3409, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3409, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 784/890 [3:44:50<30:36, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3165, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3165, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4220, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3128, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3128, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 785/890 [3:45:08<30:30, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8081, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2492, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8081, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2492, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0573, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8396, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3434, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8396, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3434, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 786/890 [3:45:25<30:18, 17.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3099, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3099, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9853, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3262, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3262, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  88%|████████▊ | 787/890 [3:45:43<29:58, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8483, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3338, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8483, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3338, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1821, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2976, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2976, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▊ | 788/890 [3:46:00<29:48, 17.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7280, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3101, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7280, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3101, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0381, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2987, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2987, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▊ | 789/890 [3:46:18<29:24, 17.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2449, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2449, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2640, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2640, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▉ | 790/890 [3:46:35<29:00, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0404, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3313, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3313, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▉ | 791/890 [3:46:52<28:39, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7890, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3489, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7890, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3489, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1379, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3094, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3094, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▉ | 792/890 [3:47:09<28:18, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3713, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3713, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0716, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9502, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2673, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9502, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2673, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▉ | 793/890 [3:47:27<27:58, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3043, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4376, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4387, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3424, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4387, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3424, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▉ | 794/890 [3:47:44<27:41, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4391, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4391, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0754, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3552, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3552, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▉ | 795/890 [3:48:01<27:26, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8350, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8350, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4171, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2521, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3322, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3322, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  89%|████████▉ | 796/890 [3:48:19<27:14, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3136, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3136, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4898, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3482, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3482, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|████████▉ | 797/890 [3:48:36<27:03, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4247, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4247, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0928, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7698, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3074, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7698, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|████████▉ | 798/890 [3:48:54<26:43, 17.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7813, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7813, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2835, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0649, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3284, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3284, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|████████▉ | 799/890 [3:49:11<26:23, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8679, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3381, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8679, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3381, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2061, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5829, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3085, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5829, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3085, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|████████▉ | 800/890 [3:49:29<26:07, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3659, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6922, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3659, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6922, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0581, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3276, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3276, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|█████████ | 801/890 [3:49:46<25:46, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2652, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2652, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4447, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8900, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2780, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8900, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2780, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|█████████ | 802/890 [3:50:03<25:28, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3518, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3518, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2933, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2933, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|█████████ | 803/890 [3:50:21<25:19, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8330, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4792, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8330, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4792, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7241, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2648, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7241, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2648, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|█████████ | 804/890 [3:50:39<25:09, 17.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6649, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4143, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6649, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4143, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0792, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6865, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2874, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6865, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2874, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  90%|█████████ | 805/890 [3:50:56<24:43, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4959, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4959, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8642, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2790, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2790, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████ | 806/890 [3:51:13<24:19, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4256, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4256, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9812, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7973, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2718, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7973, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2718, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████ | 807/890 [3:51:31<24:03, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3097, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3097, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1362, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3082, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3082, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████ | 808/890 [3:51:48<23:48, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5368, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3349, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5368, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3349, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8717, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3268, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3268, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████ | 809/890 [3:52:05<23:30, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4593, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4593, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1564, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2795, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2795, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████ | 810/890 [3:52:23<23:16, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6712, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4928, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6712, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4928, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1641, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2675, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2675, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████ | 811/890 [3:52:40<22:54, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6442, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2583, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6442, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2583, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9025, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6089, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3235, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6089, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3235, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████ | 812/890 [3:52:58<22:43, 17.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4496, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4496, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9508, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9341, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3073, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9341, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3073, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████▏| 813/890 [3:53:15<22:16, 17.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8366, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3897, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8366, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3897, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2263, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2738, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2738, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  91%|█████████▏| 814/890 [3:53:32<22:01, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7236, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4698, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7236, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4698, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1934, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3265, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3265, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 815/890 [3:53:50<21:41, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0117, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0117, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6137, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6801, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3800, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6801, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3800, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 816/890 [3:54:07<21:25, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3180, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3180, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6908, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3114, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3114, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 817/890 [3:54:25<21:13, 17.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3038, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8349, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7319, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2892, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7319, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2892, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 818/890 [3:54:42<20:53, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3748, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3748, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8721, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3365, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3365, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 819/890 [3:55:00<20:39, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8346, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3263, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8346, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3263, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1609, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6051, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2491, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6051, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2491, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 820/890 [3:55:18<20:30, 17.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3215, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3215, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2448, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4623, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2639, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4623, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2639, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 821/890 [3:55:35<20:11, 17.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3287, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3287, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8100, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3575, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8100, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3575, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 822/890 [3:55:52<19:47, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3756, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3144, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3756, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3144, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6901, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2829, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2829, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  92%|█████████▏| 823/890 [3:56:09<19:20, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5501, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5501, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1105, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7264, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2937, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7264, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2937, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 824/890 [3:56:27<19:05, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3146, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3146, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0156, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5734, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3042, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5734, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 825/890 [3:56:44<18:44, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3799, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3799, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8996, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2554, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2554, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 826/890 [3:57:02<18:44, 17.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5861, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3780, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5861, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3780, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9641, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2553, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2553, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 827/890 [3:57:19<18:22, 17.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3135, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3135, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8307, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2028, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3851, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2028, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3851, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 828/890 [3:57:37<18:10, 17.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3428, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3428, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1485, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2750, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2750, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 829/890 [3:57:54<17:47, 17.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3415, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3415, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6557, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7734, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7734, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2562, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 830/890 [3:58:12<17:22, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2672, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2672, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8841, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8510, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3154, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8510, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3154, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 831/890 [3:58:29<17:02, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6927, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6927, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8310, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7303, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2553, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7303, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2553, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  93%|█████████▎| 832/890 [3:58:46<16:43, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0472, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4124, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0472, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4124, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4597, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3267, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3267, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▎| 833/890 [3:59:03<16:26, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3104, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3104, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9222, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8721, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2751, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8721, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2751, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▎| 834/890 [3:59:21<16:08, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.4897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5706, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.4897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5706, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(6.0603, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4411, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2743, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4411, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2743, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▍| 835/890 [3:59:38<15:50, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4481, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.8423, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4481, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8423, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2904, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4815, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3216, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4815, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3216, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▍| 836/890 [3:59:56<15:39, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2787, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2787, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6934, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9045, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9045, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▍| 837/890 [4:00:13<15:18, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3764, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3764, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0656, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3383, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3383, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▍| 838/890 [4:00:30<14:58, 17.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7316, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3000, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7316, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3000, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0316, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1506, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3633, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1506, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3633, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▍| 839/890 [4:00:47<14:37, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3152, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3152, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7206, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0219, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3425, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0219, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3425, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▍| 840/890 [4:01:04<14:25, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4048, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0256, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9207, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2881, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9207, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2881, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  94%|█████████▍| 841/890 [4:01:22<14:09, 17.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2826, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5608, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2826, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5608, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8434, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3344, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3344, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▍| 842/890 [4:01:39<13:52, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4907, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3716, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4907, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3716, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8623, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2875, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2875, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▍| 843/890 [4:01:56<13:33, 17.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3765, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3765, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3646, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3439, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3439, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▍| 844/890 [4:02:14<13:22, 17.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4063, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0154, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3477, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3477, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▍| 845/890 [4:02:31<13:01, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3390, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3390, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3134, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3134, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▌| 846/890 [4:02:48<12:40, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2999, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7055, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2999, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0054, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2785, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2785, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▌| 847/890 [4:03:06<12:21, 17.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4224, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4224, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7366, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3291, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3291, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▌| 848/890 [4:03:24<12:13, 17.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4479, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4479, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0163, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3323, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3323, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  95%|█████████▌| 849/890 [4:03:41<11:53, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6160, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5044, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6160, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1204, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3089, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3089, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▌| 850/890 [4:03:58<11:33, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0878, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6466, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0878, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6466, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7344, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0669, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2971, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0669, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2971, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▌| 851/890 [4:04:16<11:18, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7866, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5893, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7866, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5893, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3759, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2950, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2950, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▌| 852/890 [4:04:33<11:01, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4631, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4631, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4241, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8291, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2707, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8291, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2707, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▌| 853/890 [4:04:50<10:39, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3630, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3630, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4001, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3453, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3453, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▌| 854/890 [4:05:07<10:22, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.6651, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6651, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2638, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8304, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2987, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8304, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2987, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▌| 855/890 [4:05:25<10:05, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4713, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4713, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1623, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7304, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3052, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7304, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▌| 856/890 [4:05:42<09:47, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2954, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3036, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2954, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5990, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2789, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2789, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▋| 857/890 [4:05:59<09:27, 17.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7225, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3203, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7225, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3203, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0428, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5357, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3155, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5357, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3155, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  96%|█████████▋| 858/890 [4:06:17<09:16, 17.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3726, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3706, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3726, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3706, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7433, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3175, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3175, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 859/890 [4:06:34<08:56, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5479, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3676, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5479, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3676, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9155, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3518, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3518, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 860/890 [4:06:52<08:42, 17.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3680, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3680, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2803, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2911, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2911, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 861/890 [4:07:09<08:28, 17.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3236, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3236, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0723, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3173, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3173, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 862/890 [4:07:26<08:06, 17.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5874, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2968, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5874, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2968, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8842, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8139, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3345, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8139, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3345, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 863/890 [4:07:43<07:46, 17.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6190, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7615, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6190, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7615, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3805, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3089, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3089, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 864/890 [4:08:01<07:35, 17.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3559, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3559, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0187, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2599, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2599, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 865/890 [4:08:19<07:15, 17.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5146, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3425, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5146, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3425, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8571, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0222, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3262, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0222, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3262, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 866/890 [4:08:36<06:56, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3510, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3510, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3765, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7898, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3135, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7898, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3135, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  97%|█████████▋| 867/890 [4:08:53<06:39, 17.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3023, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9742, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3009, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 868/890 [4:09:11<06:24, 17.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1473, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5206, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1473, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5206, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6679, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2737, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2737, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 869/890 [4:09:28<06:05, 17.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3562, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8286, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5380, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2962, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5380, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2962, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 870/890 [4:09:45<05:45, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4086, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4086, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8648, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3252, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3252, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 871/890 [4:10:03<05:28, 17.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4210, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4210, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9252, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3090, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3090, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 872/890 [4:10:20<05:13, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.7620, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7620, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.5375, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4460, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3645, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4460, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3645, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 873/890 [4:10:37<04:54, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3477, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3477, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0602, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3236, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3236, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 874/890 [4:10:55<04:37, 17.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2471, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4114, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2471, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4114, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6585, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.3898, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3683, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3898, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3683, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 875/890 [4:11:12<04:21, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3985, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3985, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9969, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3158, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3158, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  98%|█████████▊| 876/890 [4:11:30<04:06, 17.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2917, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3327, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2917, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3327, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6244, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3457, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3457, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▊| 877/890 [4:11:48<03:48, 17.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2097, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3788, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2097, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3788, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5884, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▊| 878/890 [4:12:05<03:28, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4535, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4535, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1063, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2505, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2505, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▉| 879/890 [4:12:22<03:11, 17.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5130, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3274, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5130, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3274, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8404, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2716, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2716, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▉| 880/890 [4:12:40<02:55, 17.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4288, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3668, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4288, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3668, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7956, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2734, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2734, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▉| 881/890 [4:12:58<02:37, 17.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3288, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3288, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7417, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3133, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3032, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3133, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▉| 882/890 [4:13:15<02:20, 17.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3129, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3129, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0666, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6267, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2432, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6267, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2432, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▉| 883/890 [4:13:33<02:02, 17.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4655, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4655, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0413, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▉| 884/890 [4:13:50<01:44, 17.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2570, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3896, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2570, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3896, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6467, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2636, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2636, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14):  99%|█████████▉| 885/890 [4:14:07<01:27, 17.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3414, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3414, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6837, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5310, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3448, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5310, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3448, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14): 100%|█████████▉| 886/890 [4:14:24<01:09, 17.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2953, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2953, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1206, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1318, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3147, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1318, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3147, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14): 100%|█████████▉| 887/890 [4:14:41<00:51, 17.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2690, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2690, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8743, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3414, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3414, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14): 100%|█████████▉| 888/890 [4:14:58<00:34, 17.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3162, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3162, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5742, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3029, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14): 100%|█████████▉| 889/890 [4:15:15<00:17, 17.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4916, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3196, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4916, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3196, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8113, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2742, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2742, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 14/14): 100%|██████████| 890/890 [4:15:33<00:00, 17.23s/batch]
Validating:   1%|          | 1/126 [00:06<13:06,  6.29s/batch]

Validation dice loss per batch: 0.3282712697982788


Validating:   2%|▏         | 2/126 [00:06<05:52,  2.84s/batch]

Validation dice loss per batch: 0.2564297020435333


Validating:   2%|▏         | 3/126 [00:11<07:12,  3.51s/batch]

Validation dice loss per batch: 0.4380500316619873


Validating:   3%|▎         | 4/126 [00:11<04:39,  2.29s/batch]

Validation dice loss per batch: 0.6628406643867493


Validating:   4%|▍         | 5/126 [00:16<06:17,  3.12s/batch]

Validation dice loss per batch: 0.30660873651504517


Validating:   5%|▍         | 6/126 [00:16<04:23,  2.20s/batch]

Validation dice loss per batch: 0.2966401278972626


Validating:   6%|▌         | 7/126 [00:20<05:52,  2.96s/batch]

Validation dice loss per batch: 0.28690415620803833


Validating:   6%|▋         | 8/126 [00:21<04:13,  2.15s/batch]

Validation dice loss per batch: 0.16402947902679443


Validating:   7%|▋         | 9/126 [00:26<05:44,  2.95s/batch]

Validation dice loss per batch: 0.2871239185333252


Validating:   8%|▊         | 10/126 [00:26<04:11,  2.16s/batch]

Validation dice loss per batch: 0.43682628870010376


Validating:   9%|▊         | 11/126 [00:30<05:26,  2.84s/batch]

Validation dice loss per batch: 0.4439983665943146


Validating:  10%|▉         | 12/126 [00:31<04:00,  2.11s/batch]

Validation dice loss per batch: 0.18914794921875


Validating:  10%|█         | 13/126 [00:35<05:17,  2.81s/batch]

Validation dice loss per batch: 0.5280652046203613


Validating:  11%|█         | 14/126 [00:36<03:57,  2.12s/batch]

Validation dice loss per batch: 0.23620891571044922


Validating:  12%|█▏        | 15/126 [00:41<05:46,  3.12s/batch]

Validation dice loss per batch: 0.26152119040489197


Validating:  13%|█▎        | 16/126 [00:42<04:20,  2.37s/batch]

Validation dice loss per batch: 0.21850433945655823


Validating:  13%|█▎        | 17/126 [00:47<06:03,  3.33s/batch]

Validation dice loss per batch: 0.3521226644515991


Validating:  14%|█▍        | 18/126 [00:48<04:25,  2.46s/batch]

Validation dice loss per batch: 0.4303104281425476


Validating:  15%|█▌        | 19/126 [00:53<05:37,  3.15s/batch]

Validation dice loss per batch: 0.22386571764945984


Validating:  16%|█▌        | 20/126 [00:53<04:06,  2.33s/batch]

Validation dice loss per batch: 0.2179936021566391


Validating:  17%|█▋        | 21/126 [00:59<06:08,  3.51s/batch]

Validation dice loss per batch: 0.30306074023246765


Validating:  17%|█▋        | 22/126 [01:00<04:28,  2.58s/batch]

Validation dice loss per batch: 0.25601112842559814


Validating:  18%|█▊        | 23/126 [01:05<05:49,  3.40s/batch]

Validation dice loss per batch: 0.7338079214096069


Validating:  19%|█▉        | 24/126 [01:05<04:15,  2.50s/batch]

Validation dice loss per batch: 0.9356080889701843


Validating:  20%|█▉        | 25/126 [01:10<05:11,  3.09s/batch]

Validation dice loss per batch: 0.543136477470398


Validating:  21%|██        | 26/126 [01:10<03:48,  2.29s/batch]

Validation dice loss per batch: 0.36978599429130554


Validating:  21%|██▏       | 27/126 [01:16<05:23,  3.27s/batch]

Validation dice loss per batch: 0.2740188241004944


Validating:  22%|██▏       | 28/126 [01:16<03:56,  2.41s/batch]

Validation dice loss per batch: 0.2792717218399048


Validating:  23%|██▎       | 29/126 [01:21<05:03,  3.12s/batch]

Validation dice loss per batch: 0.27605849504470825


Validating:  24%|██▍       | 30/126 [01:21<03:42,  2.31s/batch]

Validation dice loss per batch: 0.3621658980846405


Validating:  25%|██▍       | 31/126 [01:26<04:39,  2.95s/batch]

Validation dice loss per batch: 0.30340057611465454


Validating:  25%|██▌       | 32/126 [01:26<03:25,  2.19s/batch]

Validation dice loss per batch: 0.24101322889328003


Validating:  26%|██▌       | 33/126 [01:32<04:51,  3.14s/batch]

Validation dice loss per batch: 0.25542816519737244


Validating:  27%|██▋       | 34/126 [01:32<03:33,  2.32s/batch]

Validation dice loss per batch: 0.3004602789878845


Validating:  28%|██▊       | 35/126 [01:37<04:33,  3.01s/batch]

Validation dice loss per batch: 0.4221266210079193


Validating:  29%|██▊       | 36/126 [01:37<03:20,  2.23s/batch]

Validation dice loss per batch: 0.5667368173599243


Validating:  29%|██▉       | 37/126 [01:41<04:12,  2.84s/batch]

Validation dice loss per batch: 0.3102909028530121


Validating:  30%|███       | 38/126 [01:42<03:06,  2.11s/batch]

Validation dice loss per batch: 0.42096447944641113


Validating:  31%|███       | 39/126 [01:47<04:31,  3.12s/batch]

Validation dice loss per batch: 0.350353866815567


Validating:  32%|███▏      | 40/126 [01:48<03:18,  2.31s/batch]

Validation dice loss per batch: 0.542613685131073


Validating:  33%|███▎      | 41/126 [01:54<04:58,  3.51s/batch]

Validation dice loss per batch: 0.36804112792015076


Validating:  33%|███▎      | 42/126 [01:54<03:37,  2.59s/batch]

Validation dice loss per batch: 0.536990225315094


Validating:  34%|███▍      | 43/126 [01:59<04:18,  3.11s/batch]

Validation dice loss per batch: 0.4615538716316223


Validating:  35%|███▍      | 44/126 [01:59<03:09,  2.31s/batch]

Validation dice loss per batch: 0.513593316078186


Validating:  36%|███▌      | 45/126 [02:04<04:01,  2.98s/batch]

Validation dice loss per batch: 0.28734302520751953


Validating:  37%|███▋      | 46/126 [02:04<02:57,  2.22s/batch]

Validation dice loss per batch: 0.23297801613807678


Validating:  37%|███▋      | 47/126 [02:09<03:51,  2.93s/batch]

Validation dice loss per batch: 0.4154513478279114


Validating:  38%|███▊      | 48/126 [02:09<02:49,  2.18s/batch]

Validation dice loss per batch: 0.5786483883857727


Validating:  39%|███▉      | 49/126 [02:14<03:42,  2.89s/batch]

Validation dice loss per batch: 0.3670944273471832


Validating:  40%|███▉      | 50/126 [02:14<02:43,  2.15s/batch]

Validation dice loss per batch: 0.2809397876262665


Validating:  40%|████      | 51/126 [02:19<03:35,  2.87s/batch]

Validation dice loss per batch: 0.32874447107315063


Validating:  41%|████▏     | 52/126 [02:19<02:38,  2.14s/batch]

Validation dice loss per batch: 0.30312198400497437


Validating:  42%|████▏     | 53/126 [02:24<03:30,  2.88s/batch]

Validation dice loss per batch: 0.5072320103645325


Validating:  43%|████▎     | 54/126 [02:24<02:34,  2.14s/batch]

Validation dice loss per batch: 1.269230842590332


Validating:  44%|████▎     | 55/126 [02:29<03:19,  2.81s/batch]

Validation dice loss per batch: 1.0197547674179077


Validating:  44%|████▍     | 56/126 [02:29<02:26,  2.10s/batch]

Validation dice loss per batch: 0.25747138261795044


Validating:  45%|████▌     | 57/126 [02:34<03:25,  2.97s/batch]

Validation dice loss per batch: 0.3786945343017578


Validating:  46%|████▌     | 58/126 [02:34<02:30,  2.21s/batch]

Validation dice loss per batch: 0.5656900405883789


Validating:  47%|████▋     | 59/126 [02:40<03:46,  3.38s/batch]

Validation dice loss per batch: 0.20597809553146362


Validating:  48%|████▊     | 60/126 [02:41<02:44,  2.49s/batch]

Validation dice loss per batch: 0.15321418642997742


Validating:  48%|████▊     | 61/126 [02:46<03:25,  3.17s/batch]

Validation dice loss per batch: 0.2795441150665283


Validating:  49%|████▉     | 62/126 [02:46<02:29,  2.34s/batch]

Validation dice loss per batch: 0.7667902708053589


Validating:  50%|█████     | 63/126 [02:52<03:32,  3.37s/batch]

Validation dice loss per batch: 0.3616544306278229


Validating:  51%|█████     | 64/126 [02:52<02:34,  2.49s/batch]

Validation dice loss per batch: 1.0774109363555908


Validating:  52%|█████▏    | 65/126 [02:58<03:36,  3.55s/batch]

Validation dice loss per batch: 0.684039831161499


Validating:  52%|█████▏    | 66/126 [02:59<02:36,  2.61s/batch]

Validation dice loss per batch: 0.8338580131530762


Validating:  53%|█████▎    | 67/126 [03:04<03:19,  3.38s/batch]

Validation dice loss per batch: 0.21457484364509583


Validating:  54%|█████▍    | 68/126 [03:04<02:24,  2.49s/batch]

Validation dice loss per batch: 0.2815746068954468


Validating:  55%|█████▍    | 69/126 [03:09<03:06,  3.27s/batch]

Validation dice loss per batch: 0.7076332569122314


Validating:  56%|█████▌    | 70/126 [03:10<02:15,  2.42s/batch]

Validation dice loss per batch: 0.8275551795959473


Validating:  56%|█████▋    | 71/126 [03:15<02:53,  3.15s/batch]

Validation dice loss per batch: 0.31102508306503296


Validating:  57%|█████▋    | 72/126 [03:15<02:05,  2.33s/batch]

Validation dice loss per batch: 0.1606057584285736


Validating:  58%|█████▊    | 73/126 [03:21<03:02,  3.44s/batch]

Validation dice loss per batch: 0.1648164540529251


Validating:  59%|█████▊    | 74/126 [03:22<02:11,  2.54s/batch]

Validation dice loss per batch: 0.1887277215719223


Validating:  60%|█████▉    | 75/126 [03:26<02:44,  3.23s/batch]

Validation dice loss per batch: 0.11817160248756409


Validating:  60%|██████    | 76/126 [03:27<01:59,  2.39s/batch]

Validation dice loss per batch: 0.4328695237636566


Validating:  61%|██████    | 77/126 [03:32<02:35,  3.17s/batch]

Validation dice loss per batch: 0.11189410835504532


Validating:  62%|██████▏   | 78/126 [03:32<01:52,  2.35s/batch]

Validation dice loss per batch: 0.17171254754066467


Validating:  63%|██████▎   | 79/126 [03:38<02:35,  3.30s/batch]

Validation dice loss per batch: 0.1597682535648346


Validating:  63%|██████▎   | 80/126 [03:38<01:52,  2.44s/batch]

Validation dice loss per batch: 0.11968548595905304


Validating:  64%|██████▍   | 81/126 [03:43<02:25,  3.23s/batch]

Validation dice loss per batch: 1.541668176651001


Validating:  65%|██████▌   | 82/126 [03:44<01:45,  2.39s/batch]

Validation dice loss per batch: 0.22001799941062927


Validating:  66%|██████▌   | 83/126 [03:49<02:18,  3.22s/batch]

Validation dice loss per batch: 0.25471949577331543


Validating:  67%|██████▋   | 84/126 [03:49<01:39,  2.38s/batch]

Validation dice loss per batch: 0.15231791138648987


Validating:  67%|██████▋   | 85/126 [03:54<02:04,  3.04s/batch]

Validation dice loss per batch: 0.25962454080581665


Validating:  68%|██████▊   | 86/126 [03:54<01:30,  2.26s/batch]

Validation dice loss per batch: 0.13460445404052734


Validating:  69%|██████▉   | 87/126 [04:00<02:02,  3.15s/batch]

Validation dice loss per batch: 0.12262102216482162


Validating:  70%|██████▉   | 88/126 [04:00<01:28,  2.33s/batch]

Validation dice loss per batch: 0.6640158295631409


Validating:  71%|███████   | 89/126 [04:05<01:51,  3.02s/batch]

Validation dice loss per batch: 0.1905970573425293


Validating:  71%|███████▏  | 90/126 [04:05<01:20,  2.24s/batch]

Validation dice loss per batch: 0.26677823066711426


Validating:  72%|███████▏  | 91/126 [04:12<02:03,  3.53s/batch]

Validation dice loss per batch: 0.635574460029602


Validating:  73%|███████▎  | 92/126 [04:12<01:28,  2.60s/batch]

Validation dice loss per batch: 0.26278308033943176


Validating:  74%|███████▍  | 93/126 [04:17<01:49,  3.31s/batch]

Validation dice loss per batch: 0.6365296840667725


Validating:  75%|███████▍  | 94/126 [04:17<01:18,  2.45s/batch]

Validation dice loss per batch: 0.2219424843788147


Validating:  75%|███████▌  | 95/126 [04:22<01:39,  3.22s/batch]

Validation dice loss per batch: 0.6275863647460938


Validating:  76%|███████▌  | 96/126 [04:23<01:11,  2.38s/batch]

Validation dice loss per batch: 0.26409921050071716


Validating:  77%|███████▋  | 97/126 [04:28<01:37,  3.35s/batch]

Validation dice loss per batch: 0.260152667760849


Validating:  78%|███████▊  | 98/126 [04:29<01:09,  2.47s/batch]

Validation dice loss per batch: 0.2624831795692444


Validating:  79%|███████▊  | 99/126 [04:34<01:27,  3.22s/batch]

Validation dice loss per batch: 0.12570521235466003


Validating:  79%|███████▉  | 100/126 [04:34<01:01,  2.38s/batch]

Validation dice loss per batch: 0.1114187091588974


Validating:  80%|████████  | 101/126 [04:39<01:19,  3.18s/batch]

Validation dice loss per batch: 0.2256198227405548


Validating:  81%|████████  | 102/126 [04:40<00:56,  2.36s/batch]

Validation dice loss per batch: 0.318433940410614


Validating:  82%|████████▏ | 103/126 [04:45<01:12,  3.15s/batch]

Validation dice loss per batch: 0.2426321804523468


Validating:  83%|████████▎ | 104/126 [04:45<00:51,  2.33s/batch]

Validation dice loss per batch: 0.7140673398971558


Validating:  83%|████████▎ | 105/126 [04:51<01:11,  3.42s/batch]

Validation dice loss per batch: 0.47397440671920776


Validating:  84%|████████▍ | 106/126 [04:52<00:52,  2.61s/batch]

Validation dice loss per batch: 0.1446041613817215


Validating:  85%|████████▍ | 107/126 [04:56<00:59,  3.13s/batch]

Validation dice loss per batch: 0.13180822134017944


Validating:  86%|████████▌ | 108/126 [04:57<00:44,  2.46s/batch]

Validation dice loss per batch: 0.34075239300727844


Validating:  87%|████████▋ | 109/126 [05:01<00:50,  2.99s/batch]

Validation dice loss per batch: 0.1593937873840332


Validating:  87%|████████▋ | 110/126 [05:03<00:40,  2.56s/batch]

Validation dice loss per batch: 0.25666916370391846


Validating:  88%|████████▊ | 111/126 [05:07<00:47,  3.14s/batch]

Validation dice loss per batch: 0.1142447292804718


Validating:  89%|████████▉ | 112/126 [05:08<00:34,  2.47s/batch]

Validation dice loss per batch: 0.49877917766571045


Validating:  90%|████████▉ | 113/126 [05:13<00:39,  3.02s/batch]

Validation dice loss per batch: 0.711448073387146


Validating:  90%|█████████ | 114/126 [05:14<00:29,  2.45s/batch]

Validation dice loss per batch: 0.8914986848831177


Validating:  91%|█████████▏| 115/126 [05:18<00:33,  3.07s/batch]

Validation dice loss per batch: 1.098677396774292


Validating:  92%|█████████▏| 116/126 [05:19<00:24,  2.45s/batch]

Validation dice loss per batch: 0.5727900266647339


Validating:  93%|█████████▎| 117/126 [05:24<00:28,  3.12s/batch]

Validation dice loss per batch: 0.5021151304244995


Validating:  94%|█████████▎| 118/126 [05:26<00:21,  2.74s/batch]

Validation dice loss per batch: 0.7148674726486206


Validating:  94%|█████████▍| 119/126 [05:30<00:21,  3.13s/batch]

Validation dice loss per batch: 0.4677051603794098


Validating:  95%|█████████▌| 120/126 [05:31<00:14,  2.50s/batch]

Validation dice loss per batch: 0.26426589488983154


Validating:  96%|█████████▌| 121/126 [05:35<00:15,  3.04s/batch]

Validation dice loss per batch: 0.4616297483444214


Validating:  97%|█████████▋| 122/126 [05:37<00:10,  2.62s/batch]

Validation dice loss per batch: 0.4244973957538605


Validating:  98%|█████████▊| 123/126 [05:42<00:09,  3.32s/batch]

Validation dice loss per batch: 0.689075231552124


Validating:  98%|█████████▊| 124/126 [05:43<00:05,  2.62s/batch]

Validation dice loss per batch: 0.3067951798439026


Validating:  99%|█████████▉| 125/126 [05:48<00:03,  3.41s/batch]

Validation dice loss per batch: 0.2982234060764313


Validating: 100%|██████████| 126/126 [05:50<00:00,  3.09s/batch]

Validation dice loss per batch: 0.8306617736816406


                                                                

------Final validation dice loss after epoch 14: 0.4022851288318634-------
Learning rate after epoch 14: 0.001
Model saved after epoch 14
Training completed.
