# Importing & Loading Dependencies

In [1]:
!pip install monai

import nibabel as nib
from monai.transforms import LoadImage, Compose, NormalizeIntensityd, RandFlipd, RandAdjustContrastd, Resized,  CropForegroundd, SpatialPadd
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from typing import Optional, Sequence, Tuple, Union
from torch.nn.functional import interpolate

from monai.networks.blocks.convolutions import Convolution
from monai.networks.layers.factories import Act, Norm
from monai.networks.layers.utils import get_act_layer, get_norm_layer
from monai.metrics import DiceMetric, HausdorffDistanceMetric

from torch import nn, optim, amp
from itertools import chain
from monai.losses import DiceLoss

from tqdm import tqdm
from pathlib import Path
import math
import os
import random

Collecting monai
  Downloading monai-1.4.0-py3-none-any.whl.metadata (11 kB)
Downloading monai-1.4.0-py3-none-any.whl (1.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.5/1.5 MB 18.6 MB/s eta 0:00:00
Installing collected packages: monai
Successfully installed monai-1.4.0


# Creating Dataset with Preprocessing

In [2]:
class CustomDataset3D(Dataset):
    """Dataset of four-modality MRI volumes with one segmentation mask per patient.

    For every patient id the four modality volumes (t1c, t1n, t2f, t2w) and
    the ``-seg`` volume are loaded with MONAI's ``LoadImage``, cropped with
    ``CropForegroundd`` (driven by the images), resized/padded to
    128 x 128 x 128 and intensity-normalised. Training mode additionally
    applies a random flip and a random contrast adjustment.

    Args:
        data_dirs: dataset root directories. Stored on the instance but not
            read by this class; patient folders are looked up under the
            fixed roots in ``_DATASET_ROOTS``.
        patient_lists: sequence of patient ids. The second ``-``-separated
            token of an id (e.g. ``GLI``) selects the dataset root.
        mode: ``'training'`` selects the augmenting pipeline; any other value
            (``'validation'``, ``'testing'``) selects the deterministic one.
    """

    # Root folder per dataset tag; ids carrying any other tag use _DEFAULT_ROOT.
    _DATASET_ROOTS = {
        'GLI': '/kaggle/input/bratsglioma/Training',
        'SSA': '/kaggle/input/bratsafrica24',
        'PED': '/kaggle/input/bratsped/Training',
        'MEN': '/kaggle/input/bratsmen',
    }
    _DEFAULT_ROOT = '/kaggle/input/bratsmet24'
    # File-name suffixes of the image modalities, in channel order.
    _MODALITIES = ('t1c', 't1n', 't2f', 't2w')

    def __init__(self, data_dirs, patient_lists, mode):
        self.data_dirs = data_dirs
        self.patient_lists = patient_lists
        self.mode = mode

    @staticmethod
    def resize_with_aspect_ratio(keys, target_size):
        """Build a dict transform that resizes isotropically, then pads.

        Every entry named in ``keys`` is scaled by one common factor (the
        smallest of the three per-axis ratios, so no axis exceeds
        ``target_size``) and then padded with ``SpatialPadd`` up to
        ``target_size``.

        Args:
            keys: dictionary keys to process; ``"imgs"`` is interpolated
                trilinearly, every other key with ``"nearest-exact"``.
            target_size: final spatial size (three values).

        Returns:
            A callable taking and returning the data dictionary.
        """
        def transform(data):
            for key in keys:
                volume = data[key]
                original_shape = volume.shape[-3:]

                scaling_factor = min(
                    target_size[0] / original_shape[0],
                    target_size[1] / original_shape[1],
                    target_size[2] / original_shape[2]
                )

                # Intermediate size that preserves the aspect ratio
                # (truncated, so it never exceeds the target).
                new_shape = [
                    int(dim * scaling_factor) for dim in original_shape
                ]

                # Resize to the intermediate shape.
                resize_transform = Resized(keys=[key], spatial_size=new_shape, mode="trilinear" if key == "imgs" else "nearest-exact")
                data = resize_transform(data)

                # Pad up to the final target size.
                pad_transform = SpatialPadd(keys=[key], spatial_size=target_size, mode="constant")
                data = pad_transform(data)
            return data

        return transform

    @staticmethod
    def _resolve_file_path(folder, name):
        """Return the path of volume ``name`` inside ``folder``.

        Some datasets store ``<name>`` as a directory that contains the real
        ``.nii`` file. In that case the first ``.nii`` file found below the
        directory is returned; otherwise the joined path is returned as is.
        """
        file_path = os.path.join(folder, name)
        if os.path.isdir(file_path):
            for root, _, files in os.walk(file_path):
                for file in files:
                    if file.endswith(".nii"):
                        return os.path.join(root, file)
        return file_path

    def preprocess(self, data, mode):
        """Run the preprocessing pipeline for ``mode`` on ``data``.

        Args:
            data: dictionary with ``'imgs'`` and ``'masks'`` arrays.
            mode: ``'training'`` adds the random augmentations; every other
                value runs crop, resize/pad and normalisation only.

        Returns:
            The transformed data dictionary.
        """
        # Shared by training, validation and testing.
        steps = [
            CropForegroundd(keys=["imgs", "masks"], source_key="imgs"),
            self.resize_with_aspect_ratio(keys=["imgs", "masks"], target_size=[128, 128, 128]),
            NormalizeIntensityd(keys=['imgs'], nonzero=False, channel_wise=True),
        ]

        if mode == 'training':
            steps.extend([
                RandFlipd(keys=["imgs", "masks"], prob=0.5, spatial_axis=2),
                RandAdjustContrastd(keys=["imgs"], prob=0.15, gamma=(0.65, 1.5)),
            ])

        return Compose(steps)(data)

    def __len__(self):
        return len(self.patient_lists)

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at position ``idx``.

        Returns:
            dict with ``'imgs'`` (float tensor, four modality channels),
            ``'masks'`` (float tensor, one channel), ``'patient_id'`` and
            ``'data_type'`` (the dataset tag parsed from the id).
        """
        patient_id = self.patient_lists[idx]
        data_type = patient_id.split('-')[1]
        dataset_root = self._DATASET_ROOTS.get(data_type, self._DEFAULT_ROOT)
        patient_folder_path = os.path.join(dataset_root, patient_id)

        loadimage = LoadImage(reader='NibabelReader', image_only=True)

        # The segmentation goes through the same resolver as the modalities so
        # a directory-style "-seg.nii" entry is handled as well.
        volume_names = [f'{patient_id}-{suffix}.nii' for suffix in self._MODALITIES + ('seg',)]
        volumes = [
            # unsqueeze(0) adds the channel dimension.
            torch.Tensor(loadimage(self._resolve_file_path(patient_folder_path, name))).unsqueeze(0)
            for name in volume_names
        ]

        # A single concatenation also checks that images and mask share one shape.
        concat_tensor = torch.cat(volumes, 0)
        data = {
            'imgs'  : np.array(concat_tensor[0:4, :, :, :]),
            'masks' : np.array(concat_tensor[4:, :, :, :])
        }

        augmented_imgs_masks = self.preprocess(data, self.mode)
        imgs = np.array(augmented_imgs_masks['imgs'])
        masks = np.array(augmented_imgs_masks['masks'])

        return {
            'imgs'  : torch.from_numpy(imgs).type(torch.FloatTensor),
            'masks' : torch.from_numpy(masks).type(torch.FloatTensor),
            'patient_id' : patient_id,
            'data_type' : data_type
        }

# Data Loaders

In [3]:
def combine_datasets(dataset_lists, batch_size=3):
    """Interleave several patient lists so consecutive groups mix all datasets.

    Positions ``0, 1, 2, ...`` are visited up to the longest list's length
    rounded up to a multiple of ``batch_size`` (wrapping back to the start).
    For each position one entry is taken from every dataset, shorter datasets
    being cycled.

    Args:
        dataset_lists: list of indexable patient-id collections.
        batch_size: must equal ``len(dataset_lists)``.

    Returns:
        Flat list of the interleaved entries.

    Raises:
        ValueError: if ``batch_size`` differs from the number of datasets
            (or ``dataset_lists`` is empty).
    """
    longest = max(map(len, dataset_lists))

    # Each emitted group holds one entry per dataset, hence this constraint.
    if len(dataset_lists) != batch_size:
        raise ValueError("Batch size must equal the number of datasets for this function.")

    # Number of positions: the longest length rounded up to a multiple of batch_size.
    n_positions = -(-longest // batch_size) * batch_size

    interleaved = []
    for position in range(n_positions):
        wrapped = position % longest
        interleaved += [patients[wrapped % len(patients)] for patients in dataset_lists]

    return interleaved

In [4]:
def prepare_data_loaders(args):
    """Split every dataset directory and build train/val/test loaders.

    Each directory listed in ``args['data_dirs']`` is listed, shuffled with a
    fixed seed and split 71 % / 9 % / remainder into training, validation and
    testing patients. Training patients are interleaved across datasets with
    ``combine_datasets``; validation and testing patients are concatenated.

    Args:
        args: dict with the keys ``'data_dirs'``, ``'train_batch_size'``,
            ``'val_batch_size'``, ``'test_batch_size'`` and ``'workers'``.
            ``'train_batch_size'`` must equal the number of data directories
            (required by ``combine_datasets``).

    Returns:
        Tuple ``(trainLoader, valLoader, testLoader)``.
    """
    train_datasets, val_datasets, test_datasets = [], [], []
    split_ratio = {'training': 0.71, 'validation': 0.09, 'testing': 0.2}

    for data_dir in args['data_dirs']:
        patient_lists = sorted(os.listdir(data_dir))
        total_patients = len(patient_lists)

        # Re-seeding per directory makes every split reproducible on its own.
        # This resets the global `random` state on every iteration.
        random.seed(5)
        random.shuffle(patient_lists)

        train_split = int(split_ratio['training'] * total_patients)
        val_split = int(split_ratio['validation'] * total_patients)

        train_patient_lists = sorted(patient_lists[:train_split])
        val_patient_lists = sorted(patient_lists[train_split : train_split + val_split])
        # The test split takes whatever is left after the two truncated splits.
        test_patient_lists = sorted(patient_lists[train_split + val_split :])

        # Kaggle paths look like /kaggle/input/<name>/...; fall back to the
        # raw path when the directory is not nested that deeply.
        path_parts = data_dir.split("/")
        dataset_name = path_parts[3] if len(path_parts) > 3 else data_dir

        print(f'Number of training samples in {dataset_name} DataSet: {len(train_patient_lists)}')
        print(f'Number of validation samples in {dataset_name} DataSet: {len(val_patient_lists)}')
        print(f'Number of testing samples in {dataset_name} DataSet: {len(test_patient_lists)} ')

        train_datasets.append(train_patient_lists)
        val_datasets.append(val_patient_lists)
        test_datasets.append(test_patient_lists)

    combined_trainDataset = combine_datasets(train_datasets, batch_size=args['train_batch_size'])
    combined_valDataset = list(chain.from_iterable(val_datasets))
    combined_testDataset = list(chain.from_iterable(test_datasets))

    print('Number of combined training samples', len(combined_trainDataset))
    print('Number of combined validation samples', len(combined_valDataset))
    print('Number of combined testing samples', len(combined_testDataset))

    trainDataset = CustomDataset3D(args['data_dirs'], combined_trainDataset, mode='training')
    valDataset = CustomDataset3D(args['data_dirs'], combined_valDataset, mode='validation')
    testDataset = CustomDataset3D(args['data_dirs'], combined_testDataset, mode='testing')

    # shuffle=False keeps the interleaved order produced by combine_datasets.
    loader_kwargs = {
        'num_workers': args['workers'],
        'pin_memory': True,
        'shuffle': False,
    }
    # DataLoader rejects an explicit prefetch_factor without worker processes,
    # so it is only passed when multiprocessing is actually enabled.
    if args['workers'] > 0:
        loader_kwargs['prefetch_factor'] = 2

    trainLoader = DataLoader(trainDataset, batch_size=args['train_batch_size'], **loader_kwargs)
    valLoader = DataLoader(valDataset, batch_size=args['val_batch_size'], **loader_kwargs)
    testLoader = DataLoader(testDataset, batch_size=args['test_batch_size'], **loader_kwargs)

    return trainLoader, valLoader, testLoader

# Visualizing Data

In [5]:
# args = {
#     'workers': 2,
#     'epochs': 10,
#     'train_batch_size': 2,
#     'val_batch_size': 2,
#     'test_batch_size': 2,
#     'learning_rate': 1e-3,
#     'weight_decay': 1e-5,
#     'lambd': 0.0051,
#     'data_dir': '/kaggle/input/bratsafrica24/',
#     'in_checkpoint_dir': Path('/kaggle/input/adultgliomamodel-45epochs'),
#     'out_checkpoint_dir': Path('/kaggle/working/')
# }

# trainLoader, valLoader, testLoader = prepare_data_loaders(args)

# for step, y in enumerate( trainLoader ):
#   print(y['imgs'].shape)
#   print(y['patient_id'])

#   fig, axes = plt.subplots(1, 4, figsize=(16, 4))
#   for sequence in range(4):
#     sequence_data = y['imgs'][0][sequence, :, :, :].cpu().detach().numpy()
#     slice_index = sequence_data.shape[2] // 2
#     axes[sequence].imshow(np.rot90(sequence_data[:, :, slice_index]), cmap='gray', origin='lower')
#     axes[sequence].set_title(f'Sequence {sequence + 1}')

#   plt.show()

# DynUNet Model

In [6]:
class UnetBasicBlock(nn.Module):
    """
    A CNN module that can be used for DynUNet, based on:
    `Automated Design of Deep Learning Methods for Biomedical Image Segmentation <https://arxiv.org/abs/1904.08128>`_.
    `nnU-Net: Self-adapting Framework for U-Net-Based Medical Image Segmentation <https://arxiv.org/abs/1809.10486>`_.

    The block applies convolution -> normalization -> activation twice; only
    the first convolution uses ``stride``, the second always has stride 1.

    Args:
        spatial_dims: number of spatial dimensions.
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: convolution kernel size.
        stride: stride of the first convolution.
        norm_name: feature normalization type and arguments.
        act_name: activation layer type and arguments.
        dropout: dropout probability (forwarded to ``get_conv_layer``).

    """

    def __init__(
        self,
        spatial_dims: int,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[Sequence[int], int],
        stride: Union[Sequence[int], int],
        norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}),
        act_name: Union[Tuple, str] = ("leakyrelu", {"inplace": True, "negative_slope": 0.01}),
        dropout: Optional[Union[Tuple, str, float]] = None,
    ):
        super().__init__()

        # Settings common to both convolutions.
        shared_conv_args = dict(kernel_size=kernel_size, dropout=dropout, conv_only=True)
        shared_norm_args = dict(name=norm_name, spatial_dims=spatial_dims, channels=out_channels)

        self.conv1 = get_conv_layer(spatial_dims, in_channels, out_channels, stride=stride, **shared_conv_args)
        self.conv2 = get_conv_layer(spatial_dims, out_channels, out_channels, stride=1, **shared_conv_args)
        # One activation module is reused after both normalizations.
        self.lrelu = get_act_layer(name=act_name)
        self.norm1 = get_norm_layer(**shared_norm_args)
        self.norm2 = get_norm_layer(**shared_norm_args)

    def forward(self, inp):
        first_stage = self.lrelu(self.norm1(self.conv1(inp)))
        return self.lrelu(self.norm2(self.conv2(first_stage)))



class UnetUpBlock(nn.Module):
    """
    An upsampling module that can be used for DynUNet, based on:
    `Automated Design of Deep Learning Methods for Biomedical Image Segmentation <https://arxiv.org/abs/1904.08128>`_.
    `nnU-Net: Self-adapting Framework for U-Net-Based Medical Image Segmentation <https://arxiv.org/abs/1809.10486>`_.

    The input is upsampled with a transposed convolution, concatenated with
    the skip tensor along the channel axis and refined by a ``UnetBasicBlock``.

    Args:
        spatial_dims: number of spatial dimensions.
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: convolution kernel size of the refining block.
        upsample_kernel_size: kernel size of the transposed convolution; the
            same value is used as its stride.
        norm_name: feature normalization type and arguments.
        act_name: activation layer type and arguments.
        dropout: dropout probability.
        trans_bias: transposed convolution bias.

    """

    def __init__(
        self,
        spatial_dims: int,
        in_channels: int,
        out_channels: int,
        kernel_size: Union[Sequence[int], int],
        upsample_kernel_size: Union[Sequence[int], int],
        norm_name: Union[Tuple, str] = ("INSTANCE", {"affine": True}),
        act_name: Union[Tuple, str] = ("leakyrelu", {"inplace": True, "negative_slope": 0.01}),
        dropout: Optional[Union[Tuple, str, float]] = None,
        trans_bias: bool = False,
    ):
        super().__init__()

        # Upsampling step: kernel size and stride are deliberately identical.
        self.transp_conv = get_conv_layer(
            spatial_dims,
            in_channels,
            out_channels,
            kernel_size=upsample_kernel_size,
            stride=upsample_kernel_size,
            dropout=dropout,
            bias=trans_bias,
            conv_only=True,
            is_transposed=True,
        )

        # Refinement step: consumes the upsampled features plus the skip
        # tensor, hence twice ``out_channels`` on the input side.
        self.conv_block = UnetBasicBlock(
            spatial_dims,
            2 * out_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=1,
            dropout=dropout,
            norm_name=norm_name,
            act_name=act_name,
        )

    def forward(self, inp, skip):
        # ``skip`` must carry ``out_channels`` channels.
        upsampled = self.transp_conv(inp)
        merged = torch.cat((upsampled, skip), dim=1)
        return self.conv_block(merged)



class UnetOutBlock(nn.Module):
    """Output head: a single 1x1 convolution with bias.

    Args:
        spatial_dims: number of spatial dimensions.
        in_channels: number of input channels.
        out_channels: number of output channels.
        dropout: dropout probability (forwarded to ``get_conv_layer``).
    """

    def __init__(
        self, spatial_dims: int, in_channels: int, out_channels: int, dropout: Optional[Union[Tuple, str, float]] = None
    ):
        super().__init__()
        head_args = dict(kernel_size=1, stride=1, dropout=dropout, bias=True, conv_only=True)
        self.conv = get_conv_layer(spatial_dims, in_channels, out_channels, **head_args)

    def forward(self, inp):
        projected = self.conv(inp)
        return projected
    

def get_conv_layer(
    spatial_dims: int,
    in_channels: int,
    out_channels: int,
    kernel_size: Union[Sequence[int], int] = 3,
    stride: Union[Sequence[int], int] = 1,
    act: Optional[Union[Tuple, str]] = Act.PRELU,
    norm: Union[Tuple, str] = Norm.INSTANCE,
    dropout: Optional[Union[Tuple, str, float]] = None,
    bias: bool = False,
    conv_only: bool = True,
    is_transposed: bool = False,
):
    """Create a MONAI ``Convolution`` with padding derived from kernel and stride.

    The padding comes from ``get_padding``; for transposed convolutions the
    output padding is additionally computed with ``get_output_padding``.
    All remaining arguments are forwarded to ``Convolution`` unchanged.
    """
    padding = get_padding(kernel_size, stride)
    # Output padding only applies to transposed convolutions.
    output_padding = get_output_padding(kernel_size, stride, padding) if is_transposed else None

    conv_options = dict(
        strides=stride,
        kernel_size=kernel_size,
        act=act,
        norm=norm,
        dropout=dropout,
        bias=bias,
        conv_only=conv_only,
        is_transposed=is_transposed,
        padding=padding,
        output_padding=output_padding,
    )
    return Convolution(spatial_dims, in_channels, out_channels, **conv_options)


def get_padding(
    kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int]
) -> Union[Tuple[int, ...], int]:
    """Compute the convolution padding ``(kernel - stride + 1) / 2`` per axis.

    Fractional results are truncated towards zero. A scalar is returned when
    the result has a single component, otherwise a tuple.

    Raises:
        AssertionError: if any padding component would be negative.
    """
    kernels = np.atleast_1d(kernel_size)
    strides = np.atleast_1d(stride)
    raw_padding = (kernels - strides + 1) / 2

    if raw_padding.min() < 0:
        raise AssertionError("padding value should not be negative, please change the kernel size and/or stride.")

    per_axis = tuple(map(int, raw_padding))
    if len(per_axis) > 1:
        return per_axis
    return per_axis[0]


def get_output_padding(
    kernel_size: Union[Sequence[int], int], stride: Union[Sequence[int], int], padding: Union[Sequence[int], int]
) -> Union[Tuple[int, ...], int]:
    """Compute the transposed-convolution output padding per axis.

    The value is ``2 * padding + stride - kernel_size``. A scalar is returned
    when the result has a single component, otherwise a tuple.

    Raises:
        AssertionError: if any output-padding component would be negative.
    """
    kernels = np.atleast_1d(kernel_size)
    strides = np.atleast_1d(stride)
    paddings = np.atleast_1d(padding)

    raw_out_padding = 2 * paddings + strides - kernels
    if raw_out_padding.min() < 0:
        raise AssertionError("out_padding value should not be negative, please change the kernel size and/or stride.")

    per_axis = tuple(map(int, raw_out_padding))
    if len(per_axis) > 1:
        return per_axis
    return per_axis[0]

def set_requires_grad(nets, requires_grad=False):
    """Set ``requires_grad`` on every parameter of one or several networks.

    Args:
        nets: a single network, or a list/tuple of networks. ``None`` entries
            are skipped.
        requires_grad: value assigned to each parameter's ``requires_grad``.
    """
    # Tuples are accepted alongside lists; anything else is treated as a
    # single network.
    if not isinstance(nets, (list, tuple)):
        nets = [nets]
    for net in nets:
        if net is None:
            continue
        for param in net.parameters():
            param.requires_grad = requires_grad

In [7]:
class DynUNet(nn.Module):
    """Fixed-topology U-Net assembled from the DynUNet building blocks.

    The encoder is ``input_conv`` (stride 1) followed by five stride-2 blocks
    and a stride-2 ``bottleneck``. The decoder has six up-blocks, each fed
    with the matching encoder output as skip connection. Three 1x1 heads
    produce the full-resolution prediction and two auxiliary predictions.

    Args:
        spatial_dims: number of spatial dimensions passed to every block.
        in_channels: number of channels of the network input.
        out_channels: number of channels produced by every output head.
        deep_supervision: when True and the module is in training mode, the
            auxiliary heads are evaluated and returned as well.
        KD: when True, the auxiliary heads and the bottleneck feature map are
            returned regardless of the training mode.
    """

    def __init__(
        self,
        spatial_dims: int,
        in_channels: int,
        out_channels: int,
        deep_supervision: bool,
        KD: bool = False
    ):
        super().__init__()
        self.spatial_dims = spatial_dims
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.deep_supervision = deep_supervision
        self.KD_enabled = KD

        # (attribute name, input channels, output channels, stride)
        encoder_layout = (
            ("input_conv", self.in_channels, 64, 1),
            ("down1", 64, 96, 2),   # every stride-2 block halves the spatial dims
            ("down2", 96, 128, 2),
            ("down3", 128, 192, 2),
            ("down4", 192, 256, 2),
            ("down5", 256, 384, 2),
            ("bottleneck", 384, 512, 2),
        )
        for attr, n_in, n_out, step in encoder_layout:
            setattr(self, attr, UnetBasicBlock(spatial_dims=self.spatial_dims,
                                               in_channels=n_in,
                                               out_channels=n_out,
                                               kernel_size=3,
                                               stride=step))

        # (attribute name, input channels, output channels)
        decoder_layout = (
            ("up1", 512, 384),
            ("up2", 384, 256),
            ("up3", 256, 192),
            ("up4", 192, 128),
            ("up5", 128, 96),
            ("up6", 96, 64),
        )
        for attr, n_in, n_out in decoder_layout:
            setattr(self, attr, UnetUpBlock(spatial_dims=self.spatial_dims,
                                            in_channels=n_in,
                                            out_channels=n_out,
                                            kernel_size=3,
                                            upsample_kernel_size=2))

        # (attribute name, input channels); out1 is the full-resolution head.
        head_layout = (("out1", 64), ("out2", 96), ("out3", 128))
        for attr, n_in in head_layout:
            setattr(self, attr, UnetOutBlock(spatial_dims=self.spatial_dims,
                                             in_channels=n_in,
                                             out_channels=self.out_channels))

    def forward(self, input):
        """Run the network.

        Returns:
            ``{'pred': head_output}`` in the plain case. When deep supervision
            is active (training mode) or KD is enabled, ``'pred'`` holds the
            three head outputs stacked along dim 1 and
            ``'bottleneck_feature_map'`` holds the bottleneck output.
        """
        # Shape notes below assume a 128 x 128 x 128 input.
        enc0 = self.input_conv(input)   # B x 64  x 128^3

        # Encoder
        enc1 = self.down1(enc0)         # B x 96  x 64^3
        enc2 = self.down2(enc1)         # B x 128 x 32^3
        enc3 = self.down3(enc2)         # B x 192 x 16^3
        enc4 = self.down4(enc3)         # B x 256 x 8^3
        enc5 = self.down5(enc4)         # B x 384 x 4^3

        # Bottleneck
        bottom = self.bottleneck(enc5)  # B x 512 x 2^3

        # Decoder
        dec1 = self.up1(bottom, enc5)   # B x 384 x 4^3
        dec2 = self.up2(dec1, enc4)     # B x 256 x 8^3
        dec3 = self.up3(dec2, enc3)     # B x 192 x 16^3
        dec4 = self.up4(dec3, enc2)     # B x 128 x 32^3
        dec5 = self.up5(dec4, enc1)     # B x 96  x 64^3
        dec6 = self.up6(dec5, enc0)     # B x 64  x 128^3

        # Output
        main_pred = self.out1(dec6)

        wants_aux = (self.training and self.deep_supervision) or self.KD_enabled
        if not wants_aux:
            return {'pred': main_pred}

        # Auxiliary heads are resized to the full-resolution spatial size so
        # the three predictions can be stacked: B x 3 x C x spatial...
        full_res = main_pred.shape[2:]
        aux_pred_1 = interpolate(self.out2(dec5), full_res)
        aux_pred_2 = interpolate(self.out3(dec4), full_res)
        return {'pred': torch.stack([main_pred, aux_pred_1, aux_pred_2], dim=1),
                'bottleneck_feature_map': bottom}

# Visualizing Model Instance

In [8]:
# !pip install torchsummary
# from torchsummary import summary

# # Initialize your DynUNet model
# model = DynUNet(spatial_dims=3, in_channels=4, out_channels=4, deep_supervision=True, KD=True)

# # Print model summary
# summary(model, input_size=(4, 128, 128, 128))  # Adjust input_size according to your needs

# ClearML

In [9]:
# Install the ClearML client (notebook shell escape) and import its Task API.
!pip install clearml
from clearml import Task

# Configure the ClearML endpoints through environment variables (IPython %env magic).
%env CLEARML_WEB_HOST=https://app.clear.ml/
%env CLEARML_API_HOST=https://api.clear.ml
%env CLEARML_FILES_HOST=https://files.clear.ml
# NOTE(review): the two values below are literal placeholder strings, not
# credentials — presumably they must be replaced with real keys; confirm.
%env CLEARML_API_ACCESS_KEY=CLEARML_API_ACCESS_KEY
%env CLEARML_API_SECRET_KEY=CLEARML_API_SECRET_KEY

Collecting clearml
  Downloading clearml-1.18.0-py2.py3-none-any.whl.metadata (18 kB)
Collecting furl>=2.0.0 (from clearml)
  Downloading furl-2.1.4-py2.py3-none-any.whl.metadata (25 kB)
Collecting pathlib2>=2.3.0 (from clearml)
  Downloading pathlib2-2.3.7.post1-py2.py3-none-any.whl.metadata (3.5 kB)
Collecting pyjwt<2.10.0,>=2.4.0 (from clearml)
  Downloading PyJWT-2.9.0-py3-none-any.whl.metadata (3.0 kB)
Collecting orderedmultidict>=1.0.1 (from furl>=2.0.0->clearml)
  Downloading orderedmultidict-1.0.1-py2.py3-none-any.whl.metadata (1.3 kB)
Downloading clearml-1.18.0-py2.py3-none-any.whl (1.2 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2 MB 17.5 MB/s eta 0:00:00
Downloading furl-2.1.4-py2.py3-none-any.whl (27 kB)
Downloading pathlib2-2.3.7.post1-py2.py3-none-any.whl (18 kB)
Downloading PyJWT-2.9.0-py3-none-any.whl (22 kB)
Downloading orderedmultidict-1.0.1-py2.py3-none-any.whl (11 kB)
Installing collected packag

# GPUs Check

In [10]:
# Report which CUDA devices PyTorch can see (falls back to a CPU notice).
if not torch.cuda.is_available():
    print("No GPU available. Running on CPU.")
else:
    num_gpus = torch.cuda.device_count()
    print(f"Number of GPUs available: {num_gpus}")
    for i in range(num_gpus):
        gpu_name = torch.cuda.get_device_name(i)
        print(f"GPU {i}: {gpu_name}")

Number of GPUs available: 2
GPU 0: Tesla T4
GPU 1: Tesla T4


In [11]:
# # For freeing gpu
# import gc; gc.collect(); torch.cuda.empty_cache()

# Loss Function

In [12]:
class LossFunction(nn.Module):
    """Region-wise Dice + BCE loss on three binary targets derived from the labels.

    The label map ``y`` is turned into three binary masks: ``y > 0``,
    ``y in {1, 3}`` and ``y == 3`` (the ``wt`` / ``tc`` / ``et`` names
    presumably stand for whole tumour, tumour core and enhancing tumour).
    They are compared against prediction channels 1, 2 and 3 respectively;
    channel 0 of the prediction is not used.
    """

    def __init__(self):
        super().__init__()
        self.dice = DiceLoss(sigmoid=True, batch=True, smooth_nr=1e-05, smooth_dr=1e-05)
        self.ce = nn.BCEWithLogitsLoss()

    def _loss(self, p, y):
        """Dice loss plus BCE-with-logits for one binary region."""
        dice_term = self.dice(p, y)
        bce_term = self.ce(p, y.float())
        return dice_term + bce_term

    def forward(self, p, y):
        """Return the sum of the three region losses.

        Args:
            p: logits with at least four channels on dim 1.
            y: label map with a singleton channel dimension on dim 1.
        """
        # Binary targets per region.
        target_wt = y > 0
        target_tc = ((y == 1) + (y == 3)) > 0
        target_et = y == 3

        # Matching prediction channels, kept as single-channel tensors.
        logits_wt = p[:, 1].unsqueeze(1)
        logits_tc = p[:, 2].unsqueeze(1)
        logits_et = p[:, 3].unsqueeze(1)

        loss_wt = self._loss(logits_wt, target_wt)
        loss_tc = self._loss(logits_tc, target_tc)
        loss_et = self._loss(logits_et, target_et)
        return loss_wt + loss_tc + loss_et

# Student KD Model

In [13]:
class CBAMFeatureExtractor(nn.Module):
    """Channel-then-spatial attention applied to a 5-D feature map.

    Args:
        in_channels: number of channels of the incoming feature map.
        reduction: channel reduction ratio of the bottleneck in ``fc``.
        kernel_size: kernel size of the spatial-attention convolution.
    """

    def __init__(self, in_channels, reduction=16, kernel_size=7):
        super().__init__()
        squeezed_channels = in_channels // reduction

        # Channel attention: global avg/max pooling feeding a shared 1x1x1 MLP.
        self.avg_pool = nn.AdaptiveAvgPool3d(1)
        self.max_pool = nn.AdaptiveMaxPool3d(1)
        self.fc = nn.Sequential(
            nn.Conv3d(in_channels, squeezed_channels, 1, bias=False),
            nn.ReLU(),
            nn.Conv3d(squeezed_channels, in_channels, 1, bias=False),
            nn.Sigmoid()
        )

        # Spatial attention: one convolution over the [mean, max] channel pair.
        self.conv = nn.Conv3d(2, 1, kernel_size, padding=kernel_size // 2, bias=False)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return ``x`` re-weighted by channel and then spatial attention."""
        # NOTE(review): each branch is passed through the sigmoid inside
        # ``fc`` before the two are added, so the channel gate lies in (0, 2);
        # confirm this is intended.
        channel_gate = self.fc(self.avg_pool(x)) + self.fc(self.max_pool(x))
        channel_weighted = x * channel_gate

        # Per-voxel statistics across channels drive the spatial gate.
        mean_map = channel_weighted.mean(dim=1, keepdim=True)
        max_map = channel_weighted.max(dim=1, keepdim=True).values
        spatial_gate = self.sigmoid(self.conv(torch.cat([mean_map, max_map], dim=1)))

        return channel_weighted * spatial_gate

In [14]:
class Student_KD_loss(nn.Module):
    """Student network together with its knowledge-distillation loss.

    The module owns the student ``DynUNet`` and, in ``forward``, combines a
    deeply-supervised segmentation loss with a KL-divergence term between the
    attention-enhanced bottleneck features of teacher and student.
    """

    def __init__(self):
        super().__init__()
        self.student = DynUNet( spatial_dims=3, in_channels=4, out_channels=4, deep_supervision=True)
        self.loss_fn = LossFunction()
        # temperature, bce_loss and mse_loss are not used by forward(); they
        # are kept so the module's attributes stay unchanged.
        self.temperature = 5.0
        self.bce_loss = nn.BCEWithLogitsLoss()
        self.mse_loss = nn.MSELoss()
        self.cbam_extractor = CBAMFeatureExtractor(in_channels=512)

    def forward(self, teacher_outputs, y):
        """Compute the student's total loss for one batch.

        Args:
            teacher_outputs: dict holding the teacher's
                ``'bottleneck_feature_map'``.
            y: batch dict holding ``'imgs'`` and ``'masks'``.

        Returns:
            dict with ``'batch_total_student_loss'``, ``'seg_weighted'`` and
            ``'kl_weighted'``.
        """
        # autocast takes a device *type* ('cuda'), not an indexed device
        # string; the tensors themselves decide which GPU is used.
        with amp.autocast('cuda'):
            student_outputs = self.student( y['imgs'] )

            # Deeply-supervised segmentation loss. student_outputs['pred'] is
            # B x 3 x 4 x spatial...: index 0 is the full-resolution head, 1
            # and 2 are the auxiliary heads.
            segloss_s_decoder_1 = self.loss_fn( student_outputs['pred'][:,0], y['masks'] )
            segloss_s_decoder_2 = self.loss_fn( student_outputs['pred'][:,1], y['masks'] )
            segloss_s_decoder_3 = self.loss_fn( student_outputs['pred'][:,2], y['masks'] )

            student_seg_loss = segloss_s_decoder_1 + 0.5*segloss_s_decoder_2 + 0.25*segloss_s_decoder_3

            #-----------------------------------------------------------------------------------#

            # KD loss between the bottleneck layers (KL divergence on CBAM features).
            # NOTE(review): the teacher feature map is not detached here —
            # presumably the caller computes it under no_grad; confirm.
            teacher_bottleneck = teacher_outputs['bottleneck_feature_map']
            student_bottleneck = student_outputs['bottleneck_feature_map']

            # Attention-enhanced features keep the input shape, i.e.
            # (B, 512, d, h, w); 2 x 2 x 2 spatially for 128^3 inputs.
            teacher_bottleneck_att = self.cbam_extractor(teacher_bottleneck)
            student_bottleneck_att = self.cbam_extractor(student_bottleneck)

            # Normalize over the channel dimension: kl_div expects
            # log-probabilities as input and probabilities as target.
            student_probs = F.log_softmax(student_bottleneck_att, dim=1)
            teacher_probs = F.softmax(teacher_bottleneck_att, dim=1)

            kl_loss_with_teacher = F.kl_div(student_probs, teacher_probs, reduction='batchmean')

            #-----------------------------------------------------------------------------------#

            # Weights of the distillation (beta) and segmentation (alpha) terms.
            beta, alpha = 10, 1.0

            print("Seg loss: ", student_seg_loss)
            print("KL loss with teacher: ", kl_loss_with_teacher)

            print("Seg loss weighted: ", alpha*student_seg_loss)
            print("KL loss with teacher weighted: ", beta*kl_loss_with_teacher)

            batch_total_student_loss = alpha*student_seg_loss + beta*kl_loss_with_teacher

            print("-------------Final student loss-------------")
            print(batch_total_student_loss)
            print("-------------Final student loss-------------")

        KD_output = {
            'batch_total_student_loss' : batch_total_student_loss,
            'seg_weighted'   : alpha*student_seg_loss,
            'kl_weighted'    : beta*kl_loss_with_teacher,
        }

        return KD_output

# Training & Validation

In [15]:
def evaluate(model, loader, epoch, task):
    """Run one validation pass and return the mean per-batch loss.

    Seeds torch with 0, switches ``model`` to eval mode, delegates the batch
    loop to ``validate_model`` and the reporting to ``log_val_epoch_losses``.
    Afterwards ``model.student`` is moved to ``cuda:1`` and ``model`` is put
    back into train mode.

    Args:
        model: Wrapper exposing the evaluated network as ``model.student``.
        loader: Validation loader; must support ``len()``.
        epoch: Zero-based epoch index (reported as ``epoch + 1``).
        task: Object whose ``get_logger()`` is used for scalar reporting.

    Returns:
        The accumulated validation loss divided by ``len(loader)``.
    """
    torch.manual_seed(0)
    model.eval()
    criterion = LossFunction()

    per_tumor_losses, loss_sum = validate_model(model, loader, criterion)
    mean_val_loss = loss_sum / len(loader)
    log_val_epoch_losses(per_tumor_losses, epoch, task, mean_val_loss)

    print(f"------Final validation dice loss after epoch {epoch + 1}: {mean_val_loss}-------")

    # Validation runs the student on 'cuda' (see process_batch); send it back
    # to cuda:1 before training resumes.
    model.student.to('cuda:1')
    model.train()

    return mean_val_loss

def validate_model(model, loader, loss_fn):
    """Iterate over ``loader`` without gradients and collect validation losses.

    Args:
        model: Passed through to ``process_batch``.
        loader: Iterable of batches; must support ``len()``.
        loss_fn: Loss callable passed through to ``process_batch``.

    Returns:
        A pair ``(per_tumor, total)`` where ``per_tumor`` maps each tumor type
        ('GLI', 'PED', 'SSA', 'MEN', 'MET') to the list of its batch losses as
        Python numbers, and ``total`` is the running sum of the batch losses.
    """
    per_tumor = {name: [] for name in ('GLI', 'PED', 'SSA', 'MEN', 'MET')}
    total = 0

    progress_bar = tqdm(total=len(loader), desc='Validating', unit='batch', leave=False)
    with progress_bar as progress, torch.no_grad():
        for batch in loader:
            batch_loss, tumor_type = process_batch(model, batch, loss_fn)
            per_tumor[tumor_type].append(batch_loss.item())
            total += batch_loss
            progress.update(1)

    return per_tumor, total

def process_batch(model, y, loss_fn):
    """Compute the validation loss of ``model.student`` on one batch.

    The batch dict ``y`` is updated in place: its 'imgs' and 'masks' entries
    are replaced by their copies on 'cuda'.

    Args:
        model: Wrapper exposing the network as ``model.student``.
        y: Batch mapping with 'imgs', 'masks' and 'data_type' entries.
        loss_fn: Callable applied to ``(output['pred'], masks)``.

    Returns:
        ``(val_loss, data_type)`` where ``data_type`` is the first element of
        ``y['data_type']``.
    """
    images = y['imgs'].to('cuda')
    targets = y['masks'].to('cuda')
    y['imgs'], y['masks'] = images, targets
    tumor_type = y['data_type'][0]

    with torch.amp.autocast('cuda'):
        student = model.student.to('cuda')
        prediction = student(y['imgs'])
        val_loss = loss_fn(prediction['pred'], y['masks'])

    print(f"Validation dice loss per batch: {val_loss}")
    return val_loss, tumor_type

def log_val_epoch_losses(tumors_val_losses, epoch, task, epoch_val_loss):
    """Report per-tumor and overall validation losses for one epoch.

    Args:
        tumors_val_losses: Mapping of tumor type to a list of batch losses.
            An empty list is reported as 0.
        epoch: Zero-based epoch index; scalars are reported at ``epoch + 1``.
        task: Object whose ``get_logger()`` returns a logger with
            ``report_scalar``.
        epoch_val_loss: Overall validation loss for the epoch.
    """
    logger = task.get_logger()
    iteration = epoch + 1

    for tumor_type in tumors_val_losses:
        losses = tumors_val_losses[tumor_type]
        if losses:
            mean_loss = sum(losses) / len(losses)
        else:
            mean_loss = 0
        logger.report_scalar(
            title=f"{tumor_type} Losses over Epochs",
            series=f"{tumor_type} Epoch valLoss",
            iteration=iteration,
            value=mean_loss,
        )

    logger.report_scalar("KD Losses over Epochs", "val_loss", iteration=iteration, value=epoch_val_loss)

In [16]:
def setup_environment(args):
    """Seed torch with 0 and ensure the checkpoint output directory exists.

    Args:
        args: Mapping whose 'out_checkpoint_dir' entry is a ``pathlib.Path``.
    """
    torch.manual_seed(0)
    checkpoint_dir = args['out_checkpoint_dir']
    # parents/exist_ok make this safe to call on every (re)start.
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

def initialize_models():
    """Build the teacher and student models on their respective GPUs.

    Returns:
        ``(teacher_model, student_model)``: a ``DynUNet`` placed on ``cuda:0``
        and a ``Student_KD_loss`` placed on ``cuda:1``.
    """
    teacher = DynUNet(
        spatial_dims=3,
        in_channels=4,
        out_channels=4,
        deep_supervision=True,
        KD=True,
    )
    teacher = teacher.to('cuda:0')

    student = Student_KD_loss()
    student = student.to('cuda:1')

    return teacher, student

def initialize_optimizer_scheduler(student_model, args):
    """Create the AdamW optimizer and plateau LR scheduler for the student.

    Args:
        student_model: Module whose ``parameters()`` are optimized.
        args: Mapping providing 'learning_rate' and 'weight_decay'.

    Returns:
        ``(optimizer, scheduler)``: AdamW with ``eps=1e-4`` and a
        ``ReduceLROnPlateau`` in 'min' mode (factor 0.1, patience 10,
        cooldown 1, threshold 0.001, min_lr 1e-6).
    """
    optimizer = optim.AdamW(
        student_model.parameters(),
        lr=args['learning_rate'],
        weight_decay=args['weight_decay'],
        eps=1e-4,
    )

    plateau_settings = dict(
        mode='min',
        factor=0.1,
        patience=10,
        cooldown=1,
        threshold=0.001,
        min_lr=1e-6,
    )
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, **plateau_settings)

    return optimizer, scheduler

def load_teacher_model(teacher_model, data_type, teacher_model_paths):
    """Load the teacher checkpoint registered for ``data_type``.

    Args:
        teacher_model: Module that receives the ``'teacher_model'`` state dict
            stored in the checkpoint.
        data_type: Key identifying which teacher checkpoint to use.
        teacher_model_paths: Mapping of data type to checkpoint file path.

    Returns:
        True when weights were loaded; False when no path is registered for
        ``data_type`` or the registered file does not exist. In the latter
        case ``teacher_model`` is left untouched and a warning is printed.
    """
    teacher_model_path = teacher_model_paths.get(data_type)
    if not teacher_model_path or not Path(teacher_model_path).is_file():
        # Previously this case was skipped silently, so the caller kept
        # distilling from whatever weights the teacher already held.
        print(
            f"WARNING: no teacher checkpoint available for data type '{data_type}' "
            f"(path: {teacher_model_path}); teacher weights left unchanged"
        )
        return False

    ckpt = torch.load(teacher_model_path, map_location='cuda:0', weights_only=True)
    teacher_model.load_state_dict(ckpt['teacher_model'])
    print(f"Loaded model: {teacher_model_path}")
    return True

def load_student_checkpoint(student_model, optimizer, scaler, scheduler, args):
    """Restore training state from the student checkpoint, if it exists.

    Looks for a fixed checkpoint file name inside ``args['in_checkpoint_dir']``
    and, when present, restores the student weights, optimizer, grad scaler
    and scheduler from it.

    Args:
        student_model: Wrapper whose ``.student`` receives the saved weights.
        optimizer: Receives the 'optimizer_student' state.
        scaler: Receives the 'grad_scaler_state' state.
        scheduler: Receives the 'scheduler_state_dict' state.
        args: Mapping whose 'in_checkpoint_dir' entry is a ``pathlib.Path``.

    Returns:
        The epoch index to resume from (saved epoch + 1), or 0 when the
        checkpoint file is not found.
    """
    resume_file = args['in_checkpoint_dir'] / 'Student_model_after_epoch_6_trainLoss_1.6740_valLoss_0.5214.pth'
    if not resume_file.is_file():
        return 0

    print(f"Found model {resume_file}")
    saved = torch.load(resume_file, map_location='cuda:1', weights_only=True)

    # Restore each stateful component from its checkpoint entry, in order.
    restore_plan = (
        (student_model.student, 'student_model'),
        (optimizer, 'optimizer_student'),
        (scaler, 'grad_scaler_state'),
        (scheduler, 'scheduler_state_dict'),
    )
    for component, key in restore_plan:
        component.load_state_dict(saved[key])

    current_lr = optimizer.param_groups[0]['lr']
    print(f"Loaded student model: {resume_file} with lr: {current_lr}")
    return saved['epoch'] + 1

def train_epoch(epoch, trainLoader, train_config, start_ep):
    """Train the student for one epoch with knowledge distillation.

    Every sample of every batch is processed individually: the teacher
    (on ``cuda:0``) produces its outputs, which are detached and moved to
    ``cuda:1`` where the student wrapper computes the distillation loss.
    Gradients are accumulated and the optimizer is stepped every
    ``accumulation_steps`` samples or at the end of the batch.

    Args:
        epoch: Zero-based index of the current epoch.
        trainLoader: Loader yielding mappings with 'imgs', 'masks' and
            'data_type' entries; must support ``len()``.
        train_config: Mapping providing 'student_model', 'teacher_model',
            'optimizer', 'scaler', 'accumulation_steps',
            'teacher_model_paths', 'task' and 'epochs'.
        start_ep: Epoch index the run started from (progress-bar label only).

    Returns:
        ``(epoch_losses, tumors_losses)``. ``epoch_losses`` maps 'total',
        'kl' and 'seg' to Python floats averaged over ``len(trainLoader)``;
        ``tumors_losses`` maps each tumor type to the list of per-sample
        losses (already divided by ``accumulation_steps``).
    """
    student_model = train_config['student_model']
    teacher_model = train_config['teacher_model']
    optimizer = train_config['optimizer']
    scaler = train_config['scaler']
    accumulation_steps = train_config['accumulation_steps']
    teacher_model_paths = train_config['teacher_model_paths']
    task = train_config['task']

    student_model.train()
    teacher_model.eval()

    epoch_losses = {'total': 0, 'kl': 0, 'seg': 0}
    tumors_losses = {'GLI': [], 'PED': [], 'SSA': [], 'MEN': [], 'MET': []}

    # Data type whose teacher checkpoint was requested last; used to avoid
    # re-reading the same checkpoint from disk for consecutive samples.
    loaded_data_type = None

    with tqdm(total=len(trainLoader), desc=f"(Epoch {epoch + 1}/{start_ep + train_config['epochs']})", unit='batch') as pbar:
        optimizer.zero_grad()

        for y in trainLoader:
            batch_loss = 0
            n_samples = len(y['data_type'])

            for sub_step, data_type in enumerate(y['data_type']):
                imgs = y['imgs'][sub_step].unsqueeze(0).to('cuda:0')
                # Masks are only consumed by the student, so send them straight to its device.
                masks = y['masks'][sub_step].unsqueeze(0).to('cuda:1')

                if data_type != loaded_data_type:
                    load_teacher_model(teacher_model, data_type, teacher_model_paths)
                    loaded_data_type = data_type

                # The teacher is frozen: skip autograd bookkeeping for its forward pass.
                # autocast expects a device *type* ('cuda'), not an indexed device.
                with torch.no_grad(), amp.autocast('cuda'):
                    teacher_outputs = teacher_model(imgs)

                detached_teacher_output = {k: v.detach().to('cuda:1') for k, v in teacher_outputs.items()}
                imgs = imgs.to('cuda:1')

                with amp.autocast('cuda'):
                    student_outputs = student_model(detached_teacher_output, {'imgs': imgs, 'masks': masks})
                    loss = student_outputs['batch_total_student_loss'] / accumulation_steps

                scaler.scale(loss).backward()

                loss_value = loss.item()
                batch_loss += loss_value
                tumors_losses[data_type].append(loss_value)

                task.get_logger().report_scalar(
                    title=f"Tumors training losses per epoch {epoch+1}",
                    series=f"{data_type} loss",
                    iteration=len(tumors_losses[data_type]),
                    value=float(loss_value)
                )

                # Accumulate plain floats: summing the loss tensors themselves
                # would keep their autograd history alive for the whole epoch.
                for key in ('kl', 'seg'):
                    weighted = student_outputs.get(f'{key}_weighted', 0)
                    epoch_losses[key] += float(weighted) / accumulation_steps

                if (sub_step + 1) % accumulation_steps == 0 or (sub_step + 1) == n_samples:
                    scaler.step(optimizer)
                    scaler.update()
                    optimizer.zero_grad()

            epoch_losses['total'] += batch_loss
            pbar.update(1)

    for key in epoch_losses:
        epoch_losses[key] /= len(trainLoader)

    return epoch_losses, tumors_losses

def log_KD_losses_over_epochs(epoch, epoch_losses, tumors_losses, task):
    """Report the epoch-level training losses to the task logger.

    Args:
        epoch: Zero-based epoch index; scalars are reported at ``epoch + 1``.
        epoch_losses: Mapping of loss name to its epoch value; each entry is
            reported under the series ``"<name> loss"``.
        tumors_losses: Mapping of tumor type to a list of losses; the mean of
            each list is reported, or 0 for an empty list.
        task: Object whose ``get_logger()`` returns a logger with
            ``report_scalar``.
    """
    iteration = epoch + 1

    for loss_type, loss_value in epoch_losses.items():
        task.get_logger().report_scalar(
            title="KD Losses over Epochs",
            series=f"{loss_type} loss",
            iteration=iteration,
            value=loss_value,
        )

    for tumor_type in tumors_losses:
        losses = tumors_losses[tumor_type]
        if losses:
            mean_loss = sum(losses) / len(losses)
        else:
            mean_loss = 0
        task.get_logger().report_scalar(
            title=f"{tumor_type} Losses over Epochs",
            series=f"{tumor_type} Epoch trainLoss",
            iteration=iteration,
            value=mean_loss,
        )

def validate_and_save(epoch, valLoader, train_config, epoch_losses):
    """Validate the student, step the LR scheduler and write a checkpoint.

    Args:
        epoch: Zero-based index of the epoch that just finished.
        valLoader: Validation loader passed to ``evaluate``.
        train_config: Mapping providing 'student_model', 'scheduler',
            'optimizer', 'scaler', 'out_checkpoint_dir' and 'task'.
        epoch_losses: Mapping whose 'total' entry is embedded in the
            checkpoint file name.
    """
    student_model = train_config['student_model']
    scheduler = train_config['scheduler']
    optimizer = train_config['optimizer']
    scaler = train_config['scaler']
    task = train_config['task']

    val_loss = evaluate(student_model, valLoader, epoch, task)
    scheduler.step(val_loss)

    # Read the LR after the scheduler step so the logged value is the one in effect next.
    current_lr = optimizer.param_groups[0]['lr']
    task.get_logger().report_scalar("LR", "learning_rate", iteration=epoch+1, value=current_lr)
    print(f"Learning rate after epoch {epoch + 1}: {current_lr}")

    state = dict(
        epoch=epoch,
        student_model=student_model.student.state_dict(),
        optimizer_student=optimizer.state_dict(),
        lr=current_lr,
        grad_scaler_state=scaler.state_dict(),
        scheduler_state_dict=scheduler.state_dict(),
        val_dice_loss=val_loss,
    )

    train_total = epoch_losses["total"]
    file_name = f'Student_model_after_epoch_{epoch + 1}_trainLoss_{train_total:.4f}_valLoss_{val_loss:.4f}.pth'
    torch.save(state, train_config['out_checkpoint_dir'] / file_name)
    print(f"Model saved after epoch {epoch + 1}")

def run_KD(trainLoader, valLoader, args):
    """Run the full knowledge-distillation training loop.

    Sets up the environment, models, optimizer, scheduler and grad scaler,
    resumes from the student checkpoint when one is found, initialises the
    experiment-tracking task, then trains, logs, validates and checkpoints
    for ``args['epochs']`` epochs.

    Args:
        trainLoader: Training loader handed to ``train_epoch``.
        valLoader: Validation loader handed to ``validate_and_save``.
        args: Run configuration; this function reads 'epochs',
            'learning_rate', 'data_dirs' and 'out_checkpoint_dir' and passes
            the mapping on to the helper functions.
    """
    setup_environment(args)
    teacher, student = initialize_models()
    optimizer, scheduler = initialize_optimizer_scheduler(student, args)
    scaler = amp.GradScaler('cuda:1')

    # One teacher checkpoint per tumor data type.
    teacher_model_paths = dict(
        GLI='/kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth',
        SSA='/kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth',
        PED='/kaggle/input/pednewlabel/Teacher_model_after_epoch_99_trainLoss_1.4512_valLoss_1.0042.pth',
        MEN='/kaggle/input/meningiomateachernewlabels/Teacher_model_after_epoch_85_trainLoss_0.5824_valLoss_0.3318.pth',
        MET='/kaggle/input/met-teacher-new-labels/Teacher_model_after_epoch_100_trainLoss_1.6278_valLoss_0.7199.pth',
    )

    start_epoch = load_student_checkpoint(student, optimizer, scaler, scheduler, args)

    task = Task.init(project_name="Ablation Studies", task_name=f"Fairness Ablation Study KL(CBAM) and Seg with DS 8 ep", reuse_last_task_id=True)
    task.connect(args)
    task.add_tags(['CBAM(KL)+SEG', "nourhanahmed00"])

    banner = f'''Starting Knowledge Distillation:
            Epochs:          From {start_epoch + 1} to {start_epoch + args['epochs']}
            Batch size:      5 (effective through gradient accumulation)
            Learning rate:  {args['learning_rate']}
            Training data coming from: {args['data_dirs']}
    '''
    print(banner)

    train_config = dict(
        teacher_model=teacher,
        student_model=student,
        optimizer=optimizer,
        scheduler=scheduler,
        scaler=scaler,
        accumulation_steps=5,
        teacher_model_paths=teacher_model_paths,
        out_checkpoint_dir=args['out_checkpoint_dir'],
        task=task,
        epochs=args['epochs'],
    )

    end_epoch = start_epoch + args['epochs']
    epoch = start_epoch
    while epoch < end_epoch:
        epoch_losses, tumors_losses = train_epoch(epoch, trainLoader, train_config, start_epoch)
        log_KD_losses_over_epochs(epoch, epoch_losses, tumors_losses, task)
        validate_and_save(epoch, valLoader, train_config, epoch_losses)
        epoch += 1

    print("Training completed.")
    task.close()

In [17]:
# Run configuration for the knowledge-distillation experiment. The whole
# mapping is handed to prepare_data_loaders() and run_KD().
args = dict(
    workers=2,
    epochs=2,  # number of epochs to run in this session (on top of any resumed epoch)
    train_batch_size=5,
    val_batch_size=2,
    test_batch_size=1,
    learning_rate=1e-3,
    weight_decay=1e-5,
    lambd=0.0051,
    data_dirs=[
        "/kaggle/input/bratsglioma/Training/",
        "/kaggle/input/bratsafrica24/",
        "/kaggle/input/bratsped/Training/",
        "/kaggle/input/bratsmen/",
        "/kaggle/input/bratsmet24/",
    ],
    in_checkpoint_dir=Path('/kaggle/input/abl-study-klcbam-seg/'),  # searched for a student checkpoint to resume from
    out_checkpoint_dir=Path('/kaggle/working/'),  # new checkpoints are written here
)

trainLoader, valLoader, testLoader = prepare_data_loaders(args)
run_KD(trainLoader, valLoader, args)

Number of training samples in bratsglioma DataSet: 888
Number of validation samples in bratsglioma DataSet: 112
Number of testing samples in bratsglioma DataSet: 251 
Number of training samples in bratsafrica24 DataSet: 102
Number of validation samples in bratsafrica24 DataSet: 13
Number of testing samples in bratsafrica24 DataSet: 30 
Number of training samples in bratsped DataSet: 70
Number of validation samples in bratsped DataSet: 8
Number of testing samples in bratsped DataSet: 21 
Number of training samples in bratsmen DataSet: 710
Number of validation samples in bratsmen DataSet: 90
Number of testing samples in bratsmen DataSet: 200 
Number of training samples in bratsmet24 DataSet: 232
Number of validation samples in bratsmet24 DataSet: 29
Number of testing samples in bratsmet24 DataSet: 67 
Number of combined training samples 4450
Number of combined validation samples 252
Number of combined testing samples 569
Found model /kaggle/input/abl-study-klcbam-seg/Student_model_after_


monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.


monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.



2025-05-19 04:04:07,188 - clearml.model - INFO - Selected model id: 7e60e4ad1cb249bcad529f9a5ee48796
Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5461, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4197, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5461, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(4.1968, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(6.7429, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
2025-05-19 04:04:14,655 - clearml.model - INFO - Selected model id: 13192a13dfc94728a0d25424ae5b4bb7
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4587, device='cud

(Epoch 7/8):   0%|          | 1/890 [00:42<10:37:05, 43.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6591, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4927, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6591, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(4.9269, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.5859, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8269, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.5251, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8269, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(5.2512, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   0%|          | 2/890 [00:55<6:07:20, 24.82s/batch] 

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3364, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(3.3641, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.0157, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3534, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(3.5338, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   0%|          | 3/890 [01:06<4:36:26, 18.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6509, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.4131, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6509, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(4.1308, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(7.7816, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7814, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3892, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7814, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(3.8918, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   0%|          | 4/890 [01:17<3:52:55, 15.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.3241, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(3.2409, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.8091, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2777, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.7772, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|          | 5/890 [01:29<3:29:00, 14.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2470, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.4701, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.3187, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4967, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2709, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4967, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.7090, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|          | 6/890 [01:40<3:16:10, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6212, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2408, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6212, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.4084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0295, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0891, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.2096, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0891, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(2.0962, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|          | 7/890 [01:52<3:07:28, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.1639, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.6392, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.9939, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.1835, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.8353, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|          | 8/890 [02:03<3:01:51, 12.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8443, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.1803, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8443, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.8029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6472, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.1254, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.2541, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|          | 9/890 [02:15<2:57:51, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.1185, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(1.1848, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0430, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0266, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0860, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0266, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.8604, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|          | 10/890 [02:27<2:56:31, 12.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0741, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.7413, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8857, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0562, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5618, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|          | 11/890 [02:39<2:56:31, 12.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0686, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.6862, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.4853, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5576, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0576, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5576, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.5763, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|▏         | 12/890 [02:51<2:56:50, 12.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1520, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0373, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1520, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.3726, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5246, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0472, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.4723, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   1%|▏         | 13/890 [03:03<2:58:09, 12.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8267, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0270, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8267, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2698, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0965, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1227, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0262, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1227, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2620, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 14/890 [03:16<3:00:26, 12.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8274, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0264, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8274, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.2637, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.0911, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0144, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.1443, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 15/890 [03:28<2:59:32, 12.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7757, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0171, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7757, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.1713, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9470, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8643, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0109, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8643, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.1087, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 16/890 [03:41<2:58:53, 12.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0095, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0949, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7615, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0078, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0781, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 17/890 [03:53<2:57:22, 12.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0073, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0726, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5986, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0123, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.1231, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 18/890 [04:05<2:57:09, 12.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0056, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0559, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8474, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0072, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0717, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 19/890 [04:17<2:58:07, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4933, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0051, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4933, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0506, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5440, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0072, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0721, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 20/890 [04:30<2:58:11, 12.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0065, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0654, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0716, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0036, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0358, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 21/890 [04:42<2:57:34, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0874, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0036, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0874, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0358, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1232, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5206, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0031, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5206, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0307, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   2%|▏         | 22/890 [04:54<2:56:36, 12.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6747, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0047, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6747, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0468, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7214, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0810, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0039, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0810, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0389, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 23/890 [05:06<2:55:18, 12.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0031, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0308, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9272, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0045, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0448, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 24/890 [05:18<2:54:16, 12.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5850, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0022, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5850, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0217, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6067, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0026, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0265, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 25/890 [05:30<2:54:20, 12.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0032, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0320, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6052, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0021, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0207, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 26/890 [05:42<2:54:49, 12.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7931, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0034, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7931, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0339, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8270, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0016, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0162, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 27/890 [05:54<2:53:48, 12.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9221, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9221, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0140, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9361, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0019, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0187, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 28/890 [06:06<2:53:00, 12.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9664, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9664, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0144, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9808, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(5.1242, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.1242, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0107, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 29/890 [06:18<2:52:10, 12.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0152, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8549, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0203, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 30/890 [06:30<2:51:47, 11.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0024, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0241, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8748, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0022, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0220, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   3%|▎         | 31/890 [06:42<2:51:52, 12.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8478, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0028, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8478, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0285, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0019, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0191, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▎         | 32/890 [06:54<2:52:04, 12.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6097, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6097, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0093, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6190, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0080, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▎         | 33/890 [07:06<2:51:40, 12.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8131, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8131, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0198, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8328, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2657, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2657, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0090, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▍         | 34/890 [07:18<2:52:47, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0094, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5346, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0019, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5346, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0190, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▍         | 35/890 [07:31<2:52:42, 12.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6962, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6962, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0142, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7104, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0118, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▍         | 36/890 [07:43<2:51:50, 12.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5224, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0024, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5224, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0240, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5464, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1712, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1712, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0124, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▍         | 37/890 [07:55<2:51:27, 12.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7810, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7810, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0068, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7878, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.1008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0018, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0176, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▍         | 38/890 [08:07<2:51:08, 12.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0579, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0579, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0065, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0643, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2878, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2878, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0147, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▍         | 39/890 [08:19<2:50:35, 12.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9523, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9523, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0144, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9668, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0125, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   4%|▍         | 40/890 [08:31<2:51:01, 12.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0121, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3745, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9103, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9103, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0096, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▍         | 41/890 [08:43<2:49:47, 12.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9844, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0076, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▍         | 42/890 [08:55<2:50:40, 12.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0100, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6416, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6416, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0075, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▍         | 43/890 [09:07<2:50:37, 12.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8907, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8907, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0126, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9033, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7392, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7392, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▍         | 44/890 [09:19<2:50:41, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0039, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0039, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0145, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6819, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6819, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▌         | 45/890 [09:31<2:49:48, 12.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8481, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8481, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0147, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8628, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▌         | 46/890 [09:43<2:48:34, 11.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1813, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1813, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0065, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1878, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0086, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▌         | 47/890 [09:55<2:48:21, 11.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7658, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7658, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0133, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7791, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0105, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   5%|▌         | 48/890 [10:07<2:49:04, 12.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0079, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5594, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0094, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▌         | 49/890 [10:19<2:48:51, 12.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0078, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6652, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7190, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7190, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0101, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▌         | 50/890 [10:31<2:48:29, 12.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0130, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6180, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7824, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7824, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▌         | 51/890 [10:43<2:47:54, 12.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5782, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5782, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0141, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5923, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0104, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▌         | 52/890 [10:55<2:47:34, 12.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5876, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5876, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0112, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5988, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7318, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7318, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0119, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▌         | 53/890 [11:07<2:47:28, 12.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0111, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8912, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0093, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▌         | 54/890 [11:19<2:47:05, 11.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0073, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0577, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0080, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▌         | 55/890 [11:31<2:47:15, 12.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4121, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4121, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0079, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4201, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0062, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▋         | 56/890 [11:43<2:46:39, 11.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0039, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0039, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0095, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0134, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0104, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   6%|▋         | 57/890 [11:55<2:45:48, 11.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5264, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5264, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5322, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0096, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 58/890 [12:07<2:45:49, 11.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0109, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7722, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5188, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5188, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0117, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 59/890 [12:19<2:45:47, 11.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5226, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5226, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0129, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5355, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6380, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6380, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0073, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 60/890 [12:31<2:48:28, 12.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0091, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7729, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7729, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0093, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 61/890 [12:44<2:48:29, 12.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0088, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7460, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2745, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2745, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 62/890 [12:56<2:47:05, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4978, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.8418, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.8418, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 63/890 [13:09<2:50:47, 12.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0128, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5804, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0079, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 64/890 [13:23<2:59:39, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0098, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5946, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9779, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9779, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0134, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 65/890 [13:36<3:00:00, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0072, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6073, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0082, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   7%|▋         | 66/890 [13:49<2:56:42, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0101, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7348, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8030, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8030, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0087, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 67/890 [14:01<2:53:24, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6157, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0020, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6157, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0200, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6357, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8745, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8745, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0081, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 68/890 [14:14<2:55:19, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9163, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0016, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9163, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0157, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9320, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0110, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 69/890 [14:26<2:52:01, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0095, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5881, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6583, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6583, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 70/890 [14:38<2:49:40, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9066, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9066, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9112, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0149, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 71/890 [14:50<2:47:36, 12.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5185, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5185, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5236, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0089, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0089, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 72/890 [15:02<2:46:16, 12.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0132, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8679, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0100, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 73/890 [15:14<2:46:26, 12.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0583, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0583, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0077, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0660, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7543, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7543, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 74/890 [15:26<2:45:18, 12.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7057, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   8%|▊         | 75/890 [15:38<2:44:31, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0145, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5609, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8569, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8569, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0091, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▊         | 76/890 [15:50<2:43:30, 12.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0071, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9535, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7192, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7192, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0099, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▊         | 77/890 [16:02<2:43:05, 12.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1103, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1103, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0087, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1189, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▉         | 78/890 [16:14<2:42:46, 12.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0065, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.3859, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1699, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1699, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▉         | 79/890 [16:26<2:43:02, 12.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0626, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0018, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0626, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0176, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0802, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6857, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6857, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0096, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▉         | 80/890 [16:39<2:43:06, 12.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0080, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5331, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0090, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▉         | 81/890 [16:51<2:43:43, 12.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6902, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1704, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1704, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0081, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▉         | 82/890 [17:03<2:43:20, 12.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0119, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3663, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0065, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▉         | 83/890 [17:15<2:42:47, 12.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5273, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5273, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5326, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):   9%|▉         | 84/890 [17:28<2:45:31, 12.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0130, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0867, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0075, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|▉         | 85/890 [17:40<2:45:02, 12.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8201, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8201, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8270, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0081, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|▉         | 86/890 [17:52<2:44:43, 12.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0133, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0082, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|▉         | 87/890 [18:04<2:43:46, 12.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8918, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8918, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8967, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|▉         | 88/890 [18:17<2:43:02, 12.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0106, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1063, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2912, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2912, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|█         | 89/890 [18:28<2:41:37, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8735, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9083, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9083, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0066, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|█         | 90/890 [18:41<2:41:45, 12.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6067, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6067, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6123, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0096, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|█         | 91/890 [18:53<2:40:47, 12.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7355, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7355, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0120, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7474, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|█         | 92/890 [19:05<2:42:17, 12.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0090, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8683, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  10%|█         | 93/890 [19:17<2:41:44, 12.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0101, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4698, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4503, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4503, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█         | 94/890 [19:29<2:41:55, 12.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1846, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1846, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1885, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8581, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8581, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█         | 95/890 [19:42<2:41:00, 12.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1789, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1789, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0075, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1864, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0068, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█         | 96/890 [19:54<2:41:12, 12.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8175, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8175, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0136, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8311, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0080, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█         | 97/890 [20:07<2:43:57, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5530, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5530, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5582, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0086, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█         | 98/890 [20:19<2:43:37, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2284, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0015, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2284, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0154, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2438, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2504, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2504, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0081, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█         | 99/890 [20:31<2:41:57, 12.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0125, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7508, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█         | 100/890 [20:44<2:42:56, 12.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9692, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.2639, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2639, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█▏        | 101/890 [20:56<2:42:25, 12.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0541, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0541, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0080, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0621, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0071, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  11%|█▏        | 102/890 [21:08<2:42:20, 12.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4584, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4584, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0078, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4662, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7997, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7997, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 103/890 [21:21<2:42:09, 12.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4619, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0350, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0350, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0078, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 104/890 [21:33<2:42:33, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5267, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5267, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0094, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8550, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8550, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0086, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 105/890 [21:46<2:42:44, 12.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7642, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7642, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0094, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7736, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 106/890 [21:58<2:40:53, 12.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5876, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5876, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0128, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6004, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 107/890 [22:10<2:39:57, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5081, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5081, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0076, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5157, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0069, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 108/890 [22:22<2:40:53, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1216, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 109/890 [22:35<2:41:08, 12.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9954, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4292, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4292, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 110/890 [22:47<2:40:29, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8945, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8945, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0106, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9051, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0209, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0209, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0061, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  12%|█▏        | 111/890 [23:00<2:40:14, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7863, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7863, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0066, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7930, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3996, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3996, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 112/890 [23:12<2:39:30, 12.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0086, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6449, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9753, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9753, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 113/890 [23:24<2:40:38, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0102, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4839, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 114/890 [23:37<2:39:29, 12.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7088, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0014, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0137, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 115/890 [23:49<2:38:30, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0123, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5784, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2045, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2045, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 116/890 [24:01<2:39:54, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6053, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6053, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0091, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6144, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0101, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 117/890 [24:14<2:38:50, 12.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6541, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6384, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6384, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 118/890 [24:26<2:37:41, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9683, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1391, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1391, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 119/890 [24:38<2:37:06, 12.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6459, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6459, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0104, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6563, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6877, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6877, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  13%|█▎        | 120/890 [24:50<2:37:28, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4515, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0016, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4515, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0155, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4671, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0062, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▎        | 121/890 [25:03<2:37:45, 12.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7555, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7555, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7608, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4164, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4164, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0085, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▎        | 122/890 [25:15<2:38:25, 12.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8039, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0094, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▍        | 123/890 [25:27<2:38:02, 12.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3647, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3647, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3694, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▍        | 124/890 [25:40<2:38:21, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9915, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9915, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0078, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9993, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▍        | 125/890 [25:53<2:39:27, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0091, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9778, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9778, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▍        | 126/890 [26:05<2:37:40, 12.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0126, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1065, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▍        | 127/890 [26:17<2:36:53, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9350, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9350, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9411, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▍        | 128/890 [26:29<2:36:36, 12.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8286, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  14%|█▍        | 129/890 [26:41<2:35:51, 12.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6826, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0068, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▍        | 130/890 [26:54<2:35:22, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1696, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1696, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0123, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1819, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▍        | 131/890 [27:06<2:35:11, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4469, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8089, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8089, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▍        | 132/890 [27:18<2:35:28, 12.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6619, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6619, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0115, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6733, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1126, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1126, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▍        | 133/890 [27:31<2:34:53, 12.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2412, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2412, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0102, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2513, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▌        | 134/890 [27:43<2:34:42, 12.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7858, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7858, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7921, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▌        | 135/890 [27:55<2:34:33, 12.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8291, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8291, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8352, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▌        | 136/890 [28:08<2:35:10, 12.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1844, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4418, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4418, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0096, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  15%|█▌        | 137/890 [28:20<2:35:52, 12.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0100, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▌        | 138/890 [28:32<2:34:31, 12.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0124, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8933, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0002, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0002, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▌        | 139/890 [28:45<2:35:18, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0118, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1075, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▌        | 140/890 [28:58<2:36:22, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6079, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6079, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0134, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6212, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▌        | 141/890 [29:10<2:35:53, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0080, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6204, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0079, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▌        | 142/890 [29:22<2:34:23, 12.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7785, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7785, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0079, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7865, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▌        | 143/890 [29:34<2:33:33, 12.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7908, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6430, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6430, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▌        | 144/890 [29:47<2:32:28, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0732, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0732, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0076, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0808, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0096, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▋        | 145/890 [29:59<2:32:41, 12.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7656, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0061, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  16%|█▋        | 146/890 [30:11<2:33:06, 12.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0072, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5241, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5779, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5779, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 147/890 [30:24<2:31:59, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7367, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7367, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0108, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8766, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8766, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 148/890 [30:36<2:31:40, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9191, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9191, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0103, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9294, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 149/890 [30:48<2:30:41, 12.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2299, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2299, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2348, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0510, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0510, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0062, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 150/890 [31:00<2:29:43, 12.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4319, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4319, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4379, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 151/890 [31:12<2:29:16, 12.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7190, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7190, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7274, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 152/890 [31:24<2:28:58, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0086, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7632, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 153/890 [31:36<2:29:44, 12.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6460, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6460, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0097, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6557, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4779, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4779, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 154/890 [31:48<2:29:21, 12.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0115, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0162, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0073, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  17%|█▋        | 155/890 [32:01<2:30:18, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0069, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0863, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0061, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 156/890 [32:13<2:30:00, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7027, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8931, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8931, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 157/890 [32:25<2:29:13, 12.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0114, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6701, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0800, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0800, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 158/890 [32:37<2:28:17, 12.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5684, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5530, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5530, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 159/890 [32:50<2:28:22, 12.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7404, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7404, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7460, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3157, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3157, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 160/890 [33:02<2:28:41, 12.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0101, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6027, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 161/890 [33:14<2:29:25, 12.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0083, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6805, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5514, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5514, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 162/890 [33:27<2:28:38, 12.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6367, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6367, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6421, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 163/890 [33:39<2:30:21, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4005, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7655, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7655, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  18%|█▊        | 164/890 [33:51<2:29:09, 12.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0108, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7688, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9687, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9687, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▊        | 165/890 [34:04<2:28:27, 12.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9361, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7705, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7705, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▊        | 166/890 [34:16<2:28:14, 12.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6572, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6572, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0074, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6646, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7314, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7314, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▉        | 167/890 [34:28<2:27:03, 12.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0213, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0213, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0098, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0312, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4953, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4953, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▉        | 168/890 [34:40<2:26:33, 12.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0071, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4914, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7085, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7085, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▉        | 169/890 [34:52<2:26:27, 12.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0109, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0319, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▉        | 170/890 [35:05<2:26:39, 12.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7379, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7379, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7423, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9591, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9591, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▉        | 171/890 [35:17<2:26:24, 12.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1670, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6339, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6339, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▉        | 172/890 [35:29<2:26:20, 12.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7747, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4351, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4351, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  19%|█▉        | 173/890 [35:41<2:25:54, 12.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0118, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3507, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3867, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3867, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|█▉        | 174/890 [35:53<2:25:36, 12.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8341, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4510, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4510, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|█▉        | 175/890 [36:06<2:25:28, 12.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2523, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2523, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0115, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2637, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|█▉        | 176/890 [36:18<2:24:32, 12.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0100, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9822, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|█▉        | 177/890 [36:30<2:24:43, 12.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3293, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8741, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8741, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|██        | 178/890 [36:43<2:26:23, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7523, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7523, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7564, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7053, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7053, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|██        | 179/890 [36:55<2:24:50, 12.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2284, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2284, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2344, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9470, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9470, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|██        | 180/890 [37:07<2:24:54, 12.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7316, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7316, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0713, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0713, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|██        | 181/890 [37:19<2:24:30, 12.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3548, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  20%|██        | 182/890 [37:31<2:24:05, 12.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6318, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6318, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6357, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9770, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9770, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██        | 183/890 [37:43<2:23:54, 12.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4766, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██        | 184/890 [37:56<2:24:22, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0095, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7029, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8982, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8982, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██        | 185/890 [38:09<2:25:46, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0929, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██        | 186/890 [38:21<2:27:20, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9042, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██        | 187/890 [38:35<2:30:14, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9803, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9803, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0091, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9894, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6638, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6638, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██        | 188/890 [38:48<2:29:34, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0088, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9233, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0068, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██        | 189/890 [39:00<2:27:47, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0105, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7898, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1807, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1807, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██▏       | 190/890 [39:12<2:26:42, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0073, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0015, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  21%|██▏       | 191/890 [39:25<2:25:25, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5975, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5975, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0122, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6097, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 192/890 [39:37<2:24:22, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0122, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4465, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0073, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 193/890 [39:49<2:24:03, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7228, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7228, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7283, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 194/890 [40:01<2:23:16, 12.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.6743, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.6743, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.6789, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 195/890 [40:14<2:23:42, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7766, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3215, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3215, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 196/890 [40:26<2:23:07, 12.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4384, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 197/890 [40:39<2:22:44, 12.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6404, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6404, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6471, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 198/890 [40:51<2:22:02, 12.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0075, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 199/890 [41:03<2:21:33, 12.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8051, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8051, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8119, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8040, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8040, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  22%|██▏       | 200/890 [41:15<2:21:01, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6127, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6127, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6178, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3300, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3300, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 201/890 [41:28<2:20:56, 12.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6417, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0065, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 202/890 [41:40<2:21:31, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7863, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7863, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0068, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7931, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.1719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.1719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 203/890 [41:53<2:23:13, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0750, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 204/890 [42:06<2:24:03, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7047, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 205/890 [42:18<2:22:39, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0072, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7501, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9200, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9200, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 206/890 [42:30<2:20:49, 12.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6163, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6163, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0100, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6262, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 207/890 [42:43<2:22:09, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 208/890 [42:57<2:27:29, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5652, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5652, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0082, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5734, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4653, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4653, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  23%|██▎       | 209/890 [43:10<2:28:06, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0075, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1931, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9692, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9692, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▎       | 210/890 [43:23<2:26:30, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5919, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▎       | 211/890 [43:36<2:26:00, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7420, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7420, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7452, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▍       | 212/890 [43:48<2:23:30, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0085, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4642, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▍       | 213/890 [44:00<2:21:35, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0075, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2677, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▍       | 214/890 [44:12<2:20:21, 12.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0254, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0013, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0254, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0125, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0380, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9479, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9479, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▍       | 215/890 [44:25<2:19:44, 12.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6219, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.3681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▍       | 216/890 [44:37<2:18:49, 12.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▍       | 217/890 [44:49<2:19:25, 12.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2176, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2176, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2231, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  24%|██▍       | 218/890 [45:02<2:18:33, 12.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1120, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1120, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1168, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▍       | 219/890 [45:14<2:19:31, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0009, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0094, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1522, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6686, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6686, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▍       | 220/890 [45:27<2:19:46, 12.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3345, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▍       | 221/890 [45:39<2:18:17, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9802, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0066, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▍       | 222/890 [45:51<2:17:00, 12.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0996, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0996, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0062, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1058, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▌       | 223/890 [46:03<2:16:16, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4918, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4918, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4975, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▌       | 224/890 [46:15<2:15:16, 12.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0077, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7477, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▌       | 225/890 [46:28<2:14:55, 12.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6309, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  25%|██▌       | 226/890 [46:40<2:14:17, 12.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1176, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▌       | 227/890 [46:52<2:14:18, 12.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8498, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8498, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8532, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0069, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▌       | 228/890 [47:04<2:14:57, 12.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5695, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▌       | 229/890 [47:16<2:14:03, 12.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9694, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9694, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9728, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7443, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7443, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▌       | 230/890 [47:28<2:13:14, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4760, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4760, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4809, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▌       | 231/890 [47:40<2:13:03, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6952, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▌       | 232/890 [47:52<2:12:34, 12.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6697, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6697, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▌       | 233/890 [48:05<2:12:39, 12.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5600, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6615, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6615, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▋       | 234/890 [48:16<2:11:56, 12.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0317, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8877, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8877, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  26%|██▋       | 235/890 [48:29<2:11:41, 12.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4036, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4036, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4088, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 236/890 [48:41<2:12:06, 12.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9647, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 237/890 [48:53<2:11:22, 12.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5356, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 238/890 [49:05<2:11:59, 12.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1753, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1753, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1817, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3240, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3240, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 239/890 [49:17<2:11:42, 12.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6619, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3549, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3549, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 240/890 [49:30<2:12:34, 12.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7812, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7812, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7850, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 241/890 [49:43<2:16:28, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4710, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 242/890 [49:56<2:15:36, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6047, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 243/890 [50:08<2:16:28, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0012, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0123, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6799, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  27%|██▋       | 244/890 [50:21<2:15:10, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6362, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 245/890 [50:33<2:14:58, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5375, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5375, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5412, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 246/890 [50:46<2:13:57, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5051, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0010, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5051, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0104, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5156, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0072, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 247/890 [50:58<2:13:09, 12.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1068, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1068, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1094, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7583, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7583, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 248/890 [51:10<2:11:59, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6234, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6234, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6281, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 249/890 [51:22<2:11:00, 12.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0279, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 250/890 [51:34<2:10:05, 12.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5952, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 251/890 [51:47<2:10:53, 12.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5777, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5777, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5806, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 252/890 [52:00<2:13:05, 12.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8024, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8024, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  28%|██▊       | 253/890 [52:12<2:11:50, 12.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5254, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5254, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5281, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5135, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5135, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▊       | 254/890 [52:24<2:10:33, 12.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7105, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7105, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7140, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▊       | 255/890 [52:36<2:10:40, 12.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6874, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6874, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6933, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2713, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2713, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▉       | 256/890 [52:50<2:12:52, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9474, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9474, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9509, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▉       | 257/890 [53:02<2:12:33, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7148, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6511, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6511, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▉       | 258/890 [53:15<2:12:57, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7029, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6838, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6838, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▉       | 259/890 [53:27<2:12:28, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7376, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7376, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0082, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7458, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2621, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2621, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▉       | 260/890 [53:40<2:13:52, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0075, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6945, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6945, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▉       | 261/890 [53:53<2:11:32, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2000, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  29%|██▉       | 262/890 [54:05<2:10:46, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7849, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7849, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7887, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|██▉       | 263/890 [54:17<2:09:36, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0011, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0107, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2427, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6056, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6056, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|██▉       | 264/890 [54:29<2:08:46, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9222, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9222, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9269, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7483, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7483, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|██▉       | 265/890 [54:42<2:08:54, 12.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6215, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6215, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6247, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|██▉       | 266/890 [54:54<2:09:17, 12.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7388, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7388, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7443, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2410, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2410, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|███       | 267/890 [55:07<2:09:57, 12.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0071, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1109, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8060, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8060, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|███       | 268/890 [55:20<2:09:51, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2369, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0074, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|███       | 269/890 [55:33<2:10:53, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5990, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|███       | 270/890 [55:45<2:09:28, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8095, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8095, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8129, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8738, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8738, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  30%|███       | 271/890 [55:57<2:08:03, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3912, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2456, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2456, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███       | 272/890 [56:09<2:07:08, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7256, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7256, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7282, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2299, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2299, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███       | 273/890 [56:22<2:07:29, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3743, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6312, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6312, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███       | 274/890 [56:36<2:11:53, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2014, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███       | 275/890 [56:49<2:12:29, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7926, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███       | 276/890 [57:01<2:11:43, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4840, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4840, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███       | 277/890 [57:14<2:10:57, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███       | 278/890 [57:27<2:10:00, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0416, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7453, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7453, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███▏      | 279/890 [57:39<2:07:54, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3969, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  31%|███▏      | 280/890 [57:51<2:06:17, 12.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8417, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6815, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6815, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 281/890 [58:03<2:06:25, 12.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5859, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7531, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7531, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 282/890 [58:16<2:07:08, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8554, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 283/890 [58:29<2:07:01, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8543, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8543, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8595, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 284/890 [58:42<2:08:22, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2824, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2824, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2845, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6837, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6837, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 285/890 [58:55<2:08:09, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9678, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 286/890 [59:07<2:06:24, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4202, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 287/890 [59:19<2:05:05, 12.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4595, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4595, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4643, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0157, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0157, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 288/890 [59:32<2:06:05, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6513, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6513, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6540, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  32%|███▏      | 289/890 [59:45<2:07:09, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8744, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8744, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8797, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 290/890 [59:57<2:05:32, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8766, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 291/890 [1:00:09<2:05:10, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6783, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6783, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6809, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 292/890 [1:00:23<2:07:33, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0070, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0401, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3801, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3801, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 293/890 [1:00:36<2:07:44, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9831, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9831, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9851, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 294/890 [1:00:49<2:09:28, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0550, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0550, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0600, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 295/890 [1:01:02<2:07:19, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4172, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8415, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8415, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 296/890 [1:01:14<2:06:27, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5819, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 297/890 [1:01:27<2:04:36, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6451, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  33%|███▎      | 298/890 [1:01:39<2:04:40, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3657, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7700, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7700, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▎      | 299/890 [1:01:53<2:06:47, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3159, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3159, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3203, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▎      | 300/890 [1:02:06<2:07:03, 12.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6065, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8860, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8860, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▍      | 301/890 [1:02:18<2:05:53, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0055, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0055, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0078, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7056, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7056, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▍      | 302/890 [1:02:31<2:03:59, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8443, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.8906, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8906, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▍      | 303/890 [1:02:43<2:02:33, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8569, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8569, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8604, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▍      | 304/890 [1:02:56<2:03:44, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8124, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0420, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0420, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▍      | 305/890 [1:03:08<2:03:08, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0082, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7406, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▍      | 306/890 [1:03:21<2:03:31, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7080, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  34%|███▍      | 307/890 [1:03:34<2:03:06, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5015, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5015, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5073, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9687, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9687, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▍      | 308/890 [1:03:48<2:08:25, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7741, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▍      | 309/890 [1:04:01<2:06:28, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0072, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7616, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▍      | 310/890 [1:04:13<2:03:44, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6930, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▍      | 311/890 [1:04:25<2:01:46, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8807, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8807, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8846, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▌      | 312/890 [1:04:38<2:00:48, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4643, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4643, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4674, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▌      | 313/890 [1:04:50<2:00:45, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9402, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9402, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▌      | 314/890 [1:05:03<2:01:34, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0056, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2643, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4260, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4260, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  35%|███▌      | 315/890 [1:05:16<2:00:56, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2484, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▌      | 316/890 [1:05:29<2:02:35, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7866, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▌      | 317/890 [1:05:42<2:02:36, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7011, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4498, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4498, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▌      | 318/890 [1:05:54<2:00:59, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7188, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7188, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7250, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▌      | 319/890 [1:06:07<2:00:05, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4037, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▌      | 320/890 [1:06:19<2:00:14, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2186, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2186, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2220, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▌      | 321/890 [1:06:32<1:59:51, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6435, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6435, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6466, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▌      | 322/890 [1:06:45<2:00:07, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6191, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0834, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0834, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▋      | 323/890 [1:06:58<2:00:27, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5611, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5611, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5632, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0305, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0305, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  36%|███▋      | 324/890 [1:07:11<2:00:58, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8932, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8932, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8975, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 325/890 [1:07:24<2:00:41, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7400, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 326/890 [1:07:36<2:00:36, 12.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6492, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6492, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6525, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 327/890 [1:07:49<1:59:58, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7241, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7241, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7286, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5291, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5291, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 328/890 [1:08:02<1:59:38, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0075, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0075, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0116, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7668, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7668, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 329/890 [1:08:14<1:58:46, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7325, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7325, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7359, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 330/890 [1:08:27<1:58:10, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6872, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2900, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2900, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 331/890 [1:08:40<1:57:54, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9916, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8791, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8791, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 332/890 [1:08:53<1:58:33, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7644, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7644, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7667, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2819, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2819, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  37%|███▋      | 333/890 [1:09:05<1:58:10, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7943, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1442, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1442, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 334/890 [1:09:17<1:56:20, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4996, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 335/890 [1:09:30<1:55:41, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7212, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7212, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7265, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 336/890 [1:09:43<1:56:47, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7174, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 337/890 [1:09:55<1:56:01, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8514, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4304, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4304, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 338/890 [1:10:08<1:56:27, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7472, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7472, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7515, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2229, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2229, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 339/890 [1:10:21<1:56:22, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5821, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5821, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0071, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5892, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 340/890 [1:10:33<1:55:47, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6927, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5814, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5814, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 341/890 [1:10:46<1:55:03, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0061, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0882, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  38%|███▊      | 342/890 [1:10:58<1:54:35, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6176, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6176, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0059, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0635, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0635, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▊      | 343/890 [1:11:11<1:54:15, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7040, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▊      | 344/890 [1:11:24<1:56:08, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7844, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7844, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7874, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▉      | 345/890 [1:11:37<1:55:55, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8139, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8139, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8170, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▉      | 346/890 [1:11:49<1:55:27, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6219, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▉      | 347/890 [1:12:02<1:55:20, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9556, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▉      | 348/890 [1:12:15<1:54:52, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1606, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▉      | 349/890 [1:12:27<1:53:43, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8325, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8325, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8378, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▉      | 350/890 [1:12:40<1:52:51, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0353, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0353, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  39%|███▉      | 351/890 [1:12:52<1:51:24, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2630, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2630, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0066, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2695, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|███▉      | 352/890 [1:13:04<1:52:10, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|███▉      | 353/890 [1:13:17<1:51:56, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0054, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6479, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|███▉      | 354/890 [1:13:30<1:52:06, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3051, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3051, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3087, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|███▉      | 355/890 [1:13:43<1:53:07, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9518, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5766, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5766, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|████      | 356/890 [1:13:56<1:53:32, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0073, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4754, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|████      | 357/890 [1:14:08<1:53:11, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7081, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9868, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9868, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|████      | 358/890 [1:14:21<1:51:52, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.1130, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.1130, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.1161, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|████      | 359/890 [1:14:33<1:50:33, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8906, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6814, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6814, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  40%|████      | 360/890 [1:14:46<1:51:31, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0073, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4803, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████      | 361/890 [1:14:58<1:50:40, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6735, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████      | 362/890 [1:15:11<1:51:09, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6979, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████      | 363/890 [1:15:24<1:50:56, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2462, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2430, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2430, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████      | 364/890 [1:15:36<1:50:36, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3000, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████      | 365/890 [1:15:49<1:50:17, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8482, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8482, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8527, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████      | 366/890 [1:16:01<1:49:52, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0629, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████      | 367/890 [1:16:14<1:49:17, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0329, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0329, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0365, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7850, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7850, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████▏     | 368/890 [1:16:26<1:48:48, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6542, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1039, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1039, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  41%|████▏     | 369/890 [1:16:39<1:49:13, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6409, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6409, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6438, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 370/890 [1:16:52<1:49:37, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9814, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9814, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9839, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 371/890 [1:17:05<1:50:03, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6303, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6303, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6323, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 372/890 [1:17:18<1:52:06, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9313, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9313, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9359, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 373/890 [1:17:32<1:53:08, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8635, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5621, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5621, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 374/890 [1:17:44<1:51:16, 12.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1762, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3025, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3025, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 375/890 [1:17:56<1:49:35, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5586, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6196, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6196, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 376/890 [1:18:09<1:49:05, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2898, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2898, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2928, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5074, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5074, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 377/890 [1:18:22<1:49:20, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6790, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6790, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6816, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9696, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9696, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  42%|████▏     | 378/890 [1:18:35<1:48:48, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5916, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5916, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5973, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4643, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4643, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 379/890 [1:18:48<1:48:47, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5339, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6756, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6756, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 380/890 [1:19:01<1:49:56, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8139, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7004, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7004, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 381/890 [1:19:13<1:48:50, 12.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7929, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 382/890 [1:19:26<1:47:39, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7347, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9917, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9917, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 383/890 [1:19:38<1:46:30, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5584, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 384/890 [1:19:51<1:46:35, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0076, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6581, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 385/890 [1:20:04<1:46:47, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6546, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 386/890 [1:20:17<1:47:02, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9921, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  43%|████▎     | 387/890 [1:20:29<1:46:49, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7804, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7804, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7853, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▎     | 388/890 [1:20:42<1:46:44, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0066, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6619, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▎     | 389/890 [1:20:55<1:45:55, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0405, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9117, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9117, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▍     | 390/890 [1:21:07<1:45:29, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8188, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8188, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8209, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▍     | 391/890 [1:21:20<1:45:00, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0285, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▍     | 392/890 [1:21:33<1:45:44, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3575, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3575, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3589, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6571, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6571, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▍     | 393/890 [1:21:45<1:44:59, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.2025, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2025, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.2046, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▍     | 394/890 [1:21:58<1:44:45, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3397, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▍     | 395/890 [1:22:11<1:45:04, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0080, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0080, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0106, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  44%|████▍     | 396/890 [1:22:24<1:44:51, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7332, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▍     | 397/890 [1:22:36<1:44:04, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9977, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4469, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4469, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▍     | 398/890 [1:22:48<1:42:56, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.2685, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.2685, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.2702, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9687, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9687, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▍     | 399/890 [1:23:01<1:42:04, 12.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▍     | 400/890 [1:23:13<1:41:53, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7642, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7642, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7678, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▌     | 401/890 [1:23:26<1:41:50, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0636, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0636, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0677, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▌     | 402/890 [1:23:38<1:41:09, 12.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5410, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5410, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5439, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▌     | 403/890 [1:23:50<1:40:36, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0162, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0162, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0180, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  45%|████▌     | 404/890 [1:24:03<1:41:15, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.3470, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.3470, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▌     | 405/890 [1:24:15<1:40:45, 12.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1503, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1503, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1521, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▌     | 406/890 [1:24:28<1:40:07, 12.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8137, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8137, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8183, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.9202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.9202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▌     | 407/890 [1:24:40<1:39:22, 12.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9437, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9437, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0063, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9500, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▌     | 408/890 [1:24:53<1:40:20, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9674, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9674, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9698, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▌     | 409/890 [1:25:05<1:39:54, 12.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1024, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1024, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1066, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▌     | 410/890 [1:25:18<1:39:47, 12.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0288, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8290, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8290, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▌     | 411/890 [1:25:31<1:41:14, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8132, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▋     | 412/890 [1:25:44<1:41:44, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3601, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6055, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6055, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  46%|████▋     | 413/890 [1:25:57<1:42:25, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7040, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7040, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 414/890 [1:26:09<1:40:57, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.2474, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2474, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.2511, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5679, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5679, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 415/890 [1:26:22<1:40:06, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5908, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 416/890 [1:26:34<1:39:57, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4862, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3503, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3503, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 417/890 [1:26:47<1:40:12, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6410, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6410, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6457, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 418/890 [1:27:00<1:40:00, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0064, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8514, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 419/890 [1:27:12<1:39:00, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1027, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4373, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4373, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 420/890 [1:27:25<1:39:40, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 421/890 [1:27:38<1:39:24, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6391, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6391, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6423, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0887, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0887, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  47%|████▋     | 422/890 [1:27:51<1:38:40, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7277, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4616, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4616, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 423/890 [1:28:03<1:37:56, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5752, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5752, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9791, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9791, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 424/890 [1:28:16<1:38:39, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5462, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5462, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5491, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 425/890 [1:28:28<1:37:57, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7162, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7162, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7195, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 426/890 [1:28:41<1:37:16, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7222, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7222, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7261, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 427/890 [1:28:53<1:36:30, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7380, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7380, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 428/890 [1:29:06<1:35:51, 12.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0437, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0437, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 429/890 [1:29:18<1:36:35, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1583, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 430/890 [1:29:31<1:35:41, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9873, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  48%|████▊     | 431/890 [1:29:43<1:35:05, 12.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7286, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7286, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7330, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2484, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2484, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▊     | 432/890 [1:29:56<1:35:26, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.5123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5155, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6502, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6502, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▊     | 433/890 [1:30:09<1:36:16, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3402, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3402, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3426, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▉     | 434/890 [1:30:21<1:35:57, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0828, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0828, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0876, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3250, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3250, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▉     | 435/890 [1:30:34<1:36:01, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7159, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2401, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2401, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▉     | 436/890 [1:30:47<1:37:20, 12.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0649, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0649, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0062, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0711, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.2021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▉     | 437/890 [1:31:00<1:36:06, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4632, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▉     | 438/890 [1:31:12<1:35:16, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.0400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.0432, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▉     | 439/890 [1:31:24<1:34:15, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3396, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3396, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3443, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9120, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9120, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  49%|████▉     | 440/890 [1:31:38<1:35:29, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0084, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0329, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3055, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3055, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|████▉     | 441/890 [1:31:51<1:35:47, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2316, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2316, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2336, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|████▉     | 442/890 [1:32:04<1:36:17, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9183, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|████▉     | 443/890 [1:32:16<1:35:40, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9938, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|████▉     | 444/890 [1:32:30<1:37:40, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7862, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2039, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2039, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|█████     | 445/890 [1:32:43<1:36:43, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4666, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4666, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4684, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|█████     | 446/890 [1:32:56<1:35:26, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5550, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|█████     | 447/890 [1:33:08<1:34:35, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7939, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8641, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8641, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|█████     | 448/890 [1:33:21<1:34:11, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7586, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  50%|█████     | 449/890 [1:33:34<1:33:30, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7448, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6523, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6523, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████     | 450/890 [1:33:46<1:33:32, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5779, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.8844, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8844, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████     | 451/890 [1:33:59<1:33:15, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2768, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8872, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8872, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████     | 452/890 [1:34:12<1:33:32, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4282, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6060, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6060, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████     | 453/890 [1:34:26<1:34:59, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6962, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7478, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7478, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████     | 454/890 [1:34:38<1:33:10, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6148, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6148, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6165, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7554, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7554, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████     | 455/890 [1:34:50<1:31:46, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1788, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1788, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1831, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████     | 456/890 [1:35:03<1:31:42, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1381, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5757, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5757, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████▏    | 457/890 [1:35:15<1:31:11, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2938, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2938, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2987, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5291, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5291, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  51%|█████▏    | 458/890 [1:35:29<1:31:58, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7509, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7509, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7549, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 459/890 [1:35:41<1:31:43, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4850, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3836, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3836, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 460/890 [1:35:54<1:31:54, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6555, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6555, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6588, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 461/890 [1:36:07<1:31:40, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4477, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 462/890 [1:36:20<1:31:09, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5942, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 463/890 [1:36:32<1:29:44, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7432, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7432, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7463, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0639, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0639, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 464/890 [1:36:45<1:30:04, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2677, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2677, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2701, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 465/890 [1:36:58<1:30:12, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 466/890 [1:37:10<1:29:45, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4768, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4768, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4803, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  52%|█████▏    | 467/890 [1:37:23<1:29:04, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7979, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7979, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7998, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5607, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5607, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 468/890 [1:37:36<1:30:50, 12.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8213, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8213, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8240, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 469/890 [1:37:49<1:30:09, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6195, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 470/890 [1:38:01<1:28:57, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6637, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6637, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6685, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.2750, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2750, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 471/890 [1:38:14<1:28:12, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7365, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7365, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 472/890 [1:38:27<1:27:54, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6153, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6153, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6180, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 473/890 [1:38:39<1:27:36, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6006, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 474/890 [1:38:52<1:27:50, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9392, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9392, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9420, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 475/890 [1:39:04<1:27:13, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4605, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  53%|█████▎    | 476/890 [1:39:17<1:26:29, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5983, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2462, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2462, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▎    | 477/890 [1:39:29<1:26:31, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3912, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3912, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.3944, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6627, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6627, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▎    | 478/890 [1:39:42<1:25:54, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7821, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7821, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7853, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5214, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5214, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▍    | 479/890 [1:39:54<1:25:17, 12.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8619, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8619, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8666, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1195, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1195, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▍    | 480/890 [1:40:07<1:26:18, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4599, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4599, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4623, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▍    | 481/890 [1:40:20<1:26:38, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5073, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▍    | 482/890 [1:40:33<1:26:11, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5667, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▍    | 483/890 [1:40:45<1:25:38, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0289, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0289, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0308, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8130, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8130, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▍    | 484/890 [1:40:58<1:25:43, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8509, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6004, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6004, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  54%|█████▍    | 485/890 [1:41:10<1:25:10, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6044, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7068, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7068, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  55%|█████▍    | 486/890 [1:41:22<1:23:46, 12.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2982, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2982, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2997, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3271, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3502e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3271, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  55%|█████▍    | 487/890 [1:41:35<1:23:13, 12.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2056, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6952, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6952, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  55%|█████▍    | 488/890 [1:41:47<1:23:04, 12.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0766, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0766, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0796, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  55%|█████▍    | 489/890 [1:42:00<1:24:00, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9852, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9852, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9892, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3257, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3257, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  55%|█████▌    | 490/890 [1:42:13<1:24:55, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0600, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9388, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9388, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  55%|█████▌    | 491/890 [1:42:26<1:24:12, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4439, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4439, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4493, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1742, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1742, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  55%|█████▌    | 492/890 [1:42:39<1:24:51, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9257, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  55%|█████▌    | 493/890 [1:42:51<1:23:53, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9172, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6416, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6416, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▌    | 494/890 [1:43:03<1:22:40, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9056, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6137, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6137, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▌    | 495/890 [1:43:16<1:22:23, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4480, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▌    | 496/890 [1:43:28<1:21:56, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7752, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6305, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6305, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▌    | 497/890 [1:43:41<1:21:56, 12.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0008, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0077, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9806, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9301, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9301, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▌    | 498/890 [1:43:54<1:22:35, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7796, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9878, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9878, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▌    | 499/890 [1:44:06<1:21:53, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6375, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8997, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8997, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▌    | 500/890 [1:44:19<1:21:16, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5937, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▋    | 501/890 [1:44:32<1:22:11, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6854, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  56%|█████▋    | 502/890 [1:44:44<1:21:39, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5220, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5220, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5256, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 503/890 [1:44:56<1:20:50, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7018, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7018, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7067, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 504/890 [1:45:09<1:20:39, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6040, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 505/890 [1:45:22<1:20:32, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5695, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7323, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7323, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 506/890 [1:45:35<1:21:07, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4706, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6607, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6607, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 507/890 [1:45:47<1:20:15, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5049, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 508/890 [1:46:00<1:20:12, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4717, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4717, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0058, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4775, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.2185, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2185, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 509/890 [1:46:12<1:19:32, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5811, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 510/890 [1:46:24<1:19:10, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6365, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6365, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6401, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  57%|█████▋    | 511/890 [1:46:37<1:18:40, 12.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8380, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8596, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8596, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 512/890 [1:46:49<1:18:37, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4727, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9191, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9191, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 513/890 [1:47:02<1:18:23, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2794, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 514/890 [1:47:14<1:18:46, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8502, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8502, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8547, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5327, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5327, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 515/890 [1:47:27<1:18:41, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0074, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0622, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 516/890 [1:47:40<1:19:20, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8935, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0007, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8935, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0067, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9002, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2523, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2523, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 517/890 [1:47:53<1:19:12, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9675, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 518/890 [1:48:05<1:18:14, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1264, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1264, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1290, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9734, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9734, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 519/890 [1:48:18<1:17:48, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6841, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0082, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0082, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  58%|█████▊    | 520/890 [1:48:30<1:17:37, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4696, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▊    | 521/890 [1:48:43<1:17:05, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3205, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3205, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3220, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6421, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6421, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▊    | 522/890 [1:48:56<1:17:37, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6072, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6072, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6103, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▉    | 523/890 [1:49:09<1:18:24, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7582, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7179, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7179, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▉    | 524/890 [1:49:22<1:18:00, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2826, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2826, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2845, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8435, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8435, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▉    | 525/890 [1:49:35<1:18:22, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2338, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▉    | 526/890 [1:49:47<1:17:14, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7281, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1560, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1560, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▉    | 527/890 [1:49:59<1:16:22, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6876, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▉    | 528/890 [1:50:12<1:15:47, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1291, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1291, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1307, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  59%|█████▉    | 529/890 [1:50:25<1:16:05, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0376, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0376, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0408, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|█████▉    | 530/890 [1:50:38<1:16:38, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5954, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5954, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5994, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4764, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4764, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|█████▉    | 531/890 [1:50:50<1:16:01, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6145, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|█████▉    | 532/890 [1:51:04<1:16:38, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.4095, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.4095, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.4131, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5695, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5695, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|█████▉    | 533/890 [1:51:16<1:16:01, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0062, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6909, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|██████    | 534/890 [1:51:29<1:15:14, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0227, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0227, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0270, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|██████    | 535/890 [1:51:45<1:21:45, 13.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7224, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7224, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7245, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|██████    | 536/890 [1:51:58<1:19:36, 13.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4167, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|██████    | 537/890 [1:52:11<1:18:22, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6964, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1717, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1717, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  60%|██████    | 538/890 [1:52:24<1:17:26, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8222, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8222, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8237, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5139, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5139, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████    | 539/890 [1:52:37<1:16:47, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0052, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9821, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████    | 540/890 [1:52:49<1:15:52, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9303, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8849, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8849, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████    | 541/890 [1:53:02<1:14:56, 12.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0050, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7836, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3986, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3986, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████    | 542/890 [1:53:14<1:13:51, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6131, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████    | 543/890 [1:53:27<1:13:11, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6435, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6435, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6457, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████    | 544/890 [1:53:40<1:13:37, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0995, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4522, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4522, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████    | 545/890 [1:53:53<1:13:16, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2512, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2512, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2551, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████▏   | 546/890 [1:54:05<1:13:08, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6416, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  61%|██████▏   | 547/890 [1:54:18<1:12:25, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8026, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8026, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8052, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 548/890 [1:54:31<1:12:20, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3368, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3368, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 549/890 [1:54:43<1:11:50, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5754, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5754, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5773, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8031, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8031, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 550/890 [1:54:55<1:11:16, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 551/890 [1:55:08<1:10:39, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0326, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5995, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5995, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 552/890 [1:55:20<1:10:39, 12.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0469, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0469, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0496, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 553/890 [1:55:33<1:11:15, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9948, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 554/890 [1:55:47<1:11:41, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2983, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5816, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5816, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 555/890 [1:55:59<1:10:58, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5213, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6768, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6768, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  62%|██████▏   | 556/890 [1:56:12<1:11:54, 12.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.3991, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 557/890 [1:56:25<1:11:06, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7761, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 558/890 [1:56:38<1:10:25, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2024, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 559/890 [1:56:50<1:09:50, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5022, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5022, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5043, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 560/890 [1:57:03<1:10:07, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9766, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9766, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 561/890 [1:57:16<1:10:01, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9319, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9319, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9365, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 562/890 [1:57:28<1:09:24, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7072, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5283, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5283, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 563/890 [1:57:41<1:09:09, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4735, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8626, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8626, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 564/890 [1:57:53<1:08:27, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2783, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2783, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2837, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  63%|██████▎   | 565/890 [1:58:06<1:08:14, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1973, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0257, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0257, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▎   | 566/890 [1:58:18<1:07:23, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3458, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3458, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6271, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6271, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▎   | 567/890 [1:58:31<1:07:10, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2618, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▍   | 568/890 [1:58:43<1:06:49, 12.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1528, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▍   | 569/890 [1:58:56<1:07:08, 12.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3945, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▍   | 570/890 [1:59:09<1:07:47, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7384, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7384, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▍   | 571/890 [1:59:21<1:07:00, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9093, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▍   | 572/890 [1:59:34<1:07:13, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9829, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9829, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9845, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▍   | 573/890 [1:59:47<1:06:51, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6675, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6933, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6933, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  64%|██████▍   | 574/890 [1:59:59<1:06:11, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0944, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0944, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0963, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▍   | 575/890 [2:00:12<1:06:07, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9704, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9704, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9743, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▍   | 576/890 [2:00:24<1:05:49, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9105, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9105, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9132, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▍   | 577/890 [2:00:37<1:05:48, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6603, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0045, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0045, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▍   | 578/890 [2:00:50<1:05:53, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5207, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5207, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5229, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▌   | 579/890 [2:01:02<1:05:35, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6307, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6307, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6346, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7055, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7055, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▌   | 580/890 [2:01:15<1:05:52, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7697, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7697, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7721, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0055, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▌   | 581/890 [2:01:28<1:05:13, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3200, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3200, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3230, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  65%|██████▌   | 582/890 [2:01:40<1:04:31, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6649, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▌   | 583/890 [2:01:53<1:03:56, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6284, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6284, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6329, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▌   | 584/890 [2:02:05<1:04:13, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▌   | 585/890 [2:02:18<1:04:12, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9829, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9829, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9854, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7869, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7869, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▌   | 586/890 [2:02:31<1:04:56, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9964, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6975, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6975, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▌   | 587/890 [2:02:44<1:04:28, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7193, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7417, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7417, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▌   | 588/890 [2:02:57<1:04:08, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5219, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5219, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5257, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3224, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6354e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3224, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  66%|██████▌   | 589/890 [2:03:10<1:04:26, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7690, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▋   | 590/890 [2:03:22<1:03:49, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  66%|██████▋   | 591/890 [2:03:35<1:03:08, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7269, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7269, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0062, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7331, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 592/890 [2:03:48<1:03:16, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5035, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8218, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8218, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 593/890 [2:04:00<1:02:52, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2917, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2917, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2930, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2360, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2360, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 594/890 [2:04:13<1:03:08, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6999, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6999, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7019, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4140, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4140, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 595/890 [2:04:26<1:02:45, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5343, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6641, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6641, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 596/890 [2:04:40<1:03:43, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4938, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4938, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4956, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7370, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7370, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 597/890 [2:04:52<1:03:00, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0294, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0294, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0324, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 598/890 [2:05:05<1:02:03, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8212, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8212, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8244, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2785, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2785, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 599/890 [2:05:17<1:01:31, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7824, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7824, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7842, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  67%|██████▋   | 600/890 [2:05:30<1:02:12, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9051, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9051, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9072, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8107, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8107, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 601/890 [2:05:43<1:01:25, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1035, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 602/890 [2:05:56<1:00:58, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6328, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6328, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6347, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 603/890 [2:06:09<1:01:18, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6648, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6648, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6666, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 604/890 [2:06:21<1:01:12, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5996, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5996, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6010, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 605/890 [2:06:35<1:01:32, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1037, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1037, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 606/890 [2:06:47<1:00:30, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3695, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3695, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3713, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 607/890 [2:07:00<1:00:00, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6533, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 608/890 [2:07:12<59:22, 12.63s/batch]  

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1479, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  68%|██████▊   | 609/890 [2:07:24<58:39, 12.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5964, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5964, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5985, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7699, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7699, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▊   | 610/890 [2:07:37<58:17, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6218, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6218, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6230, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.2660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▊   | 611/890 [2:07:50<58:26, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7672, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▉   | 612/890 [2:08:02<58:41, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2972, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5638, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5638, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▉   | 613/890 [2:08:16<59:05, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6378, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6378, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6403, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1565, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4682e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1565, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  69%|██████▉   | 614/890 [2:08:28<58:37, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0900, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9515, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9515, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▉   | 615/890 [2:08:41<57:53, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0574, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▉   | 616/890 [2:08:54<58:09, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5831, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5831, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5863, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▉   | 617/890 [2:09:06<57:49, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8390, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  69%|██████▉   | 618/890 [2:09:19<57:44, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7344, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7344, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7374, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3079, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3079, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|██████▉   | 619/890 [2:09:32<57:49, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1849, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1849, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1882, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|██████▉   | 620/890 [2:09:45<58:08, 12.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2148, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2148, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2170, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|██████▉   | 621/890 [2:09:58<57:39, 12.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6859, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5074, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5074, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|██████▉   | 622/890 [2:10:10<56:39, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.8377, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.8377, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.8392, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8209, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8209, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|███████   | 623/890 [2:10:22<56:02, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6012, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|███████   | 624/890 [2:10:35<56:19, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7481, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7481, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7511, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0790, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0790, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0049, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|███████   | 625/890 [2:10:48<56:11, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.7710, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.7710, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.7745, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|███████   | 626/890 [2:11:01<56:17, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6014, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6014, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6049, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  70%|███████   | 627/890 [2:11:14<56:05, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6303, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7566e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  71%|███████   | 628/890 [2:11:27<55:50, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8196, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8196, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8226, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  71%|███████   | 629/890 [2:11:40<55:44, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2770, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2770, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0045, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2816, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8856e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  71%|███████   | 630/890 [2:11:52<55:11, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5899, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6445, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6445, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  71%|███████   | 631/890 [2:12:05<54:36, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2786, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  71%|███████   | 632/890 [2:12:17<54:03, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6896, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6896, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6917, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4130, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7731e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4130, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  71%|███████   | 633/890 [2:12:30<53:47, 12.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9937, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6053, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6053, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  71%|███████   | 634/890 [2:12:42<53:43, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8904, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8904, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8917, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5829, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5829, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  71%|███████▏  | 635/890 [2:12:55<53:59, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9986, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9986, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0007, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8770, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8770, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  71%|███████▏  | 636/890 [2:13:08<53:25, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5330, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3182, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6011e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3182, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  72%|███████▏  | 637/890 [2:13:20<53:30, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7929, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 638/890 [2:13:33<53:18, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7606, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7606, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7638, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2779, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2779, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 639/890 [2:13:46<52:47, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0016, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0016, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0035, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0891, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0891, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 640/890 [2:13:58<52:44, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7865, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5926, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5926, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 641/890 [2:14:11<52:36, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6896, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 642/890 [2:14:24<52:37, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6876, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 643/890 [2:14:37<52:41, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6426, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.8688, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8688, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 644/890 [2:14:50<52:21, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9030, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8819, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8819, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  72%|███████▏  | 645/890 [2:15:03<52:42, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9998, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9998, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0028, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 646/890 [2:15:15<52:00, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4875, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6491, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6491, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 647/890 [2:15:28<51:12, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1248, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1248, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1273, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 648/890 [2:15:40<50:58, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0235, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0235, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0267, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 649/890 [2:15:53<50:51, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3111, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 650/890 [2:16:06<50:42, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3492, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4261, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4261, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 651/890 [2:16:19<50:50, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9059, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 652/890 [2:16:32<51:44, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8561, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 653/890 [2:16:45<50:51, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5506, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5506, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5540, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6688, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6688, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  73%|███████▎  | 654/890 [2:16:58<50:47, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8472, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▎  | 655/890 [2:17:10<50:06, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2214, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2214, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2251, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▎  | 656/890 [2:17:23<49:26, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7092, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▍  | 657/890 [2:17:36<49:26, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6188, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9425, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9425, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▍  | 658/890 [2:17:48<49:22, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4562, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7752, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7752, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▍  | 659/890 [2:18:01<48:41, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6105, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6105, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0552, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0552, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▍  | 660/890 [2:18:14<48:51, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4216, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4216, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4263, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5931, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5931, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▍  | 661/890 [2:18:27<48:56, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9526, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▍  | 662/890 [2:18:39<48:11, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0631, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9179, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9179, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  74%|███████▍  | 663/890 [2:18:52<47:53, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6178, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6178, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0053, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6231, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▍  | 664/890 [2:19:04<47:41, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4375, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6788, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6788, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▍  | 665/890 [2:19:17<47:10, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8256, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8256, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8277, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8230, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8230, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▍  | 666/890 [2:19:30<47:28, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0486, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8644, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8644, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▍  | 667/890 [2:19:43<47:49, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0667, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1818, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1818, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▌  | 668/890 [2:19:56<47:37, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7378, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7378, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7393, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▌  | 669/890 [2:20:09<47:10, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9704, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▌  | 670/890 [2:20:21<46:37, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7758, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  75%|███████▌  | 671/890 [2:20:33<46:00, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9590, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9590, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9614, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5457, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5457, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  76%|███████▌  | 672/890 [2:20:46<46:01, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2137, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  76%|███████▌  | 673/890 [2:20:59<46:00, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8669, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8669, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8705, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  76%|███████▌  | 674/890 [2:21:12<46:00, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3120, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0945, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0945, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  76%|███████▌  | 675/890 [2:21:25<46:03, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2143, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2143, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2189, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  76%|███████▌  | 676/890 [2:21:38<45:42, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4502, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4502, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4514, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8213, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8882e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8213, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  76%|███████▌  | 677/890 [2:21:50<45:06, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1788, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7137, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7137, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  76%|███████▌  | 678/890 [2:22:03<44:25, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9462, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9857e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  76%|███████▋  | 679/890 [2:22:15<43:55, 12.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0195, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  76%|███████▋  | 680/890 [2:22:28<44:26, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8263, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 681/890 [2:22:41<44:06, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1777, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1777, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1812, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 682/890 [2:22:54<44:25, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1846, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3889, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3889, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 683/890 [2:23:06<44:02, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0046, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4620, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 684/890 [2:23:19<43:53, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2202, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 685/890 [2:23:32<43:21, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1013, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1013, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1037, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6937, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6937, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 686/890 [2:23:45<43:16, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6244, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6244, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6268, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6881, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6881, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 687/890 [2:23:57<42:36, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6240, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6920, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6920, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 688/890 [2:24:10<42:34, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6966, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6966, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6992, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  77%|███████▋  | 689/890 [2:24:22<42:16, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5920, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5920, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5939, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 690/890 [2:24:35<42:29, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7549, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7088e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  78%|███████▊  | 691/890 [2:24:48<42:20, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7266, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7266, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7296, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5668, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5668, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 692/890 [2:25:01<42:48, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0051, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8142, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 693/890 [2:25:14<42:35, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.6556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.6556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.6578, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2178, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2178, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 694/890 [2:25:27<41:55, 12.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6485, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9471, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9471, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 695/890 [2:25:39<41:17, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2493, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2493, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2518, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 696/890 [2:25:52<40:40, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6949, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6949, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6980, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1295, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1295, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 697/890 [2:26:04<40:25, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4997, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4997, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5016, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  78%|███████▊  | 698/890 [2:26:16<40:00, 12.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4188, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4188, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4222, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▊  | 699/890 [2:26:29<39:44, 12.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3873, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▊  | 700/890 [2:26:42<39:55, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7024, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7024, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7063, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▉  | 701/890 [2:26:55<40:03, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7810, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8750, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8750, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▉  | 702/890 [2:27:07<39:26, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6986, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6986, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▉  | 703/890 [2:27:20<39:12, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6978, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▉  | 704/890 [2:27:33<39:33, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4163, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4163, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4188, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9353, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9353, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▉  | 705/890 [2:27:46<39:25, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5314, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5314, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5345, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2578e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  79%|███████▉  | 706/890 [2:27:58<39:10, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4275, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4275, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4286, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8045, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8045, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  79%|███████▉  | 707/890 [2:28:11<38:42, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7934, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7934, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7963, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6056, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6056, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|███████▉  | 708/890 [2:28:24<38:43, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0351, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0351, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0366, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8167, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8167, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|███████▉  | 709/890 [2:28:37<39:09, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6939, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7344, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7344, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|███████▉  | 710/890 [2:28:50<38:31, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4904, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4904, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4932, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.0505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|███████▉  | 711/890 [2:29:02<37:48, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5783, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5783, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5809, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|████████  | 712/890 [2:29:15<37:45, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5809, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.4539, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.4539, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|████████  | 713/890 [2:29:28<37:32, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6615, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|████████  | 714/890 [2:29:40<36:59, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0041, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.3687, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|████████  | 715/890 [2:29:53<37:07, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6445, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6445, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6464, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  80%|████████  | 716/890 [2:30:06<37:12, 12.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8890, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  81%|████████  | 717/890 [2:30:19<37:00, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4975, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7834, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7834, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  81%|████████  | 718/890 [2:30:31<36:29, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6760, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6760, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6802, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5022, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5022, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  81%|████████  | 719/890 [2:30:44<36:00, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6827, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6827, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6844, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1909e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  81%|████████  | 720/890 [2:30:57<36:28, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3087, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3087, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0044, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3131, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0579, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0579, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  81%|████████  | 721/890 [2:31:10<36:04, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0057, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5826, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  81%|████████  | 722/890 [2:31:23<35:46, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8731, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  81%|████████  | 723/890 [2:31:36<35:52, 12.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  81%|████████▏ | 724/890 [2:31:49<35:43, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9686, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9686, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0048, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9735, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8975, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2791e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8975, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  81%|████████▏ | 725/890 [2:32:02<35:24, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7812, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7812, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7832, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4449, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4449, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 726/890 [2:32:14<34:49, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5316, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5316, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5344, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 727/890 [2:32:27<34:25, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1766, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1766, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 728/890 [2:32:40<34:28, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6897, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 729/890 [2:32:52<33:58, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6357, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6357, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6381, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7107, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7107, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 730/890 [2:33:04<33:35, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9320, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0666, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0666, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 731/890 [2:33:17<33:18, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8565, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4570, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4570, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 732/890 [2:33:30<33:19, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7307, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7307, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7322, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5221, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5221, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 733/890 [2:33:42<33:11, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6370, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  82%|████████▏ | 734/890 [2:33:55<32:50, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7023, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7033e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  83%|████████▎ | 735/890 [2:34:08<32:45, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6717, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6717, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6730, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4750, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4750, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  83%|████████▎ | 736/890 [2:34:21<32:54, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3442, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3442, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3463, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0133, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0133, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  83%|████████▎ | 737/890 [2:34:33<32:19, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8438, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8438, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8472, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9214, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9214, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  83%|████████▎ | 738/890 [2:34:46<31:52, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8347, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2513, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1438e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2513, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  83%|████████▎ | 739/890 [2:34:58<31:43, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5569, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  83%|████████▎ | 740/890 [2:35:11<31:57, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0005, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0047, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0158, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  83%|████████▎ | 741/890 [2:35:24<31:49, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5319, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  83%|████████▎ | 742/890 [2:35:37<31:16, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6176, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8006, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8822e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8006, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  83%|████████▎ | 743/890 [2:35:49<30:55, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7457, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▎ | 744/890 [2:36:02<30:49, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5361, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5361, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5387, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8777, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8777, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▎ | 745/890 [2:36:14<30:25, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5738, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▍ | 746/890 [2:36:27<30:14, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5971, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9425, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9425, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▍ | 747/890 [2:36:40<30:05, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7004, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▍ | 748/890 [2:36:53<30:01, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8267, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2568, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2568, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▍ | 749/890 [2:37:05<29:56, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8732, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8732, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8752, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5658, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5658, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▍ | 750/890 [2:37:18<29:44, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3785, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1602, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1602, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  84%|████████▍ | 751/890 [2:37:31<29:24, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.0070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.0113, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9867e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  84%|████████▍ | 752/890 [2:37:43<29:12, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7201, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7201, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7216, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9898, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9898, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▍ | 753/890 [2:37:56<28:48, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3131, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▍ | 754/890 [2:38:08<28:29, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7250, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7250, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7265, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9607, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9607, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▍ | 755/890 [2:38:21<28:19, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6418, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▍ | 756/890 [2:38:34<28:06, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4430, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4430, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4461, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▌ | 757/890 [2:38:47<28:18, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▌ | 758/890 [2:38:59<27:53, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5751, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5595, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5595, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▌ | 759/890 [2:39:12<27:33, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1158, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  85%|████████▌ | 760/890 [2:39:25<27:38, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1975, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▌ | 761/890 [2:39:38<27:25, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5831, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▌ | 762/890 [2:39:50<27:13, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7189, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8657, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8657, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▌ | 763/890 [2:40:03<26:53, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8488, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8488, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8504, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3855e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  86%|████████▌ | 764/890 [2:40:16<26:41, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7813, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7813, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7843, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▌ | 765/890 [2:40:29<26:44, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6546, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▌ | 766/890 [2:40:41<26:19, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5142, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6288, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6288, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▌ | 767/890 [2:40:54<25:56, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7547, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6796, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6796, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▋ | 768/890 [2:41:06<25:42, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4960, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8254, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8254, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  86%|████████▋ | 769/890 [2:41:19<25:33, 12.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5391, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 770/890 [2:41:31<25:10, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9288, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9288, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9321, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 771/890 [2:41:44<24:50, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0906, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0906, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0929, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 772/890 [2:41:57<24:53, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9140, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9140, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9154, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 773/890 [2:42:10<24:52, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4869, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4869, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4884, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 774/890 [2:42:22<24:30, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9798, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9136, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9136, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 775/890 [2:42:35<24:09, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6777, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6777, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6807, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7555, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7555, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 776/890 [2:42:47<23:53, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6570, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6570, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6585, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5796, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4623e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5796, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  87%|████████▋ | 777/890 [2:43:00<23:48, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  87%|████████▋ | 778/890 [2:43:13<23:33, 12.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5823, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9876, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6132e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9876, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  88%|████████▊ | 779/890 [2:43:25<23:16, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7321, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 780/890 [2:43:38<23:10, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7425, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7425, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7436, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8089, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8089, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 781/890 [2:43:51<23:02, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8848, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3310, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3310, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 782/890 [2:44:03<22:44, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6972, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 783/890 [2:44:16<22:46, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5735, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5735, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 784/890 [2:44:29<22:39, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3608, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3608, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3629, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5732, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5732, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 785/890 [2:44:42<22:29, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0421, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0421, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0436, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2417, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2417, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 786/890 [2:44:55<22:10, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9644, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9644, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9670, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  88%|████████▊ | 787/890 [2:45:07<21:49, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9443, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9443, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9471, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7388, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7388, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  89%|████████▊ | 788/890 [2:45:20<21:39, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9004, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9004, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9022, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6760, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6760, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  89%|████████▊ | 789/890 [2:45:33<21:33, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6487, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8316, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6725e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8316, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  89%|████████▉ | 790/890 [2:45:46<21:08, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1445, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4202e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1445, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1454, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  89%|████████▉ | 791/890 [2:45:58<20:48, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8347, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  89%|████████▉ | 792/890 [2:46:10<20:34, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0119, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9227, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6113e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9227, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  89%|████████▉ | 793/890 [2:46:23<20:20, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1896, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7826, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7826, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  89%|████████▉ | 794/890 [2:46:36<20:12, 12.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.2406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.2406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.2423, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5707e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  89%|████████▉ | 795/890 [2:46:49<20:08, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0207, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  89%|████████▉ | 796/890 [2:47:01<19:56, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1484, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1484, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1504, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|████████▉ | 797/890 [2:47:14<19:47, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6735, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6735, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6767, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|████████▉ | 798/890 [2:47:27<19:29, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1186, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0137, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0137, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|████████▉ | 799/890 [2:47:39<19:07, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.3139, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0006, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3139, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0060, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3199, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|████████▉ | 800/890 [2:47:52<19:10, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7218, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6264, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6264, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|█████████ | 801/890 [2:48:05<18:49, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1710, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9266, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9266, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|█████████ | 802/890 [2:48:18<18:38, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3953, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6940e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3953, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3963, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3079, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3079, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  90%|█████████ | 803/890 [2:48:31<18:33, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5284, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8636, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8636, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|█████████ | 804/890 [2:48:44<18:33, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8320, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9522, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9522, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  90%|█████████ | 805/890 [2:48:57<18:13, 12.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0148, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0148, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0170, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8054e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  91%|█████████ | 806/890 [2:49:09<17:53, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5579, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6411e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  91%|█████████ | 807/890 [2:49:22<17:38, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9006, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9006, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9034, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6991e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  91%|█████████ | 808/890 [2:49:35<17:30, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5404, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9396, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9396, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  91%|█████████ | 809/890 [2:49:48<17:13, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6236, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6236, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  91%|█████████ | 810/890 [2:50:01<17:10, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7571, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7571, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7593, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8697, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8697, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  91%|█████████ | 811/890 [2:50:14<17:00, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5236, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7775e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5236, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5245, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8267, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8267, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  91%|█████████ | 812/890 [2:50:26<16:44, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6607, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  91%|█████████▏| 813/890 [2:50:39<16:29, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1547, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1547, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1569, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  91%|█████████▏| 814/890 [2:50:52<16:10, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3217, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3217, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3235, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0633, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0633, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  92%|█████████▏| 815/890 [2:51:05<15:56, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3231, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3231, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3259, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  92%|█████████▏| 816/890 [2:51:18<15:48, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5229, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5229, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5239, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  92%|█████████▏| 817/890 [2:51:30<15:32, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6266, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  92%|█████████▏| 818/890 [2:51:43<15:18, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5700, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5700, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5719, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8305, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8305, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  92%|█████████▏| 819/890 [2:51:56<15:04, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7474, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7474, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7491, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0141e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  92%|█████████▏| 820/890 [2:52:09<15:01, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0259, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  92%|█████████▏| 821/890 [2:52:21<14:41, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6885, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9023, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3821e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9023, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  92%|█████████▏| 822/890 [2:52:34<14:25, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5365, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1188, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1188, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  92%|█████████▏| 823/890 [2:52:47<14:12, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5937, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5937, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1678, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1678, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  93%|█████████▎| 824/890 [2:52:59<13:59, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7200, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7200, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7212, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9081, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9533e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9081, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  93%|█████████▎| 825/890 [2:53:12<13:46, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7067, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  93%|█████████▎| 826/890 [2:53:25<13:32, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5187, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5187, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5203, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8095, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2034e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8095, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  93%|█████████▎| 827/890 [2:53:38<13:26, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5968, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5968, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5987, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.3580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  93%|█████████▎| 828/890 [2:53:52<13:31, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7756, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7756, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7782, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9023, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9023, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  93%|█████████▎| 829/890 [2:54:04<13:09, 12.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9819, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9819, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9835, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  93%|█████████▎| 830/890 [2:54:17<12:49, 12.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8997, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  93%|█████████▎| 831/890 [2:54:29<12:31, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6713, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6713, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6733, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0157, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0157, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  93%|█████████▎| 832/890 [2:54:42<12:21, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1608, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3959e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1608, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  94%|█████████▎| 833/890 [2:54:55<12:11, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.1364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.1364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.1393, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5231, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6371e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5231, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  94%|█████████▎| 834/890 [2:55:08<11:58, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.4703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.4703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.4720, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  94%|█████████▍| 835/890 [2:55:21<11:47, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8178, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8178, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4357, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4357, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  94%|█████████▍| 836/890 [2:55:34<11:35, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.3587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3606, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2128, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2128, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  94%|█████████▍| 837/890 [2:55:47<11:21, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0411, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  94%|█████████▍| 838/890 [2:55:59<11:04, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1552, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1552, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1563, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6037, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6037, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  94%|█████████▍| 839/890 [2:56:12<10:49, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3007, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3007, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3033, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8814, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8814, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  94%|█████████▍| 840/890 [2:56:24<10:34, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7288, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7288, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7311, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2387, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2387, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  94%|█████████▍| 841/890 [2:56:37<10:23, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  95%|█████████▍| 842/890 [2:56:50<10:11, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5471, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5471, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5484, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  95%|█████████▍| 843/890 [2:57:03<10:00, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1693, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  95%|█████████▍| 844/890 [2:57:16<09:45, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5284, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5284, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5313, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5229e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  95%|█████████▍| 845/890 [2:57:29<09:39, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6422, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6422, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  95%|█████████▌| 846/890 [2:57:41<09:22, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5086, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5086, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0040, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5126, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  95%|█████████▌| 847/890 [2:57:54<09:04, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1352, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6705, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6705, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  95%|█████████▌| 848/890 [2:58:06<08:51, 12.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7164, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7164, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7220e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  95%|█████████▌| 849/890 [2:58:19<08:40, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5970, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3569e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  96%|█████████▌| 850/890 [2:58:32<08:31, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8064, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3914, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3914, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▌| 851/890 [2:58:45<08:17, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6561, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9901, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9901, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▌| 852/890 [2:58:58<08:05, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9711, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▌| 853/890 [2:59:11<07:54, 12.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1057, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▌| 854/890 [2:59:23<07:40, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8086, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8086, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8101, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▌| 855/890 [2:59:36<07:25, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1776, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1776, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1793, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8652, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8652, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▌| 856/890 [2:59:48<07:10, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4059, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8659, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8659, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▋| 857/890 [3:00:01<06:55, 12.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8279, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7290, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7290, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  96%|█████████▋| 858/890 [3:00:13<06:41, 12.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4300, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4300, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4325, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4380, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4380, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 859/890 [3:00:26<06:30, 12.60s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3937, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3937, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3962, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 860/890 [3:00:39<06:20, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9951, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 861/890 [3:00:52<06:08, 12.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5754, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 862/890 [3:01:04<05:56, 12.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1398, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1398, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1440, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 863/890 [3:01:17<05:42, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1839, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0417, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0417, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 864/890 [3:01:29<05:28, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2199, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2199, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2209, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 865/890 [3:01:42<05:17, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.3935, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3935, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3966, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9825e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  97%|█████████▋| 866/890 [3:01:55<05:06, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7230, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9498, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9498, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  97%|█████████▋| 867/890 [3:02:08<04:53, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7933, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7933, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7947, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2670, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2670, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 868/890 [3:02:21<04:40, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2107, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2107, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 869/890 [3:02:33<04:26, 12.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5195, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5195, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5229, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 870/890 [3:02:46<04:12, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5129, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 871/890 [3:02:58<03:59, 12.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4627, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5179, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5179, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 872/890 [3:03:11<03:46, 12.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.9268, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9268, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.9296, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8463, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8463, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 873/890 [3:03:24<03:35, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8175, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 874/890 [3:03:36<03:22, 12.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8478, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8478, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8501, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 875/890 [3:03:49<03:11, 12.78s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0010, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  98%|█████████▊| 876/890 [3:04:02<02:58, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4924, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2048, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2048, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▊| 877/890 [3:04:15<02:45, 12.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1883, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▊| 878/890 [3:04:28<02:32, 12.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7216, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7216, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7255, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(7.5682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(7.5682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▉| 879/890 [3:04:40<02:19, 12.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7097, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7097, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7116, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▉| 880/890 [3:04:53<02:06, 12.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5459, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5459, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5490, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0912, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0912, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0032, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▉| 881/890 [3:05:06<01:54, 12.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7134, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7134, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▉| 882/890 [3:05:18<01:41, 12.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1242, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1242, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1280, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(6.0005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6373e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(6.0005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 7/8):  99%|█████████▉| 883/890 [3:05:31<01:29, 12.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6780, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6780, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6793, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3373, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3373, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▉| 884/890 [3:05:45<01:18, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7261, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8):  99%|█████████▉| 885/890 [3:05:59<01:06, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5075, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5075, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5101, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9267, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9267, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8): 100%|█████████▉| 886/890 [3:06:11<00:52, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7703, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8): 100%|█████████▉| 887/890 [3:06:24<00:38, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6279, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6279, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6290, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9338, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9338, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8): 100%|█████████▉| 888/890 [3:06:36<00:25, 12.71s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1482, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1482, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1492, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6294, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6294, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8): 100%|█████████▉| 889/890 [3:06:48<00:12, 12.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6135, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6135, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6149, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 7/8): 100%|██████████| 890/890 [3:07:01<00:00, 12.61s/batch]

monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.


monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.

Validating:   1%|          | 1/126 [00:05<11:58,  5.75s/batch]

Validation dice loss per batch: 0.3910426199436188


Validating:   2%|▏         | 2/126 [00:06<05:27,  2.64s/batch]

Validation dice loss per batch: 0.5783846378326416


Validating:   2%|▏         | 3/126 [00:09<06:17,  3.07s/batch]

Validation dice loss per batch: 0.7786287665367126


Validating:   3%|▎         | 4/126 [00:10<04:09,  2.04s/batch]

Validation dice loss per batch: 0.49239999055862427


Validating:   4%|▍         | 5/126 [00:15<06:28,  3.21s/batch]

Validation dice loss per batch: 0.3084975481033325


Validating:   5%|▍         | 6/126 [00:16<04:33,  2.28s/batch]

Validation dice loss per batch: 0.32578879594802856


Validating:   6%|▌         | 7/126 [00:20<05:39,  2.85s/batch]

Validation dice loss per batch: 0.35060596466064453


Validating:   6%|▋         | 8/126 [00:20<04:07,  2.09s/batch]

Validation dice loss per batch: 0.2073134481906891


Validating:   7%|▋         | 9/126 [00:24<05:06,  2.62s/batch]

Validation dice loss per batch: 0.34175658226013184


Validating:   8%|▊         | 10/126 [00:24<03:46,  1.95s/batch]

Validation dice loss per batch: 0.5074227452278137


Validating:   9%|▊         | 11/126 [00:29<05:05,  2.66s/batch]

Validation dice loss per batch: 0.5903387665748596


Validating:  10%|▉         | 12/126 [00:29<03:47,  2.00s/batch]

Validation dice loss per batch: 0.2746943235397339


Validating:  10%|█         | 13/126 [00:33<04:53,  2.60s/batch]

Validation dice loss per batch: 0.5823252201080322


Validating:  11%|█         | 14/126 [00:33<03:39,  1.96s/batch]

Validation dice loss per batch: 0.3341965675354004


Validating:  12%|█▏        | 15/126 [00:40<06:04,  3.28s/batch]

Validation dice loss per batch: 0.4008169174194336


Validating:  13%|█▎        | 16/126 [00:40<04:28,  2.44s/batch]

Validation dice loss per batch: 0.22823432087898254


Validating:  13%|█▎        | 17/126 [00:44<05:07,  2.82s/batch]

Validation dice loss per batch: 0.42890769243240356


Validating:  14%|█▍        | 18/126 [00:44<03:48,  2.12s/batch]

Validation dice loss per batch: 0.5430681705474854


Validating:  15%|█▌        | 19/126 [00:48<04:41,  2.63s/batch]

Validation dice loss per batch: 0.21582862734794617


Validating:  16%|█▌        | 20/126 [00:49<03:30,  1.98s/batch]

Validation dice loss per batch: 0.2818804979324341


Validating:  17%|█▋        | 21/126 [00:53<04:28,  2.56s/batch]

Validation dice loss per batch: 0.5974713563919067


Validating:  17%|█▋        | 22/126 [00:53<03:21,  1.93s/batch]

Validation dice loss per batch: 0.2596006393432617


Validating:  18%|█▊        | 23/126 [00:57<04:22,  2.55s/batch]

Validation dice loss per batch: 1.2905060052871704


Validating:  19%|█▉        | 24/126 [00:58<03:16,  1.93s/batch]

Validation dice loss per batch: 0.8694416284561157


Validating:  20%|█▉        | 25/126 [01:03<05:00,  2.97s/batch]

Validation dice loss per batch: 1.2172856330871582


Validating:  21%|██        | 26/126 [01:04<03:42,  2.23s/batch]

Validation dice loss per batch: 0.43211960792541504


Validating:  21%|██▏       | 27/126 [01:08<04:33,  2.76s/batch]

Validation dice loss per batch: 0.4009482264518738


Validating:  22%|██▏       | 28/126 [01:08<03:24,  2.08s/batch]

Validation dice loss per batch: 0.5837931632995605


Validating:  23%|██▎       | 29/126 [01:12<04:18,  2.66s/batch]

Validation dice loss per batch: 0.2668806314468384


Validating:  24%|██▍       | 30/126 [01:13<03:13,  2.01s/batch]

Validation dice loss per batch: 0.4729764461517334


Validating:  25%|██▍       | 31/126 [01:17<04:34,  2.89s/batch]

Validation dice loss per batch: 0.35475823283195496


Validating:  25%|██▌       | 32/126 [01:18<03:24,  2.17s/batch]

Validation dice loss per batch: 0.36075910925865173


Validating:  26%|██▌       | 33/126 [01:23<04:29,  2.89s/batch]

Validation dice loss per batch: 0.21824975311756134


Validating:  27%|██▋       | 34/126 [01:23<03:20,  2.18s/batch]

Validation dice loss per batch: 0.40728962421417236


Validating:  28%|██▊       | 35/126 [01:27<04:01,  2.66s/batch]

Validation dice loss per batch: 0.6529422998428345


Validating:  29%|██▊       | 36/126 [01:27<03:01,  2.01s/batch]

Validation dice loss per batch: 0.5024410486221313


Validating:  29%|██▉       | 37/126 [01:32<04:12,  2.84s/batch]

Validation dice loss per batch: 0.381723016500473


Validating:  30%|███       | 38/126 [01:33<03:08,  2.14s/batch]

Validation dice loss per batch: 0.5006086826324463


Validating:  31%|███       | 39/126 [01:36<03:41,  2.55s/batch]

Validation dice loss per batch: 0.4683090150356293


Validating:  32%|███▏      | 40/126 [01:37<02:46,  1.93s/batch]

Validation dice loss per batch: 0.5951529741287231


Validating:  33%|███▎      | 41/126 [01:41<03:51,  2.72s/batch]

Validation dice loss per batch: 0.48123401403427124


Validating:  33%|███▎      | 42/126 [01:42<02:52,  2.06s/batch]

Validation dice loss per batch: 0.49378734827041626


Validating:  34%|███▍      | 43/126 [01:46<03:36,  2.60s/batch]

Validation dice loss per batch: 0.5072343349456787


Validating:  35%|███▍      | 44/126 [01:46<02:42,  1.98s/batch]

Validation dice loss per batch: 0.6736710667610168


Validating:  36%|███▌      | 45/126 [01:50<03:21,  2.49s/batch]

Validation dice loss per batch: 0.3847794830799103


Validating:  37%|███▋      | 46/126 [01:50<02:31,  1.90s/batch]

Validation dice loss per batch: 0.30832237005233765


Validating:  37%|███▋      | 47/126 [01:55<03:31,  2.67s/batch]

Validation dice loss per batch: 0.5537353754043579


Validating:  38%|███▊      | 48/126 [01:55<02:38,  2.03s/batch]

Validation dice loss per batch: 0.7125786542892456


Validating:  39%|███▉      | 49/126 [01:59<03:17,  2.56s/batch]

Validation dice loss per batch: 0.42757388949394226


Validating:  40%|███▉      | 50/126 [02:00<02:28,  1.95s/batch]

Validation dice loss per batch: 0.26906323432922363


Validating:  40%|████      | 51/126 [02:04<03:12,  2.56s/batch]

Validation dice loss per batch: 0.3450561761856079


Validating:  41%|████▏     | 52/126 [02:04<02:24,  1.95s/batch]

Validation dice loss per batch: 0.3899059593677521


Validating:  42%|████▏     | 53/126 [02:09<03:23,  2.79s/batch]

Validation dice loss per batch: 0.6686959862709045


Validating:  43%|████▎     | 54/126 [02:09<02:32,  2.11s/batch]

Validation dice loss per batch: 2.085291624069214


Validating:  44%|████▎     | 55/126 [02:13<03:08,  2.65s/batch]

Validation dice loss per batch: 0.5368117690086365


Validating:  44%|████▍     | 56/126 [02:14<02:21,  2.02s/batch]

Validation dice loss per batch: 0.48869308829307556


Validating:  45%|████▌     | 57/126 [02:18<02:59,  2.61s/batch]

Validation dice loss per batch: 0.4981868267059326


Validating:  46%|████▌     | 58/126 [02:18<02:14,  1.98s/batch]

Validation dice loss per batch: 0.7049853205680847


Validating:  47%|████▋     | 59/126 [02:22<02:55,  2.62s/batch]

Validation dice loss per batch: 0.23562006652355194


Validating:  48%|████▊     | 60/126 [02:23<02:11,  1.99s/batch]

Validation dice loss per batch: 0.1967199593782425


Validating:  48%|████▊     | 61/126 [02:27<02:56,  2.72s/batch]

Validation dice loss per batch: 0.3277958929538727


Validating:  49%|████▉     | 62/126 [02:28<02:11,  2.06s/batch]

Validation dice loss per batch: 0.5941933989524841


Validating:  50%|█████     | 63/126 [02:32<02:46,  2.65s/batch]

Validation dice loss per batch: 0.35207366943359375


Validating:  51%|█████     | 64/126 [02:32<02:04,  2.01s/batch]

Validation dice loss per batch: 1.2003235816955566


Validating:  52%|█████▏    | 65/126 [02:38<03:10,  3.12s/batch]

Validation dice loss per batch: 1.3436975479125977


Validating:  52%|█████▏    | 66/126 [02:39<02:20,  2.35s/batch]

Validation dice loss per batch: 0.7471052408218384


Validating:  53%|█████▎    | 67/126 [02:42<02:42,  2.76s/batch]

Validation dice loss per batch: 0.3316921293735504


Validating:  54%|█████▍    | 68/126 [02:43<02:01,  2.09s/batch]

Validation dice loss per batch: 0.2904890775680542


Validating:  55%|█████▍    | 69/126 [02:49<03:03,  3.21s/batch]

Validation dice loss per batch: 1.2936487197875977


Validating:  56%|█████▌    | 70/126 [02:49<02:14,  2.41s/batch]

Validation dice loss per batch: 0.580933690071106


Validating:  56%|█████▋    | 71/126 [02:53<02:34,  2.81s/batch]

Validation dice loss per batch: 0.5848211050033569


Validating:  57%|█████▋    | 72/126 [02:54<01:54,  2.13s/batch]

Validation dice loss per batch: 0.34323611855506897


Validating:  58%|█████▊    | 73/126 [02:59<02:40,  3.03s/batch]

Validation dice loss per batch: 0.2354501634836197


Validating:  59%|█████▊    | 74/126 [02:59<01:58,  2.28s/batch]

Validation dice loss per batch: 0.3622516095638275


Validating:  60%|█████▉    | 75/126 [03:03<02:23,  2.81s/batch]

Validation dice loss per batch: 0.14129012823104858


Validating:  60%|██████    | 76/126 [03:04<01:46,  2.13s/batch]

Validation dice loss per batch: 0.2800547182559967


Validating:  61%|██████    | 77/126 [03:09<02:30,  3.07s/batch]

Validation dice loss per batch: 0.24935853481292725


Validating:  62%|██████▏   | 78/126 [03:10<01:50,  2.31s/batch]

Validation dice loss per batch: 0.23510442674160004


Validating:  63%|██████▎   | 79/126 [03:14<02:13,  2.85s/batch]

Validation dice loss per batch: 0.22490938007831573


Validating:  63%|██████▎   | 80/126 [03:14<01:39,  2.16s/batch]

Validation dice loss per batch: 0.164676234126091


Validating:  64%|██████▍   | 81/126 [03:20<02:27,  3.28s/batch]

Validation dice loss per batch: 2.758333206176758


Validating:  65%|██████▌   | 82/126 [03:21<01:48,  2.46s/batch]

Validation dice loss per batch: 0.2824656367301941


Validating:  66%|██████▌   | 83/126 [03:25<02:04,  2.89s/batch]

Validation dice loss per batch: 0.45658478140830994


Validating:  67%|██████▋   | 84/126 [03:25<01:32,  2.19s/batch]

Validation dice loss per batch: 0.2512749433517456


Validating:  67%|██████▋   | 85/126 [03:31<02:11,  3.20s/batch]

Validation dice loss per batch: 0.2611660361289978


Validating:  68%|██████▊   | 86/126 [03:31<01:36,  2.40s/batch]

Validation dice loss per batch: 0.20070987939834595


Validating:  69%|██████▉   | 87/126 [03:35<01:51,  2.86s/batch]

Validation dice loss per batch: 0.19515281915664673


Validating:  70%|██████▉   | 88/126 [03:36<01:22,  2.17s/batch]

Validation dice loss per batch: 0.5860594511032104


Validating:  71%|███████   | 89/126 [03:40<01:42,  2.76s/batch]

Validation dice loss per batch: 0.2585269510746002


Validating:  71%|███████▏  | 90/126 [03:40<01:15,  2.10s/batch]

Validation dice loss per batch: 0.5075043439865112


Validating:  72%|███████▏  | 91/126 [03:44<01:30,  2.58s/batch]

Validation dice loss per batch: 0.668418288230896


Validating:  73%|███████▎  | 92/126 [03:45<01:06,  1.97s/batch]

Validation dice loss per batch: 0.26884543895721436


Validating:  74%|███████▍  | 93/126 [03:50<01:42,  3.10s/batch]

Validation dice loss per batch: 0.7146730422973633


Validating:  75%|███████▍  | 94/126 [03:51<01:14,  2.33s/batch]

Validation dice loss per batch: 0.42319348454475403


Validating:  75%|███████▌  | 95/126 [03:55<01:24,  2.71s/batch]

Validation dice loss per batch: 0.2081284523010254


Validating:  76%|███████▌  | 96/126 [03:55<01:01,  2.06s/batch]

Validation dice loss per batch: 0.3353832960128784


Validating:  77%|███████▋  | 97/126 [04:00<01:22,  2.84s/batch]

Validation dice loss per batch: 0.4475903809070587


Validating:  78%|███████▊  | 98/126 [04:00<01:00,  2.15s/batch]

Validation dice loss per batch: 0.49665307998657227


Validating:  79%|███████▊  | 99/126 [04:05<01:14,  2.77s/batch]

Validation dice loss per batch: 0.24120375514030457


Validating:  79%|███████▉  | 100/126 [04:05<00:54,  2.10s/batch]

Validation dice loss per batch: 0.17588438093662262


Validating:  80%|████████  | 101/126 [04:10<01:15,  3.02s/batch]

Validation dice loss per batch: 0.2749498784542084


Validating:  81%|████████  | 102/126 [04:11<00:54,  2.28s/batch]

Validation dice loss per batch: 0.3862225413322449


Validating:  82%|████████▏ | 103/126 [04:15<01:04,  2.80s/batch]

Validation dice loss per batch: 0.2187294214963913


Validating:  83%|████████▎ | 104/126 [04:15<00:46,  2.12s/batch]

Validation dice loss per batch: 0.6163294911384583


Validating:  83%|████████▎ | 105/126 [04:20<00:59,  2.85s/batch]

Validation dice loss per batch: 0.7572258114814758


Validating:  84%|████████▍ | 106/126 [04:20<00:43,  2.16s/batch]

Validation dice loss per batch: 0.1642308384180069


Validating:  85%|████████▍ | 107/126 [04:24<00:51,  2.69s/batch]

Validation dice loss per batch: 0.31171703338623047


Validating:  86%|████████▌ | 108/126 [04:25<00:36,  2.05s/batch]

Validation dice loss per batch: 0.5864807963371277


Validating:  87%|████████▋ | 109/126 [04:30<00:50,  2.96s/batch]

Validation dice loss per batch: 0.28402021527290344


Validating:  87%|████████▋ | 110/126 [04:31<00:35,  2.23s/batch]

Validation dice loss per batch: 0.2600232660770416


Validating:  88%|████████▊ | 111/126 [04:34<00:40,  2.73s/batch]

Validation dice loss per batch: 0.1706497073173523


Validating:  89%|████████▉ | 112/126 [04:35<00:29,  2.08s/batch]

Validation dice loss per batch: 0.6285674571990967


Validating:  90%|████████▉ | 113/126 [04:40<00:37,  2.90s/batch]

Validation dice loss per batch: 0.8518513441085815


Validating:  90%|█████████ | 114/126 [04:40<00:26,  2.20s/batch]

Validation dice loss per batch: 0.8599367141723633


Validating:  91%|█████████▏| 115/126 [04:44<00:28,  2.62s/batch]

Validation dice loss per batch: 1.4370211362838745


Validating:  92%|█████████▏| 116/126 [04:45<00:19,  2.00s/batch]

Validation dice loss per batch: 0.7901432514190674


Validating:  93%|█████████▎| 117/126 [04:50<00:27,  3.08s/batch]

Validation dice loss per batch: 0.884609580039978


Validating:  94%|█████████▎| 118/126 [04:51<00:18,  2.32s/batch]

Validation dice loss per batch: 0.9202094078063965


Validating:  94%|█████████▍| 119/126 [04:54<00:18,  2.67s/batch]

Validation dice loss per batch: 0.7208831906318665


Validating:  95%|█████████▌| 120/126 [04:55<00:12,  2.03s/batch]

Validation dice loss per batch: 0.3228347897529602


Validating:  96%|█████████▌| 121/126 [04:59<00:13,  2.67s/batch]

Validation dice loss per batch: 0.5208419561386108


Validating:  97%|█████████▋| 122/126 [04:59<00:08,  2.04s/batch]

Validation dice loss per batch: 0.46781644225120544


Validating:  98%|█████████▊| 123/126 [05:03<00:07,  2.64s/batch]

Validation dice loss per batch: 0.8906987905502319


Validating:  98%|█████████▊| 124/126 [05:04<00:04,  2.01s/batch]

Validation dice loss per batch: 0.5023322105407715


Validating:  99%|█████████▉| 125/126 [05:09<00:02,  2.79s/batch]

Validation dice loss per batch: 0.4788186252117157


                                                                

Validation dice loss per batch: 0.2823736369609833
------Final validation dice loss after epoch 7: 0.5092674493789673-------
Learning rate after epoch 7: 0.001




Model saved after epoch 7



monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.


monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.



Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9635, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   0%|          | 1/890 [00:26<6:36:10, 26.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5951, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5951, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5965, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   0%|          | 2/890 [00:40<4:41:22, 19.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7349, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0543e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7349, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7357, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9626, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9626, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   0%|          | 3/890 [00:53<4:02:44, 16.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8509e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2819, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   0%|          | 4/890 [01:07<3:45:50, 15.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6725, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   1%|          | 5/890 [01:20<3:36:12, 14.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9530, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1854, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6178e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1854, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   1%|          | 6/890 [01:33<3:25:11, 13.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6465, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2013, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2013, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   1%|          | 7/890 [01:46<3:21:20, 13.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5616, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5616, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5628, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   1%|          | 8/890 [01:59<3:17:29, 13.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5810, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5810, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5826, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0617, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0617, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   1%|          | 9/890 [02:12<3:14:42, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9547, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6329, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6329, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   1%|          | 10/890 [02:25<3:15:28, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8658, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9586, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5896e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9586, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   1%|          | 11/890 [02:38<3:12:44, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7849, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7849, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7872, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.3091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   1%|▏         | 12/890 [02:51<3:10:01, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8593e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2506, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0438, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0438, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   1%|▏         | 13/890 [03:03<3:07:52, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6539, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6539, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6558, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   2%|▏         | 14/890 [03:16<3:08:51, 12.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6576, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1431, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1431, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   2%|▏         | 15/890 [03:29<3:08:04, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4577, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7666, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5128e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7666, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   2%|▏         | 16/890 [03:43<3:10:26, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6491, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9729, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9814e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9729, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   2%|▏         | 17/890 [03:56<3:11:17, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4618, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0807e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   2%|▏         | 18/890 [04:09<3:09:31, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7909, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7909, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7920, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5327, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5327, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   2%|▏         | 19/890 [04:22<3:08:35, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6461, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   2%|▏         | 20/890 [04:35<3:10:00, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2028, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3390, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7119e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3390, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   2%|▏         | 21/890 [04:48<3:11:29, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3698, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3698, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3712, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7523e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   2%|▏         | 22/890 [05:02<3:14:37, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6617, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6617, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6628, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5376, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5376, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   3%|▎         | 23/890 [05:16<3:14:29, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8076, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8076, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8091, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   3%|▎         | 24/890 [05:29<3:13:13, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5000, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   3%|▎         | 25/890 [05:42<3:10:12, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7721, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   3%|▎         | 26/890 [05:55<3:08:39, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6173, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4274, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8616e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4274, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   3%|▎         | 27/890 [06:08<3:07:30, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8386, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8233e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8386, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8395, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   3%|▎         | 28/890 [06:21<3:07:25, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4345e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7450, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7563, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8057e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7563, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):   3%|▎         | 29/890 [06:34<3:07:35, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6379, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6379, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6391, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7975, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7975, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   3%|▎         | 30/890 [06:47<3:06:10, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7277, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8716, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8716, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   3%|▎         | 31/890 [06:59<3:05:09, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8586, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8586, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8611, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5976, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5976, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   4%|▎         | 32/890 [07:13<3:05:41, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8946e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5618, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1163, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1163, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   4%|▎         | 33/890 [07:25<3:03:59, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7834, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7834, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7861, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9552, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5873e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9552, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   4%|▍         | 34/890 [07:39<3:05:50, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1233e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2372, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.5112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   4%|▍         | 35/890 [07:52<3:06:57, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6622, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2541, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2541, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   4%|▍         | 36/890 [08:05<3:08:03, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4067, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4067, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4082, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0549, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0549, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   4%|▍         | 37/890 [08:19<3:08:23, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6579, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.5830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1007e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   4%|▍         | 38/890 [08:31<3:05:56, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8300, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8300, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8316, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   4%|▍         | 39/890 [08:44<3:04:34, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8833, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   4%|▍         | 40/890 [08:58<3:07:41, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2228, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2228, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2243, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   5%|▍         | 41/890 [09:11<3:05:04, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0316, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3833e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0316, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0326, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6528, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6528, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   5%|▍         | 42/890 [09:24<3:05:16, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8817, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8817, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8834, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7853, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7853, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   5%|▍         | 43/890 [09:37<3:05:58, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9321, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7616, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7616, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   5%|▍         | 44/890 [09:50<3:05:35, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6544, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6159, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6159, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   5%|▌         | 45/890 [10:04<3:08:56, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8618, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8618, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8644, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   5%|▌         | 46/890 [10:17<3:05:15, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6324e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3079, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   5%|▌         | 47/890 [10:30<3:03:17, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6705, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6705, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6730, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   5%|▌         | 48/890 [10:43<3:04:54, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5522, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5522, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5534, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▌         | 49/890 [10:56<3:03:01, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6244, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6244, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6254, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6453, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6453, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▌         | 50/890 [11:08<3:00:10, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5765, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▌         | 51/890 [11:22<3:02:38, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5313, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5313, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5342, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▌         | 52/890 [11:35<3:03:36, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5952, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6321, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6321, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▌         | 53/890 [11:48<3:02:21, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8591, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▌         | 54/890 [12:01<2:59:43, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9492, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9492, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9509, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7823, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7823, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▌         | 55/890 [12:13<2:59:28, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4464, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▋         | 56/890 [12:26<2:59:26, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2334, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6517, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6517, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   6%|▋         | 57/890 [12:39<2:58:04, 12.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6054, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 58/890 [12:52<2:58:46, 12.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8862, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 59/890 [13:05<2:59:11, 12.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5457, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5457, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5484, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 60/890 [13:18<2:57:56, 12.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8059, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8059, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8076, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 61/890 [13:30<2:56:51, 12.80s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7024, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1327e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7024, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7032, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   7%|▋         | 62/890 [13:43<2:56:49, 12.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8624, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.7498, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.7498, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 63/890 [13:56<2:56:20, 12.79s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5975, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7274, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7274, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 64/890 [14:09<2:56:48, 12.84s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 65/890 [14:22<2:57:09, 12.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5136, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7361, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7361, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   7%|▋         | 66/890 [14:35<2:59:14, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7425, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7425, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7443, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   8%|▊         | 67/890 [14:49<2:59:11, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6589, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6589, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6608, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.5433, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5433, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   8%|▊         | 68/890 [15:01<2:57:34, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9868, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2440, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2440, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   8%|▊         | 69/890 [15:14<2:57:41, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5654, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7146, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7146, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   8%|▊         | 70/890 [15:27<2:57:23, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6087, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6087, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6108, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   8%|▊         | 71/890 [15:40<2:56:27, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5244, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0612e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5244, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5253, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5117, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5117, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   8%|▊         | 72/890 [15:53<2:56:10, 12.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8321, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8321, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8351, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   8%|▊         | 73/890 [16:06<2:56:21, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3614e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8841, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7330, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.2968e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7330, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):   8%|▊         | 74/890 [16:20<2:58:48, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9411e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7010, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6708e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):   8%|▊         | 75/890 [16:34<3:02:10, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5365, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5365, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5393, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9081, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0214e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9081, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   9%|▊         | 76/890 [16:46<2:59:17, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1471, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1471, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1497, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8650, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8650, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   9%|▊         | 77/890 [17:00<3:00:38, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9791, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9791, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9807, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   9%|▉         | 78/890 [17:13<2:58:00, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3769, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3769, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.3791, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1783, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3220e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1783, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   9%|▉         | 79/890 [17:25<2:55:39, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0039, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6941, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7669, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7669, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   9%|▉         | 80/890 [17:39<2:57:17, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4148, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4148, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4174, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9410e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):   9%|▉         | 81/890 [17:52<2:56:52, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7440, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   9%|▉         | 82/890 [18:05<2:54:31, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5911, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   9%|▉         | 83/890 [18:17<2:54:17, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0042, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4881, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):   9%|▉         | 84/890 [18:30<2:53:57, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0607, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0607, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0619, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  10%|▉         | 85/890 [18:43<2:53:02, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8702, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8702, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8715, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6501, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6501, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  10%|▉         | 86/890 [18:56<2:52:19, 12.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7764, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7764, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7784, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6618, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6618, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  10%|▉         | 87/890 [19:09<2:54:26, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9462, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9462, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  10%|▉         | 88/890 [19:22<2:53:10, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1579, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1579, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1603, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6502, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4630e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6502, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  10%|█         | 89/890 [19:35<2:52:52, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2002e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9527, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  10%|█         | 90/890 [19:48<2:53:36, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5773, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5773, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5784, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  10%|█         | 91/890 [20:02<2:56:00, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8039, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0717, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0717, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  10%|█         | 92/890 [20:15<2:54:02, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8128, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7179e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  10%|█         | 93/890 [20:28<2:52:48, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4904, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3341, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9229e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3341, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  11%|█         | 94/890 [20:41<2:52:24, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1089e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2009, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7079, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7079, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  11%|█         | 95/890 [20:54<2:52:09, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6807, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7756, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7756, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  11%|█         | 96/890 [21:07<2:53:11, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8514, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8686e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8514, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8524, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4533e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  11%|█         | 97/890 [21:20<2:52:03, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2399, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2399, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2412, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9234, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9234, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  11%|█         | 98/890 [21:34<2:55:09, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6107, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6107, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0043, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6150, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5986, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5986, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  11%|█         | 99/890 [21:47<2:55:16, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2084, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2084, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2114, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  11%|█         | 100/890 [22:00<2:52:48, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3903, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0549, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0549, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  11%|█▏        | 101/890 [22:13<2:52:16, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3769, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  11%|█▏        | 102/890 [22:26<2:51:33, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6514, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1798, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1798, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  12%|█▏        | 103/890 [22:38<2:49:35, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5285, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.1945e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  12%|█▏        | 104/890 [22:52<2:51:02, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8074, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  12%|█▏        | 105/890 [23:05<2:51:47, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9319, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9319, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9331, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4905e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  12%|█▏        | 106/890 [23:18<2:50:32, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7136, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7136, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7164, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8374, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8374, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  12%|█▏        | 107/890 [23:32<2:53:15, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9619, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9619, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  12%|█▏        | 108/890 [23:45<2:52:49, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.3013, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4615, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4615, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  12%|█▏        | 109/890 [23:58<2:53:18, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0019, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0019, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0038, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2484, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2484, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  12%|█▏        | 110/890 [24:11<2:52:32, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0540, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0540, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0037, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0577, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  12%|█▏        | 111/890 [24:26<2:57:43, 13.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8004, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3168, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3168, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  13%|█▎        | 112/890 [24:39<2:55:24, 13.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6529, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9170e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  13%|█▎        | 113/890 [24:52<2:53:11, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5411, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2478e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5411, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5420, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4006, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4006, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  13%|█▎        | 114/890 [25:07<2:57:24, 13.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8112, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  13%|█▎        | 115/890 [25:20<2:56:12, 13.64s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5782, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5782, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5808, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  13%|█▎        | 116/890 [25:34<2:56:41, 13.70s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6467, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7530, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7530, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  13%|█▎        | 117/890 [25:51<3:10:06, 14.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5705, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5705, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5720, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5996, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9091e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5996, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  13%|█▎        | 118/890 [26:04<3:02:42, 14.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8934, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2721, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5922e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2721, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  13%|█▎        | 119/890 [26:17<2:57:57, 13.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9309e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1946, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  13%|█▎        | 120/890 [26:31<2:58:07, 13.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4818, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  14%|█▎        | 121/890 [26:44<2:54:10, 13.59s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7684, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  14%|█▎        | 122/890 [26:57<2:52:00, 13.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0740, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0740, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0761, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4448, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8705e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4448, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  14%|█▍        | 123/890 [27:11<2:53:31, 13.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0916, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5506e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0916, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0925, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5384, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0315e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5384, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  14%|█▍        | 124/890 [27:24<2:52:40, 13.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.3340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  14%|█▍        | 125/890 [27:39<2:55:10, 13.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6759, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2008e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6759, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6769, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9026, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9026, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  14%|█▍        | 126/890 [27:52<2:52:53, 13.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9331, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4726, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4726, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  14%|█▍        | 127/890 [28:05<2:50:15, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9023, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  14%|█▍        | 128/890 [28:18<2:49:25, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2606e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8081, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4869, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4869, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  14%|█▍        | 129/890 [28:33<2:53:40, 13.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6463, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4116, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4116, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  15%|█▍        | 130/890 [28:45<2:50:23, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6793, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1824e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  15%|█▍        | 131/890 [28:59<2:49:07, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  15%|█▍        | 132/890 [29:12<2:49:44, 13.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5646, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5646, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5659, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0266, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0266, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  15%|█▍        | 133/890 [29:25<2:48:13, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1143, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  15%|█▌        | 134/890 [29:39<2:47:38, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7929, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7929, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7950, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1686, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1686, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  15%|█▌        | 135/890 [29:51<2:45:26, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9514, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4576, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8679e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4576, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  15%|█▌        | 136/890 [30:04<2:44:53, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4541, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5388, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5388, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  15%|█▌        | 137/890 [30:18<2:45:46, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5813, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5813, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5824, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▌        | 138/890 [30:31<2:45:52, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7862, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7862, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7895, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1661, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1661, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▌        | 139/890 [30:44<2:44:18, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0033, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9915, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6998, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6998, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▌        | 140/890 [30:58<2:46:11, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5688, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▌        | 141/890 [31:11<2:45:08, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7640, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7640, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7665, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▌        | 142/890 [31:23<2:42:58, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7144, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7144, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7163, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▌        | 143/890 [31:36<2:41:26, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9208, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7139, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7139, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▌        | 144/890 [31:49<2:41:13, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0548, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▋        | 145/890 [32:02<2:41:43, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2330, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2330, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2347, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  16%|█▋        | 146/890 [32:16<2:42:24, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5912, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6472, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6472, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  17%|█▋        | 147/890 [32:29<2:41:49, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7231, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0461, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0461, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  17%|█▋        | 148/890 [32:42<2:41:13, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7835, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  17%|█▋        | 149/890 [32:55<2:42:33, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2054, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2078, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2078, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  17%|█▋        | 150/890 [33:08<2:41:01, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0416, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0416, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0429, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  17%|█▋        | 151/890 [33:21<2:39:52, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6740, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7485e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  17%|█▋        | 152/890 [33:33<2:39:11, 12.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7534, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7534, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7547, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9240, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9240, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  17%|█▋        | 153/890 [33:46<2:38:03, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6575, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6575, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3982, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1869e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3982, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  17%|█▋        | 154/890 [33:59<2:38:51, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2156, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  17%|█▋        | 155/890 [34:13<2:40:44, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8027, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8379, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8379, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  18%|█▊        | 156/890 [34:26<2:42:09, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7745, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  18%|█▊        | 157/890 [34:39<2:41:12, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7535, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7535, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7570, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1810, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5165e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1810, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  18%|█▊        | 158/890 [34:52<2:39:05, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6430, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6430, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6441, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6484, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6484, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  18%|█▊        | 159/890 [35:05<2:37:57, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8869, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4228, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4228, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  18%|█▊        | 160/890 [35:18<2:37:39, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6116, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6116, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6144, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  18%|█▊        | 161/890 [35:31<2:37:56, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5400, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5400, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  18%|█▊        | 162/890 [35:44<2:38:11, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6817, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6817, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6831, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9203e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  18%|█▊        | 163/890 [35:57<2:38:39, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6494, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8292, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4126e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8292, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  18%|█▊        | 164/890 [36:10<2:38:00, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8087, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  19%|█▊        | 165/890 [36:23<2:37:00, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8176e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9103, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  19%|█▊        | 166/890 [36:36<2:35:42, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7031, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7031, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7051, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5613e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  19%|█▉        | 167/890 [36:49<2:36:33, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0240, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0240, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0265, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7192, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7192, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  19%|█▉        | 168/890 [37:02<2:37:08, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5008, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  19%|█▉        | 169/890 [37:15<2:36:56, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1152, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  19%|█▉        | 170/890 [37:29<2:37:41, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1974, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9584, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9584, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  19%|█▉        | 171/890 [37:42<2:36:56, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  19%|█▉        | 172/890 [37:55<2:37:14, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8679, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8679, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8691, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.0415, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0415, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  19%|█▉        | 173/890 [38:08<2:36:14, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2657, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0538, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0538, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  20%|█▉        | 174/890 [38:21<2:35:38, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8550, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4246e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8550, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8556, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4803, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4803, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  20%|█▉        | 175/890 [38:34<2:35:33, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4338, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7844, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7844, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  20%|█▉        | 176/890 [38:47<2:35:36, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1339, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7207, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8022e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7207, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  20%|█▉        | 177/890 [39:00<2:35:05, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8886, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1874e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  20%|██        | 178/890 [39:14<2:37:22, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8075, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8075, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8085, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  20%|██        | 179/890 [39:28<2:39:34, 13.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2142, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2142, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2157, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3893e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  20%|██        | 180/890 [39:41<2:37:09, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6949, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6949, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6961, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2742, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.8636e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2742, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  20%|██        | 181/890 [39:54<2:36:33, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2647, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  20%|██        | 182/890 [40:07<2:36:17, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3905e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6457, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  21%|██        | 183/890 [40:20<2:34:39, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4759, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4759, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2783, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2783, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  21%|██        | 184/890 [40:33<2:35:12, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7202, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  21%|██        | 185/890 [40:46<2:35:36, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9879, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9742, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9742, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  21%|██        | 186/890 [41:01<2:38:20, 13.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8630, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8630, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8643, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1794, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1794, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  21%|██        | 187/890 [41:14<2:36:55, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7749, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7224, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7224, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  21%|██        | 188/890 [41:27<2:35:59, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7735, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7735, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7748, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  21%|██        | 189/890 [41:41<2:38:42, 13.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6296, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9397, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4885e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9397, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  21%|██▏       | 190/890 [41:54<2:36:03, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8781, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8781, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8793, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.7820e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  21%|██▏       | 191/890 [42:07<2:34:09, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5566, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5566, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0035, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5601, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  22%|██▏       | 192/890 [42:20<2:34:32, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4145, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7411, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7411, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  22%|██▏       | 193/890 [42:33<2:33:37, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7201, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7166e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7201, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7210, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0595, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0692e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0595, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  22%|██▏       | 194/890 [42:47<2:34:38, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.0408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.0408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.0434, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9042, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9082e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9042, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  22%|██▏       | 195/890 [43:01<2:35:28, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7665, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5616e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  22%|██▏       | 196/890 [43:14<2:34:51, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5456e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0117, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7146, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1202e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7146, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  22%|██▏       | 197/890 [43:27<2:34:02, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6455, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6455, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6473, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  22%|██▏       | 198/890 [43:40<2:32:16, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8830, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7100e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  22%|██▏       | 199/890 [43:53<2:31:23, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7743, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7620, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7620, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  22%|██▏       | 200/890 [44:06<2:30:40, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.9631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.9645, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  23%|██▎       | 201/890 [44:20<2:34:24, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6551, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9520, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9520, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  23%|██▎       | 202/890 [44:34<2:34:22, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7863, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.6087, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1521e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.6087, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  23%|██▎       | 203/890 [44:47<2:32:47, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0212, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  23%|██▎       | 204/890 [45:00<2:32:05, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7127, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6543, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6543, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  23%|██▎       | 205/890 [45:13<2:30:42, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7349, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5106e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  23%|██▎       | 206/890 [45:26<2:29:12, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2009, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  23%|██▎       | 207/890 [45:39<2:29:30, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6587, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  23%|██▎       | 208/890 [45:52<2:29:41, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5658, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5367, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5979e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5367, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  23%|██▎       | 209/890 [46:05<2:28:45, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2697, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2697, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2713, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0089e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  24%|██▎       | 210/890 [46:19<2:29:39, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5876, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5876, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5890, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  24%|██▎       | 211/890 [46:32<2:28:54, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2704e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7606, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8828e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  24%|██▍       | 212/890 [46:46<2:30:47, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4499, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4499, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4523, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9898, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6842e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9898, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  24%|██▍       | 213/890 [46:59<2:31:23, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0892, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2671, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3395e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2671, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  24%|██▍       | 214/890 [47:12<2:29:53, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0165, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4338e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0165, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0174, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6904, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6904, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  24%|██▍       | 215/890 [47:25<2:28:06, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5857, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.0975, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0975, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  24%|██▍       | 216/890 [47:38<2:26:28, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8355, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8355, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8369, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0350, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0350, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  24%|██▍       | 217/890 [47:51<2:25:48, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2912, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9465, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9465, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  24%|██▍       | 218/890 [48:04<2:26:43, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9884, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9884, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9908, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  25%|██▍       | 219/890 [48:17<2:27:45, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1926, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1926, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1940, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7744, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.1731e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7744, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  25%|██▍       | 220/890 [48:31<2:29:35, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7439e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5507, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0726, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9726e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0726, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  25%|██▍       | 221/890 [48:44<2:28:40, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9484, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9484, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9494, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1898, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4038e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1898, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  25%|██▍       | 222/890 [48:58<2:27:24, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1970, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  25%|██▌       | 223/890 [49:11<2:26:31, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4500, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4500, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4518, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  25%|██▌       | 224/890 [49:24<2:26:13, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7234, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7234, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7256, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.5320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  25%|██▌       | 225/890 [49:37<2:26:36, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6099, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5896e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6099, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6107, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8554e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  25%|██▌       | 226/890 [49:50<2:25:45, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0856, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8819, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8819, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  26%|██▌       | 227/890 [50:04<2:26:40, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8234, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8234, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8244, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  26%|██▌       | 228/890 [50:16<2:24:55, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5663, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6896e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  26%|██▌       | 229/890 [50:30<2:25:23, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0420, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5710e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0420, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0430, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3189e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  26%|██▌       | 230/890 [50:43<2:24:06, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5059, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4274, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7611e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4274, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  26%|██▌       | 231/890 [50:55<2:22:30, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6987, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6987, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7001, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  26%|██▌       | 232/890 [51:09<2:24:12, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7536, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  26%|██▌       | 233/890 [51:26<2:35:31, 14.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5462, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5462, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5477, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  26%|██▋       | 234/890 [51:39<2:31:32, 13.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1095e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9944, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  26%|██▋       | 235/890 [51:52<2:29:08, 13.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4021, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3645e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4021, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4028, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6347, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6347, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  27%|██▋       | 236/890 [52:05<2:26:39, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9867, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9867, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9887, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5303e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  27%|██▋       | 237/890 [52:18<2:24:41, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5254, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  27%|██▋       | 238/890 [52:31<2:23:36, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8372e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2264, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2285, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2285, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  27%|██▋       | 239/890 [52:43<2:21:57, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6045, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  27%|██▋       | 240/890 [52:57<2:23:15, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7857, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7857, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7868, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2013, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2013, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  27%|██▋       | 241/890 [53:10<2:22:26, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4878, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1192e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4878, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4885, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6219, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5519e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6219, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  27%|██▋       | 242/890 [53:23<2:22:45, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6556, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6556, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6582, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1766, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1766, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  27%|██▋       | 243/890 [53:37<2:22:30, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8903, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  27%|██▋       | 244/890 [53:50<2:22:03, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.6444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.6466, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9383e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  28%|██▊       | 245/890 [54:04<2:25:23, 13.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5601, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5601, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5619, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3187e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  28%|██▊       | 246/890 [54:17<2:23:18, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5016, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5016, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0034, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7389, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7389, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  28%|██▊       | 247/890 [54:30<2:21:27, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0534, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9249e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0534, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0541, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5254e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  28%|██▊       | 248/890 [54:43<2:22:01, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4867e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7176, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6198, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1993e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6198, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  28%|██▊       | 249/890 [54:56<2:21:17, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0503, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1692, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7546e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1692, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  28%|██▊       | 250/890 [55:09<2:20:29, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6671, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6671, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6688, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7837, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7837, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  28%|██▊       | 251/890 [55:22<2:19:45, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8469e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6791, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  28%|██▊       | 252/890 [55:35<2:18:48, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9018, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  28%|██▊       | 253/890 [55:48<2:18:39, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5534, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5534, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0038, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5571, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9112e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▊       | 254/890 [56:01<2:17:57, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9081, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0297, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5998e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0297, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▊       | 255/890 [56:14<2:17:04, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6272, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6272, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6291, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4318, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2405e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4318, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▉       | 256/890 [56:27<2:16:12, 12.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8272, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2165e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▉       | 257/890 [56:40<2:16:55, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7060, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3976e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7060, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7068, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6605, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6605, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▉       | 258/890 [56:53<2:16:10, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6061, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6607e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6061, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7874, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7874, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▉       | 259/890 [57:06<2:15:39, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7065, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7065, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7091, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  29%|██▉       | 260/890 [57:19<2:16:34, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9971, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9223e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9971, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9977, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5636, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5636, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▉       | 261/890 [57:32<2:15:51, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8385e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2560, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5383, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5383, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  29%|██▉       | 262/890 [57:45<2:15:21, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1816, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1816, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1829, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  30%|██▉       | 263/890 [57:59<2:18:49, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9935e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8043, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  30%|██▉       | 264/890 [58:12<2:17:21, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8468, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6699, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6699, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  30%|██▉       | 265/890 [58:25<2:16:52, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5545, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8160e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5545, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5553, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2495e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  30%|██▉       | 266/890 [58:38<2:17:00, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7465e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9858, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9858, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  30%|███       | 267/890 [58:51<2:17:39, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0753, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8514, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9825e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8514, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  30%|███       | 268/890 [59:05<2:17:28, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0818, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1897e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0818, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0824, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9447, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9447, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  30%|███       | 269/890 [59:18<2:16:27, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5988, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5988, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6001, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  30%|███       | 270/890 [59:31<2:15:57, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0838, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9299, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9299, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  30%|███       | 271/890 [59:44<2:14:25, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4328e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4111, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  31%|███       | 272/890 [59:57<2:14:06, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8347, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7564e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8347, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8354, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4362e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  31%|███       | 273/890 [1:00:10<2:14:11, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1388, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1388, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1404, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  31%|███       | 274/890 [1:00:23<2:14:16, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5454e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2917, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  31%|███       | 275/890 [1:00:36<2:15:24, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2136e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8402, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6304e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  31%|███       | 276/890 [1:00:49<2:14:13, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0120, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4609, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4609, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  31%|███       | 277/890 [1:01:02<2:13:56, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1497e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9219, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6853, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.1329e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6853, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  31%|███       | 278/890 [1:01:16<2:14:16, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0530, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7745, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1492e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7745, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  31%|███▏      | 279/890 [1:01:28<2:12:26, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5678, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5678, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5694, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4312e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  31%|███▏      | 280/890 [1:01:41<2:11:47, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9185, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  32%|███▏      | 281/890 [1:01:55<2:13:49, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5116e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6055, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7289, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5925e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7289, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  32%|███▏      | 282/890 [1:02:08<2:12:24, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0028, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0028, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0040, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9267, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3084e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9267, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  32%|███▏      | 283/890 [1:02:21<2:12:32, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8318, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8318, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8335, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7339, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7339, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  32%|███▏      | 284/890 [1:02:34<2:12:22, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6934e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3214, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7747, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1890e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7747, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  32%|███▏      | 285/890 [1:02:47<2:10:58, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7871, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9247e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7871, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7880, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3083, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3083, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  32%|███▏      | 286/890 [1:03:00<2:10:57, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4229, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4229, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4240, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  32%|███▏      | 287/890 [1:03:13<2:12:27, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4554, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4554, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4569, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8585e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  32%|███▏      | 288/890 [1:03:27<2:13:01, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6295, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6139e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6295, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6303, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  32%|███▏      | 289/890 [1:03:40<2:11:29, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9083, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9083, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9099, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  33%|███▎      | 290/890 [1:03:53<2:11:20, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9186, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9186, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9196, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7013, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7013, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  33%|███▎      | 291/890 [1:04:05<2:09:44, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6202, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6202, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6218, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8783, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8064e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8783, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  33%|███▎      | 292/890 [1:04:19<2:12:49, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9387, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0004, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9387, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0036, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9423, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4322e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  33%|███▎      | 293/890 [1:04:32<2:11:13, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2141e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2071, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  33%|███▎      | 294/890 [1:04:49<2:22:13, 14.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9822, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7669, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7669, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  33%|███▎      | 295/890 [1:05:04<2:21:58, 14.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2277, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9173e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  33%|███▎      | 296/890 [1:05:17<2:18:28, 13.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9151e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2636e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  33%|███▎      | 297/890 [1:05:30<2:16:04, 13.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9713, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9713, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9730, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2615, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2130e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2615, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  33%|███▎      | 298/890 [1:05:44<2:16:14, 13.81s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4790, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4790, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4810, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3203e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  34%|███▎      | 299/890 [1:05:57<2:13:44, 13.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1823, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3334e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  34%|███▎      | 300/890 [1:06:10<2:11:36, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6328, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6744e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6328, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6338, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  34%|███▍      | 301/890 [1:06:24<2:12:27, 13.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6826e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0482, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  34%|███▍      | 302/890 [1:06:37<2:10:23, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9073, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9073, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9087, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.4622, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4622, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  34%|███▍      | 303/890 [1:06:50<2:09:06, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8679, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8679, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8702, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6699e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  34%|███▍      | 304/890 [1:07:03<2:10:57, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7083e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7675, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0495, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2449e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0495, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  34%|███▍      | 305/890 [1:07:16<2:08:55, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6593, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6593, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6621, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7785, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7785, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  34%|███▍      | 306/890 [1:07:29<2:07:54, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2454e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7245, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  34%|███▍      | 307/890 [1:07:42<2:07:41, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5061, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9553e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  35%|███▍      | 308/890 [1:07:55<2:07:20, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7673, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7673, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7692, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8657, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8657, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  35%|███▍      | 309/890 [1:08:08<2:06:21, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7476, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7241, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7241, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  35%|███▍      | 310/890 [1:08:21<2:05:55, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7535, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7535, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7546, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5945, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7471e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5945, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  35%|███▍      | 311/890 [1:08:34<2:05:01, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4966e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8992, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1026, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4848e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1026, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  35%|███▌      | 312/890 [1:08:47<2:04:20, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4627, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4169e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4627, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4635, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  35%|███▌      | 313/890 [1:09:00<2:05:19, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8035, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7471e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  35%|███▌      | 314/890 [1:09:13<2:05:04, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0591, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0591, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0611, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3334, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3334, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  35%|███▌      | 315/890 [1:09:26<2:05:21, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0124e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2324, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2431, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2431, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  36%|███▌      | 316/890 [1:09:40<2:07:37, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7218, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7218, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7236, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  36%|███▌      | 317/890 [1:09:53<2:06:45, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7284, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7284, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7301, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.3169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.3169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  36%|███▌      | 318/890 [1:10:07<2:05:57, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7308, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0630, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0630, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  36%|███▌      | 319/890 [1:10:20<2:05:06, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7926e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0989, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  36%|███▌      | 320/890 [1:10:32<2:04:08, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1656, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8176, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8176, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  36%|███▌      | 321/890 [1:10:45<2:03:35, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6292, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6292, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  36%|███▌      | 322/890 [1:11:00<2:07:40, 13.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5991, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7408e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5991, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5997, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0167, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1252e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0167, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  36%|███▋      | 323/890 [1:11:13<2:07:22, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5378, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1898e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5378, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  36%|███▋      | 324/890 [1:11:26<2:05:23, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8303, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8303, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8316, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  37%|███▋      | 325/890 [1:11:40<2:05:27, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0030, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6132, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4743, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4743, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  37%|███▋      | 326/890 [1:11:53<2:05:04, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5490, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5490, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5513, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0885, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0885, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  37%|███▋      | 327/890 [1:12:06<2:05:15, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8197, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8197, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8209, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4535, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9627e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4535, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  37%|███▋      | 328/890 [1:12:21<2:08:43, 13.74s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0686, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2796e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0686, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0692, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9902, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9902, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  37%|███▋      | 329/890 [1:12:34<2:07:14, 13.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6304, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6954e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6304, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6311, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  37%|███▋      | 330/890 [1:12:48<2:08:08, 13.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7854, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2285e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7854, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7860, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8179e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  37%|███▋      | 331/890 [1:13:02<2:09:01, 13.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6956e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  37%|███▋      | 332/890 [1:13:16<2:08:33, 13.82s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6547, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3046, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8439e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3046, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  37%|███▋      | 333/890 [1:13:30<2:07:50, 13.77s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7401, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7401, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7418, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0730, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0730, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  38%|███▊      | 334/890 [1:13:43<2:07:05, 13.72s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8375e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4553, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9461e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  38%|███▊      | 335/890 [1:13:57<2:05:39, 13.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6662, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6662, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6679, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  38%|███▊      | 336/890 [1:14:10<2:04:35, 13.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7351, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7351, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7370, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  38%|███▊      | 337/890 [1:14:24<2:04:57, 13.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8611e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0996, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4053, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4053, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  38%|███▊      | 338/890 [1:14:37<2:04:29, 13.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7030, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7030, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7042, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  38%|███▊      | 339/890 [1:14:51<2:05:21, 13.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5795, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5795, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5819, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  38%|███▊      | 340/890 [1:15:04<2:04:14, 13.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6722, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3427, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3427, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  38%|███▊      | 341/890 [1:15:17<2:02:30, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0473, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0473, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0493, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3077, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3077, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  38%|███▊      | 342/890 [1:15:31<2:01:56, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5581, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5581, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5601, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1037, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1037, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  39%|███▊      | 343/890 [1:15:44<2:01:27, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6710, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6710, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6731, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8121e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▊      | 344/890 [1:15:58<2:04:17, 13.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7461e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7421, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▉      | 345/890 [1:16:12<2:03:36, 13.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7576, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5405e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7576, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7584, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8322, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8322, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▉      | 346/890 [1:16:26<2:03:44, 13.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6271, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6271, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6286, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1244e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▉      | 347/890 [1:16:39<2:02:19, 13.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0059, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0059, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0080, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6192, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4013e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6192, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▉      | 348/890 [1:16:52<2:00:54, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0551, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0551, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0572, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9022, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1258e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9022, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▉      | 349/890 [1:17:05<2:01:00, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7407, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5038e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7407, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7413, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▉      | 350/890 [1:17:19<2:00:28, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1067, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1067, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1078, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1996e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  39%|███▉      | 351/890 [1:17:32<2:00:02, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2071, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2071, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2082, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7407e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  40%|███▉      | 352/890 [1:17:45<1:59:33, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3610e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8931, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8456, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8046e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8456, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  40%|███▉      | 353/890 [1:17:59<1:59:44, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8649, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8649, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8660, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2042, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2042, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  40%|███▉      | 354/890 [1:18:13<2:00:57, 13.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3494, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3494, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3506, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  40%|███▉      | 355/890 [1:18:26<1:59:56, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4714e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8806, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5354, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5354, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  40%|████      | 356/890 [1:18:39<1:59:03, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2294e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4730, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7095e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  40%|████      | 357/890 [1:18:53<1:59:05, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6903, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5774e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  40%|████      | 358/890 [1:19:06<1:58:21, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.2105, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.2105, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.2116, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6327, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5688e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6327, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  40%|████      | 359/890 [1:19:19<1:57:22, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0013, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5221e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0013, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0019, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6103, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6103, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  40%|████      | 360/890 [1:19:32<1:56:22, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.8208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.8234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6905, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8150e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6905, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  41%|████      | 361/890 [1:19:45<1:56:12, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0106e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6521, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0020, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4065e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0020, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  41%|████      | 362/890 [1:19:59<1:57:07, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6179, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6179, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6190, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5269, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5269, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  41%|████      | 363/890 [1:20:12<1:56:18, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  41%|████      | 364/890 [1:20:25<1:55:37, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2151e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7965, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5555, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5555, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  41%|████      | 365/890 [1:20:38<1:55:58, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8163, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9627e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8163, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8173, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  41%|████      | 366/890 [1:20:51<1:55:04, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6259e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9726, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1560e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  41%|████      | 367/890 [1:21:04<1:54:56, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7056, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8226e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7056, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  41%|████▏     | 368/890 [1:21:18<1:55:18, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9112e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6245, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8482, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8482, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  41%|████▏     | 369/890 [1:21:31<1:55:16, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6029, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0056e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6029, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6038, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7117, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7117, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  42%|████▏     | 370/890 [1:21:45<1:56:25, 13.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7850, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8732e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7850, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7121, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7121, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  42%|████▏     | 371/890 [1:21:59<1:56:37, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5836e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5755, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5549, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5549, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  42%|████▏     | 372/890 [1:22:12<1:56:06, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0713, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9408e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0713, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0720, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  42%|████▏     | 373/890 [1:22:25<1:55:18, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9247, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9247, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9258, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  42%|████▏     | 374/890 [1:22:38<1:53:49, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2897, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0297, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8581e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0297, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  42%|████▏     | 375/890 [1:22:51<1:53:31, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5571, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5760, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5760, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  42%|████▏     | 376/890 [1:23:04<1:52:50, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9902, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  42%|████▏     | 377/890 [1:23:17<1:52:20, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6700, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4634e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  42%|████▏     | 378/890 [1:23:31<1:52:18, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6164, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6164, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6184, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3671e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  43%|████▎     | 379/890 [1:23:44<1:51:46, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4729, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4729, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4748, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6538e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  43%|████▎     | 380/890 [1:23:56<1:50:58, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8459, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5533e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8459, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8468, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6919, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5207e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6919, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  43%|████▎     | 381/890 [1:24:10<1:51:15, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7637, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7637, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7647, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  43%|████▎     | 382/890 [1:24:23<1:51:29, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6349, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7672, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1861e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7672, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  43%|████▎     | 383/890 [1:24:36<1:50:34, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5288, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5288, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5302, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6545e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  43%|████▎     | 384/890 [1:24:49<1:50:38, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.2525e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8728, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9962, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.1728e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9962, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  43%|████▎     | 385/890 [1:25:02<1:50:27, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3884e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6093, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7169, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7169, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  43%|████▎     | 386/890 [1:25:16<1:50:38, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6895, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6895, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6907, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7117, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6765e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7117, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  43%|████▎     | 387/890 [1:25:29<1:50:59, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7530, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7530, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7547, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7045e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  44%|████▎     | 388/890 [1:25:42<1:50:36, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4033, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4033, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  44%|████▎     | 389/890 [1:25:55<1:50:32, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1963, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7988e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1963, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1969, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4338e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  44%|████▍     | 390/890 [1:26:08<1:49:51, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6570, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5549e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6570, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6576, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7879e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  44%|████▍     | 391/890 [1:26:21<1:49:04, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7598e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9313, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6541, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6541, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  44%|████▍     | 392/890 [1:26:35<1:49:19, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8838, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0424e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8838, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8843, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5421, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5421, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  44%|████▍     | 393/890 [1:26:48<1:48:50, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7237, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7237, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7250, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  44%|████▍     | 394/890 [1:27:02<1:50:31, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6394, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6394, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6414, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9067e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  44%|████▍     | 395/890 [1:27:15<1:49:51, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5661e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5302, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  44%|████▍     | 396/890 [1:27:28<1:48:43, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2984, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4158e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2984, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2992, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6955, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6955, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  45%|████▍     | 397/890 [1:27:41<1:47:54, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8009e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6688, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1953, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1084e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1953, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  45%|████▍     | 398/890 [1:27:54<1:47:26, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.2766, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4179e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.2766, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.2772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9515, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7193e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9515, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  45%|████▍     | 399/890 [1:28:07<1:46:51, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1848, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3909e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1848, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1856, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0267, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7344e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0267, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  45%|████▍     | 400/890 [1:28:20<1:46:38, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6128, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6128, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6571, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6571, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  45%|████▌     | 401/890 [1:28:33<1:45:59, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9161, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  45%|████▌     | 402/890 [1:28:46<1:46:01, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5590, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5590, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5601, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  45%|████▌     | 403/890 [1:29:00<1:48:04, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7623, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7623, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7636, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1216e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  45%|████▌     | 404/890 [1:29:13<1:48:09, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3523, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3523, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3536, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.6750, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6750, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  46%|████▌     | 405/890 [1:29:27<1:48:36, 13.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0126, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8476e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0126, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0132, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  46%|████▌     | 406/890 [1:29:40<1:47:51, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6826, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6826, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6842, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.3344, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3966e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.3344, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  46%|████▌     | 407/890 [1:29:53<1:46:52, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6909, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6909, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6933, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7812, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7812, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  46%|████▌     | 408/890 [1:30:06<1:46:21, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6819e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6529, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6042, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6042, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  46%|████▌     | 409/890 [1:30:20<1:46:20, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8745, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8745, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8759, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.5608e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  46%|████▌     | 410/890 [1:30:33<1:46:34, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8304, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5887e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8304, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8312, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  46%|████▌     | 411/890 [1:30:46<1:46:15, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1673, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1673, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1689, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7740, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7740, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  46%|████▋     | 412/890 [1:31:00<1:46:14, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2759, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4617, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3178e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4617, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  46%|████▋     | 413/890 [1:31:13<1:46:54, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5114, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7039e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5114, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9537, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2128e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9537, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  47%|████▋     | 414/890 [1:31:27<1:46:17, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7977, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7977, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8000, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  47%|████▋     | 415/890 [1:31:40<1:45:40, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5704, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5704, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0029, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5733, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9240, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9240, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  47%|████▋     | 416/890 [1:31:53<1:45:41, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2153, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5485, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5485, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  47%|████▋     | 417/890 [1:32:07<1:46:53, 13.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5526, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5526, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5544, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1010, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7025e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1010, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  47%|████▋     | 418/890 [1:32:21<1:46:39, 13.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8904, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8904, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8926, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4832, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4832, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  47%|████▋     | 419/890 [1:32:34<1:46:20, 13.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1285, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7594e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1285, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1290, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.2503, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2503, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  47%|████▋     | 420/890 [1:32:48<1:46:09, 13.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9268, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9268, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9278, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0870, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0870, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  47%|████▋     | 421/890 [1:33:03<1:48:57, 13.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5952, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5952, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5963, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9744, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9744, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  47%|████▋     | 422/890 [1:33:16<1:46:34, 13.66s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6667e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7131, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0257, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3039e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0257, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  48%|████▊     | 423/890 [1:33:29<1:45:20, 13.53s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9878e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5339, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8359e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  48%|████▊     | 424/890 [1:33:43<1:45:13, 13.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4905, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4905, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4924, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3602e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  48%|████▊     | 425/890 [1:33:56<1:45:15, 13.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7915, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.4485e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  48%|████▊     | 426/890 [1:34:11<1:46:25, 13.76s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6458, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2894e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6458, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6464, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  48%|████▊     | 427/890 [1:34:24<1:45:39, 13.69s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6503, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3330e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6503, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6510, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4069, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4069, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  48%|████▊     | 428/890 [1:34:38<1:45:43, 13.73s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1733, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2516, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.8604e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2516, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  48%|████▊     | 429/890 [1:34:51<1:44:37, 13.62s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9420, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0041e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9420, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9426, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  48%|████▊     | 430/890 [1:35:04<1:42:41, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9125, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6835e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9125, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9135, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.2612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  48%|████▊     | 431/890 [1:35:17<1:41:54, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6131, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0707e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6131, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6138, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  49%|████▊     | 432/890 [1:35:31<1:42:39, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4815, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4815, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4826, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9874, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0185e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9874, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  49%|████▊     | 433/890 [1:35:44<1:41:43, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1130, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9410e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1130, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1138, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6150, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7167e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6150, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  49%|████▉     | 434/890 [1:35:58<1:42:25, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0951, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0951, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0962, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9835e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  49%|████▉     | 435/890 [1:36:12<1:42:40, 13.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6176, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9524e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6176, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6184, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1463, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2269e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1463, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  49%|████▉     | 436/890 [1:36:25<1:41:30, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1229, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1229, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1254, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(5.0909, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.6862e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.0909, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  49%|████▉     | 437/890 [1:36:38<1:41:35, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4489, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7846e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4489, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4498, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6710, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6710, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  49%|████▉     | 438/890 [1:36:51<1:40:30, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.0940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.0940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.0958, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  49%|████▉     | 439/890 [1:37:04<1:39:26, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  49%|████▉     | 440/890 [1:37:18<1:39:35, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0559, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8732e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0559, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0567, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9483, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9483, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  50%|████▉     | 441/890 [1:37:31<1:39:44, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1140, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9335, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9335, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  50%|████▉     | 442/890 [1:37:45<1:40:00, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8168, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8168, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8183, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  50%|████▉     | 443/890 [1:37:58<1:39:56, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7422, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7422, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7439, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3274, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3274, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  50%|████▉     | 444/890 [1:38:11<1:39:21, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7423, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7988e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7423, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7429, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0134, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0134, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  50%|█████     | 445/890 [1:38:25<1:39:23, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.1174, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.1174, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.1184, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8843, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8843, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  50%|█████     | 446/890 [1:38:38<1:38:11, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6100, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6100, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4524, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4524, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  50%|█████     | 447/890 [1:38:51<1:37:38, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4203, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9766e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4203, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4210, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8486, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8486, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  50%|█████     | 448/890 [1:39:05<1:38:10, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7225, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7225, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7240, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8987e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  50%|█████     | 449/890 [1:39:18<1:37:05, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7057, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7057, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7079, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9108e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  51%|█████     | 450/890 [1:39:31<1:37:37, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4332, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7868, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7868, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  51%|█████     | 451/890 [1:39:44<1:37:08, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0929, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  51%|█████     | 452/890 [1:39:58<1:37:05, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0330, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0330, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0343, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5381, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5508e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5381, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  51%|█████     | 453/890 [1:40:10<1:35:47, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9006e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6166, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0096e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  51%|█████     | 454/890 [1:40:23<1:35:16, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7373e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5552, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.8332, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8332, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  51%|█████     | 455/890 [1:40:36<1:34:40, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9206, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4291e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9206, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9215, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1046, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1046, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  51%|█████     | 456/890 [1:40:50<1:35:08, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0398, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0398, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0421, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  51%|█████▏    | 457/890 [1:41:03<1:34:56, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0806, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5027, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0919e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5027, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  51%|█████▏    | 458/890 [1:41:16<1:33:37, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7491, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7491, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7510, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8627, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9032e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8627, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  52%|█████▏    | 459/890 [1:41:29<1:33:32, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4940, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6224e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4940, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4946, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0384, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0933e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0384, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  52%|█████▏    | 460/890 [1:41:42<1:33:32, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6302, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6302, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6313, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8744e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  52%|█████▏    | 461/890 [1:41:55<1:33:56, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4395, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  52%|█████▏    | 462/890 [1:42:08<1:33:17, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5254, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0539e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5254, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5263, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8315, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1717e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8315, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  52%|█████▏    | 463/890 [1:42:21<1:32:39, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6998, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6998, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7009, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4744e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  52%|█████▏    | 464/890 [1:42:34<1:32:47, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0844, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0844, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0861, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6087, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6087, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  52%|█████▏    | 465/890 [1:42:47<1:33:02, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5158, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5533e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5158, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5165, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  52%|█████▏    | 466/890 [1:43:01<1:33:35, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4758, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4758, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4771, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4982, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4982, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  52%|█████▏    | 467/890 [1:43:14<1:33:37, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7167, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7167, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7181, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5566, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7052e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5566, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  53%|█████▎    | 468/890 [1:43:27<1:33:05, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8206, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8215e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8206, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8216, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6380, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6380, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  53%|█████▎    | 469/890 [1:43:41<1:33:38, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5697, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9051e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5697, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5703, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8192, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6692e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8192, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  53%|█████▎    | 470/890 [1:43:54<1:32:23, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6600, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  53%|█████▎    | 471/890 [1:44:07<1:31:21, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5297, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6748, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3636e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6748, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  53%|█████▎    | 472/890 [1:44:20<1:31:54, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5693, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5693, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5706, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.0081e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  53%|█████▎    | 473/890 [1:44:33<1:31:16, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5299, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  53%|█████▎    | 474/890 [1:44:46<1:31:19, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3103, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7973, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7973, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  53%|█████▎    | 475/890 [1:45:00<1:32:02, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4502, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4502, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4515, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9271, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9271, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  53%|█████▎    | 476/890 [1:45:13<1:31:16, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5472, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9367e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5472, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5479, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9177, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8870e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9177, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  54%|█████▎    | 477/890 [1:45:26<1:30:53, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4017, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9579e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4017, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4023, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6694, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6694, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  54%|█████▎    | 478/890 [1:45:39<1:30:28, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6963, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0026, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  54%|█████▍    | 479/890 [1:45:52<1:30:10, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7110, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7110, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6713e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  54%|█████▍    | 480/890 [1:46:05<1:29:29, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4478, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  54%|█████▍    | 481/890 [1:46:18<1:28:35, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5099, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.2679e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5099, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5104, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.4225e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  54%|█████▍    | 482/890 [1:46:31<1:27:57, 12.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5317, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3301e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5317, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5322, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7001, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0388e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7001, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  54%|█████▍    | 483/890 [1:46:44<1:28:37, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9920e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8592, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8568, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8568, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  54%|█████▍    | 484/890 [1:46:58<1:28:57, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8329, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1635e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8329, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8338, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8086, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5268e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8086, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  54%|█████▍    | 485/890 [1:47:11<1:28:54, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5888, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5888, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5899, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7287, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0382e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7287, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  55%|█████▍    | 486/890 [1:47:24<1:27:50, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1849, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3276, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.3132e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3276, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  55%|█████▍    | 487/890 [1:47:36<1:26:59, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8420e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9291, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5558, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5558, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  55%|█████▍    | 488/890 [1:47:49<1:26:57, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1599, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6757, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6757, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  55%|█████▍    | 489/890 [1:48:02<1:26:38, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1471, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6554e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1471, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0218, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1489e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0218, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  55%|█████▌    | 490/890 [1:48:16<1:27:08, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1993, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1993, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2017, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7004, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7004, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  55%|█████▌    | 491/890 [1:48:29<1:27:29, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6868, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6868, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6878, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0872, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6228e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0872, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  55%|█████▌    | 492/890 [1:48:43<1:28:25, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8367, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8367, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8379, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  55%|█████▌    | 493/890 [1:48:56<1:27:11, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7303, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7303, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7321, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9928e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  56%|█████▌    | 494/890 [1:49:08<1:25:40, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0351, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  56%|█████▌    | 495/890 [1:49:22<1:26:24, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6543, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6543, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6555, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4143e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  56%|█████▌    | 496/890 [1:49:35<1:26:12, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8253, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8253, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8268, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.5369e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  56%|█████▌    | 497/890 [1:49:47<1:25:15, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0669, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7800e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0669, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0676, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9930, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2633e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9930, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  56%|█████▌    | 498/890 [1:50:01<1:25:25, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7110, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9894, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6439e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9894, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  56%|█████▌    | 499/890 [1:50:14<1:25:27, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6390, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6390, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6403, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7660, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.2844e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7660, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  56%|█████▌    | 500/890 [1:50:27<1:24:32, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5618, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6640e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5618, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5626, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9799, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3969e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9799, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  56%|█████▋    | 501/890 [1:50:39<1:23:36, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7035, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7035, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7046, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2908, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4576e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2908, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  56%|█████▋    | 502/890 [1:50:52<1:23:57, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5948, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5239e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5948, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5955, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  57%|█████▋    | 503/890 [1:51:05<1:23:15, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6755, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5826e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  57%|█████▋    | 504/890 [1:51:18<1:23:20, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6105, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6105, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6126, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  57%|█████▋    | 505/890 [1:51:31<1:23:40, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7785e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6240, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8431, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8431, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  57%|█████▋    | 506/890 [1:51:44<1:22:59, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5749, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5749, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5761, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.9985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  57%|█████▋    | 507/890 [1:51:58<1:23:38, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5461, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6426, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0774e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6426, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  57%|█████▋    | 508/890 [1:52:11<1:23:59, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5474, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2806e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  57%|█████▋    | 509/890 [1:52:24<1:23:01, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5122, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5122, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5139, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7893, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9660e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7893, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  57%|█████▋    | 510/890 [1:52:37<1:22:21, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6681, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6681, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6695, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6090, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6090, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  57%|█████▋    | 511/890 [1:52:50<1:21:48, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8152, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8152, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8165, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7700, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.3437e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7700, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  58%|█████▊    | 512/890 [1:53:03<1:21:48, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4426, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9851, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9851, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  58%|█████▊    | 513/890 [1:53:15<1:21:15, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4275, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3197e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4275, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4282, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.8852e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  58%|█████▊    | 514/890 [1:53:29<1:21:38, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1864, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1864, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1875, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2057e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  58%|█████▊    | 515/890 [1:53:42<1:21:50, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0031, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9876, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1196, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.4874e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1196, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  58%|█████▊    | 516/890 [1:53:55<1:22:23, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0028, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8028, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4407, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6258e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4407, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  58%|█████▊    | 517/890 [1:54:08<1:21:41, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3573e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8596, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2656, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3302e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2656, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  58%|█████▊    | 518/890 [1:54:21<1:21:06, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9870, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2789, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9372e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2789, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  58%|█████▊    | 519/890 [1:54:34<1:20:13, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5668, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2919e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5668, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5676, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3774e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  58%|█████▊    | 520/890 [1:54:48<1:21:44, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2676, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2676, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2692, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0422, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0422, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  59%|█████▊    | 521/890 [1:55:01<1:20:41, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9840e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0708, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8847e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  59%|█████▊    | 522/890 [1:55:14<1:21:32, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5716, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5716, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5727, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  59%|█████▉    | 523/890 [1:55:28<1:21:15, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8774, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8774, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8785, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4285, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7665e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4285, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  59%|█████▉    | 524/890 [1:55:41<1:20:31, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4292, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4292, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4308, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9958, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9958, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  59%|█████▉    | 525/890 [1:55:54<1:19:42, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2520, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2520, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2544, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.8601e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  59%|█████▉    | 526/890 [1:56:07<1:19:24, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7191, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2207, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9340e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2207, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  59%|█████▉    | 527/890 [1:56:20<1:18:56, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5092, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5092, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5111, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3628, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3628, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  59%|█████▉    | 528/890 [1:56:33<1:18:35, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9939, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9939, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9951, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7569e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  59%|█████▉    | 529/890 [1:56:45<1:18:09, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0594, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  60%|█████▉    | 530/890 [1:56:58<1:17:58, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5874, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9887e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5874, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5884, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3046, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2580e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3046, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  60%|█████▉    | 531/890 [1:57:12<1:18:14, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6476, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.1742e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6476, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7405e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  60%|█████▉    | 532/890 [1:57:24<1:17:24, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.1620, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.1620, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.1634, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.2249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  60%|█████▉    | 533/890 [1:57:37<1:16:57, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6217e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7253, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  60%|██████    | 534/890 [1:57:50<1:16:52, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6004e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0069, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9130, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.8259e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9130, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  60%|██████    | 535/890 [1:58:03<1:16:39, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7032, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8839e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7032, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7040, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6846, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0201e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6846, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  60%|██████    | 536/890 [1:58:16<1:16:33, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9179, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9179, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9197, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7743e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  60%|██████    | 537/890 [1:58:29<1:16:11, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7833, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0716, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0716, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  60%|██████    | 538/890 [1:58:43<1:17:02, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0599e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7848, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7910, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2271e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7910, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  61%|██████    | 539/890 [1:58:56<1:16:33, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7675, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6504, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6504, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  61%|██████    | 540/890 [1:59:09<1:16:28, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8868e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8275, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  61%|██████    | 541/890 [1:59:22<1:16:16, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9691e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7371, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2787, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2787, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  61%|██████    | 542/890 [1:59:35<1:15:28, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2937e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5585, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4011e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  61%|██████    | 543/890 [1:59:48<1:14:42, 12.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7347, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7323, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5133e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7323, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  61%|██████    | 544/890 [2:00:00<1:14:32, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0323, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9814e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0323, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0331, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  61%|██████    | 545/890 [2:00:13<1:14:00, 12.87s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5430, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5430, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5448, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  61%|██████▏   | 546/890 [2:00:27<1:14:46, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6583, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6583, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6596, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  61%|██████▏   | 547/890 [2:00:40<1:14:47, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4789e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8201, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9058e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  62%|██████▏   | 548/890 [2:00:53<1:14:37, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6975, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6975, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6990, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3095, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3095, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  62%|██████▏   | 549/890 [2:01:06<1:13:42, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5738, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8710, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8710, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  62%|██████▏   | 550/890 [2:01:18<1:13:15, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7006, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0358, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3982e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0358, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  62%|██████▏   | 551/890 [2:01:31<1:13:03, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1033, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5663, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8394e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5663, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  62%|██████▏   | 552/890 [2:01:44<1:12:43, 12.91s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7904, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7904, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7916, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5876e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  62%|██████▏   | 553/890 [2:01:57<1:12:46, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9022, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3652e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  62%|██████▏   | 554/890 [2:02:10<1:12:23, 12.93s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2751, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5696, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0317e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5696, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  62%|██████▏   | 555/890 [2:02:24<1:13:04, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4706, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2588e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4706, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4714, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6695, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4806e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6695, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  62%|██████▏   | 556/890 [2:02:36<1:12:24, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4172, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4172, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4183, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7868, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4849e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7868, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  63%|██████▎   | 557/890 [2:02:49<1:11:59, 12.97s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5709, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5709, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5724, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  63%|██████▎   | 558/890 [2:03:02<1:11:54, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3674, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3674, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3688, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7802, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7802, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  63%|██████▎   | 559/890 [2:03:15<1:11:24, 12.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5702, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7023, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.7991e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7023, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  63%|██████▎   | 560/890 [2:03:28<1:11:18, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6348, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6348, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8177e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  63%|██████▎   | 561/890 [2:03:41<1:10:44, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9409, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1284e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9409, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9415, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6202e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  63%|██████▎   | 562/890 [2:03:55<1:11:40, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7216, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7216, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7234, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6811, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3780e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6811, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  63%|██████▎   | 563/890 [2:04:08<1:11:40, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4579, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4579, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4593, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8411, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8411, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  63%|██████▎   | 564/890 [2:04:21<1:11:21, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5728, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9978e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5728, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5734, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9498e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  63%|██████▎   | 565/890 [2:04:34<1:11:28, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3809e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4147, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2731e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  64%|██████▎   | 566/890 [2:04:47<1:10:32, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2454e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2610, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6976, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6976, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  64%|██████▎   | 567/890 [2:05:00<1:09:44, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0913, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0913, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0938, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7043, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4393e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7043, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  64%|██████▍   | 568/890 [2:05:12<1:08:56, 12.85s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0619, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2023e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0619, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0626, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  64%|██████▍   | 569/890 [2:05:25<1:08:39, 12.83s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2238, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2535e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2238, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2247, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6381, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8816e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6381, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  64%|██████▍   | 570/890 [2:05:38<1:08:49, 12.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7293, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8879e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7293, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7302, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8434e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  64%|██████▍   | 571/890 [2:05:52<1:09:26, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8104, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0587, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0043e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0587, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  64%|██████▍   | 572/890 [2:06:04<1:08:52, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4275, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4275, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4289, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5108, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5108, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  64%|██████▍   | 573/890 [2:06:17<1:08:16, 12.92s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7375, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7375, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7391, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5834e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  64%|██████▍   | 574/890 [2:06:30<1:07:53, 12.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2131, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  65%|██████▍   | 575/890 [2:06:43<1:07:31, 12.86s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2532, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2532, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2548, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7027e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  65%|██████▍   | 576/890 [2:06:56<1:07:50, 12.96s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9879, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9879, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9898, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.4350e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  65%|██████▍   | 577/890 [2:07:09<1:07:31, 12.95s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6417, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6417, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6434, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0210, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0210, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  65%|██████▍   | 578/890 [2:07:23<1:08:33, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6377, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0507e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6377, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9477, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0151e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9477, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  65%|██████▌   | 579/890 [2:07:36<1:08:48, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6632, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7168e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6632, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6640, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8088, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8088, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  65%|██████▌   | 580/890 [2:07:49<1:07:54, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8981, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8047, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8047, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  65%|██████▌   | 581/890 [2:08:02<1:08:08, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3859, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3859, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3871, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2967, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6620e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2967, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  65%|██████▌   | 582/890 [2:08:15<1:07:32, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7450, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1538e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7450, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7456, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4721, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4721, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  66%|██████▌   | 583/890 [2:08:28<1:07:12, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7044, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7044, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7063, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7113, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0280e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7113, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  66%|██████▌   | 584/890 [2:08:41<1:06:45, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8797, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8797, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8808, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3970e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  66%|██████▌   | 585/890 [2:08:54<1:06:16, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2404, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  66%|██████▌   | 586/890 [2:09:08<1:06:40, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0062, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1168e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0062, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0072, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1200, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1659e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1200, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  66%|██████▌   | 587/890 [2:09:21<1:06:55, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7470, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0578e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  66%|██████▌   | 588/890 [2:09:35<1:06:48, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8704, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8704, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8719, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0488, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.2469e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0488, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  66%|██████▌   | 589/890 [2:09:48<1:06:03, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8479, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6170, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6170, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  66%|██████▋   | 590/890 [2:10:00<1:05:31, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9428, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9428, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9440, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8654, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.7175e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8654, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  66%|██████▋   | 591/890 [2:10:13<1:04:47, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0309, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0309, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0337, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2532, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2532, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  67%|██████▋   | 592/890 [2:10:26<1:04:50, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5544, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5544, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5555, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  67%|██████▋   | 593/890 [2:10:40<1:05:29, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.6050e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3256, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6560e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  67%|██████▋   | 594/890 [2:10:53<1:05:19, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6951, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6951, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6967, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1044e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  67%|██████▋   | 595/890 [2:11:06<1:04:33, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5194, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1567e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5194, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5201, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7175, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2145e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7175, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  67%|██████▋   | 596/890 [2:11:19<1:04:18, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6880e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4965, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5983, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5983, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  67%|██████▋   | 597/890 [2:11:33<1:04:22, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0089, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0089, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0100, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5562e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  67%|██████▋   | 598/890 [2:11:45<1:03:37, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9248e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6615, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2788, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3751e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2788, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  67%|██████▋   | 599/890 [2:11:59<1:03:55, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9255, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8610, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8610, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  67%|██████▋   | 600/890 [2:12:12<1:03:24, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8273, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8273, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.8288, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8911, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8189e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8911, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  68%|██████▊   | 601/890 [2:12:25<1:03:09, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9623, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8620e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9623, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9631, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7945, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8272e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7945, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  68%|██████▊   | 602/890 [2:12:39<1:04:02, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0631e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7738, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0625, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.7897e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0625, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  68%|██████▊   | 603/890 [2:12:52<1:03:50, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7010, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4242, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.5672e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4242, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  68%|██████▊   | 604/890 [2:13:06<1:03:41, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3379, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3349e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3379, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3385, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7330, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7330, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  68%|██████▊   | 605/890 [2:13:19<1:03:26, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1032, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1032, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1049, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5986, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4925e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5986, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  68%|██████▊   | 606/890 [2:13:32<1:02:50, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.8181, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7485e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.8181, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.8187, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  68%|██████▊   | 607/890 [2:13:45<1:02:34, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5173, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3223e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5173, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5179, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7725, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8946e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7725, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  68%|██████▊   | 608/890 [2:13:59<1:02:25, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5453, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5453, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5466, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.8839, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8839, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  68%|██████▊   | 609/890 [2:14:12<1:02:35, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0898e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5817, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7692, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7692, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  69%|██████▊   | 610/890 [2:14:25<1:02:09, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5948, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2692e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5948, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5952, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9886, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2488e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9886, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  69%|██████▊   | 611/890 [2:14:39<1:01:46, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5023e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6708, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9324, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9324, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  69%|██████▉   | 612/890 [2:14:52<1:01:20, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0398, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0398, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0027, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0425, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5409, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5409, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  69%|██████▉   | 613/890 [2:15:05<1:01:14, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6933, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3436e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6933, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6942, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9912, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.2268e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9912, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  69%|██████▉   | 614/890 [2:15:18<1:00:54, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9718, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9718, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9729, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8460, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8460, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  69%|██████▉   | 615/890 [2:15:31<1:00:27, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4867, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4867, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4880, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8525, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.6293e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8525, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  69%|██████▉   | 616/890 [2:15:44<1:00:04, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6521, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6521, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6533, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5685e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  69%|██████▉   | 617/890 [2:15:57<59:32, 13.09s/batch]  

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2576, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6946e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  69%|██████▉   | 618/890 [2:16:11<1:00:04, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5746, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5746, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5757, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7953e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  70%|██████▉   | 619/890 [2:16:24<59:39, 13.21s/batch]  

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2197, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5664e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  70%|██████▉   | 620/890 [2:16:37<59:14, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1487, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1487, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1502, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1778, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7157e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1778, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  70%|██████▉   | 621/890 [2:16:51<59:45, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5007e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6694, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6187e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  70%|██████▉   | 622/890 [2:17:04<59:11, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.8567, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.8567, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.8581, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8856, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8856, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  70%|███████   | 623/890 [2:17:17<58:48, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6793, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6793, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6807, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4967, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2536e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4967, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  70%|███████   | 624/890 [2:17:30<58:46, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5813, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9017e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5813, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5819, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9598, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9598, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  70%|███████   | 625/890 [2:17:44<58:25, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.6107, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.6107, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.6121, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6647, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6909e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6647, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  70%|███████   | 626/890 [2:17:58<59:19, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1688, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1688, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1702, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6897, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6897, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  70%|███████   | 627/890 [2:18:11<59:04, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0278, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8744e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0278, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0287, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.6780e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  71%|███████   | 628/890 [2:18:25<59:09, 13.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4464, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4464, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4475, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1980, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.4920e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1980, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  71%|███████   | 629/890 [2:18:38<58:31, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1123, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2956, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2956, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  71%|███████   | 630/890 [2:18:51<57:31, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5391, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5391, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5405, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0208e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  71%|███████   | 631/890 [2:19:04<56:54, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9275, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0744e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9275, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9282, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4371, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4371, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  71%|███████   | 632/890 [2:19:17<56:32, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0894e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4605, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2600, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9584e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2600, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  71%|███████   | 633/890 [2:19:30<56:30, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3054e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8579, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4875, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4875, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  71%|███████   | 634/890 [2:19:43<56:07, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9208, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.7897e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9208, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9212, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5957, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7787e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5957, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  71%|███████▏  | 635/890 [2:19:57<56:10, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9308, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9308, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9320, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9381, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9381, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  71%|███████▏  | 636/890 [2:20:10<56:11, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4822, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1577e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4822, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4830, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.0126e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  72%|███████▏  | 637/890 [2:20:23<55:45, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7548, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0257e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7548, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7554, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8209e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  72%|███████▏  | 638/890 [2:20:36<55:24, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7128, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7128, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7141, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2329, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8028e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2329, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  72%|███████▏  | 639/890 [2:20:49<54:44, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7022, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0562, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0732e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0562, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  72%|███████▏  | 640/890 [2:21:02<54:39, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7858, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7858, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7877, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.4842, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0857e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4842, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  72%|███████▏  | 641/890 [2:21:16<54:35, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4468, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6892, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5280e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6892, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  72%|███████▏  | 642/890 [2:21:28<53:52, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5351, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5351, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5373, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7467, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7467, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  72%|███████▏  | 643/890 [2:21:42<53:54, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5649, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3262, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3262, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  72%|███████▏  | 644/890 [2:21:55<54:34, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8830, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2737e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8830, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8836, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8054, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.8338e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8054, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  72%|███████▏  | 645/890 [2:22:09<54:11, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0082, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0082, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0095, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8114, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.6090e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8114, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  73%|███████▎  | 646/890 [2:22:22<53:39, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7929e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5107, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.8724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.8724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  73%|███████▎  | 647/890 [2:22:35<53:16, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0865, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0865, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0881, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  73%|███████▎  | 648/890 [2:22:48<53:19, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0635, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4895e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0635, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0640, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0365, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2526e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0365, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  73%|███████▎  | 649/890 [2:23:02<53:55, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3081, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3081, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3099, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3232, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3022e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3232, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  73%|███████▎  | 650/890 [2:23:15<53:09, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1738, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1738, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0024, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5132, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5132, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  73%|███████▎  | 651/890 [2:23:28<53:20, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5142e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2398, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  73%|███████▎  | 652/890 [2:23:42<53:19, 13.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7777, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3974e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7777, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7787, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8714, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8111e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8714, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  73%|███████▎  | 653/890 [2:23:55<53:04, 13.44s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0742, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0742, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6835, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9121e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6835, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  73%|███████▎  | 654/890 [2:24:08<52:22, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8374, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4638e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8374, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8381, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  74%|███████▎  | 655/890 [2:24:21<51:41, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0475, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5974e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0475, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0481, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7342, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1216e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7342, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  74%|███████▎  | 656/890 [2:24:34<51:17, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6982, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2637e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6982, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6990, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6116, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7459e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6116, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  74%|███████▍  | 657/890 [2:24:47<50:54, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4647, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4647, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4665, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8070, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5183e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8070, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  74%|███████▍  | 658/890 [2:25:01<50:38, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4466, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1370e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4466, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4472, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0633e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  74%|███████▍  | 659/890 [2:25:14<50:20, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4448, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0097, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0097, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  74%|███████▍  | 660/890 [2:25:27<49:59, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5185, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7021e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5185, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5192, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6806e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  74%|███████▍  | 661/890 [2:25:40<50:11, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0025, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0025, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0042, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5276, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.6054e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5276, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  74%|███████▍  | 662/890 [2:25:53<49:49, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1739, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2420e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1739, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1747, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6399e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  74%|███████▍  | 663/890 [2:26:06<49:36, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5969, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5587e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5969, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5977, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3191e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  75%|███████▍  | 664/890 [2:26:19<49:24, 13.12s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1682, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1682, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1696, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  75%|███████▍  | 665/890 [2:26:32<49:16, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8359, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6387e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8359, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8367, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6742, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6742, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  75%|███████▍  | 666/890 [2:26:46<49:28, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6283, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6283, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6296, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7507, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6503e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7507, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  75%|███████▍  | 667/890 [2:26:59<49:30, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9942, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0664, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0108e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0664, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  75%|███████▌  | 668/890 [2:27:13<49:19, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7183, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3521e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7183, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7188, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5808, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5808, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  75%|███████▌  | 669/890 [2:27:26<49:06, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9542, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9542, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9554, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5589, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5589, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  75%|███████▌  | 670/890 [2:27:39<48:20, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7290, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7290, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7307, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  75%|███████▌  | 671/890 [2:27:52<47:48, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0015, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5123, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2131e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5123, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  76%|███████▌  | 672/890 [2:28:05<47:18, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1696, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3640e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1696, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1700, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2482e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  76%|███████▌  | 673/890 [2:28:18<47:06, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0860, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7446, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5586e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7446, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  76%|███████▌  | 674/890 [2:28:31<47:04, 13.08s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1764, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7958e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1764, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1774, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1651, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1651, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  76%|███████▌  | 675/890 [2:28:44<46:46, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2882, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2882, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2893, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7245, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8271e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7245, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  76%|███████▌  | 676/890 [2:28:57<46:25, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0340, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.6094e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0340, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0345, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9030, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.4989e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9030, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  76%|███████▌  | 677/890 [2:29:10<46:43, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1843, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8719e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  76%|███████▌  | 678/890 [2:29:23<46:08, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9780e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9571, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7436e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  76%|███████▋  | 679/890 [2:29:36<45:39, 12.98s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5639e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9699, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9629, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1941e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9629, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  76%|███████▋  | 680/890 [2:29:49<45:30, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6431, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2114e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6431, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6438, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9872e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  77%|███████▋  | 681/890 [2:30:02<45:22, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1306, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1306, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1321, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  77%|███████▋  | 682/890 [2:30:15<45:09, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3881e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1217, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5273, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5273, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  77%|███████▋  | 683/890 [2:30:28<45:10, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4083, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9091, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0843e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9091, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  77%|███████▋  | 684/890 [2:30:41<44:57, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1456, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4326e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1456, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1464, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8393e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  77%|███████▋  | 685/890 [2:30:54<44:32, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9800, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6729, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4853e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6729, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  77%|███████▋  | 686/890 [2:31:07<44:10, 12.99s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6000, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3661e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6000, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6009, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6224, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2665e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6224, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  77%|███████▋  | 687/890 [2:31:20<44:05, 13.03s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5796, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7406, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7406, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  77%|███████▋  | 688/890 [2:31:34<44:30, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6385, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6385, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6395, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6409, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6409, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  77%|███████▋  | 689/890 [2:31:47<44:02, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5538, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5538, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5557, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7379, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0113e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7379, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  78%|███████▊  | 690/890 [2:32:00<43:55, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7595, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3407e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7595, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7601, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1291, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.8211e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1291, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  78%|███████▊  | 691/890 [2:32:14<43:47, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7218, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7218, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7231, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4973, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5321e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4973, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  78%|███████▊  | 692/890 [2:32:27<43:25, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7531, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3986e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7531, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7537, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6782, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7525e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6782, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  78%|███████▊  | 693/890 [2:32:40<43:11, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.1211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.1211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.1225, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0631, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6814e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0631, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  78%|███████▊  | 694/890 [2:32:53<43:10, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6232, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6232, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6245, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7733, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7733, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  78%|███████▊  | 695/890 [2:33:06<42:35, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4626e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2342, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9942, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8427e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9942, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  78%|███████▊  | 696/890 [2:33:19<42:12, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6304, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6304, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6317, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9434, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2808e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9434, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  78%|███████▊  | 697/890 [2:33:32<42:02, 13.07s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4994, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4994, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5012, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6455, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4838e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6455, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  78%|███████▊  | 698/890 [2:33:45<42:05, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1833, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9372e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1833, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1838, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5045, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5045, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  79%|███████▊  | 699/890 [2:33:59<42:36, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3620, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3355e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3620, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3629, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7922, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9204e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7922, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  79%|███████▊  | 700/890 [2:34:14<43:19, 13.68s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2051, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0094, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8251e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0094, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  79%|███████▉  | 701/890 [2:34:27<42:40, 13.55s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7915, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8643, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2705e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8643, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  79%|███████▉  | 702/890 [2:34:40<41:40, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6683, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6683, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6701, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7669, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7669, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  79%|███████▉  | 703/890 [2:34:53<41:19, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3348e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7117, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5456e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  79%|███████▉  | 704/890 [2:35:06<41:33, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.3987, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9370e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  79%|███████▉  | 705/890 [2:35:19<40:51, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5378, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1619, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4492e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1619, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  79%|███████▉  | 706/890 [2:35:33<40:50, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4030, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7969e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4030, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4037, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7231, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7977e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7231, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  79%|███████▉  | 707/890 [2:35:47<41:10, 13.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9556e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7995, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4607e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  80%|███████▉  | 708/890 [2:36:00<40:28, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9348, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3455e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9348, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9354, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7666, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7666, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  80%|███████▉  | 709/890 [2:36:13<40:29, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6419, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6419, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6430, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7459, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7459, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  80%|███████▉  | 710/890 [2:36:27<40:42, 13.57s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.4990e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4850, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.5281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  80%|███████▉  | 711/890 [2:36:40<39:45, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5960, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5960, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5972, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6153, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6787e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6153, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  80%|████████  | 712/890 [2:36:53<39:13, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5535, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0820e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5535, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5542, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0240, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7805e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0240, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  80%|████████  | 713/890 [2:37:06<38:58, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6720, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6720, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6733, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8049, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8049, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  80%|████████  | 714/890 [2:37:19<38:33, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.3265, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.3265, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.3275, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5612, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6906e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5612, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  80%|████████  | 715/890 [2:37:32<38:25, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6519, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.8857e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6519, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6527, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8396, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.1294e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8396, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  80%|████████  | 716/890 [2:37:45<38:04, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8259, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0600e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8259, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8268, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  81%|████████  | 717/890 [2:37:58<37:38, 13.06s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4738, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9087e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4738, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4744, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7627, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3848e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7627, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  81%|████████  | 718/890 [2:38:11<37:15, 13.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7028, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3053e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7028, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7034, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4529, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7726e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4529, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  81%|████████  | 719/890 [2:38:24<37:06, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8038, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8503e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8038, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8045, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9666, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1104e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9666, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  81%|████████  | 720/890 [2:38:37<36:56, 13.04s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9191, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4216e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9191, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9198, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1255, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2137e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1255, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  81%|████████  | 721/890 [2:38:50<36:46, 13.05s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8069e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5598, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1138, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6805e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1138, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  81%|████████  | 722/890 [2:39:04<36:51, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8175, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.8627e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8175, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8184, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9773, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1102e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9773, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  81%|████████  | 723/890 [2:39:17<36:35, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0127, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4231e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0127, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0134, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6594, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0571e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6594, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  81%|████████▏ | 724/890 [2:39:31<36:45, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0022, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0293, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.8771, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.2995e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8771, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  81%|████████▏ | 725/890 [2:39:44<36:19, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7689, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0011e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7689, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7698, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5002, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5885e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5002, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  82%|████████▏ | 726/890 [2:39:57<36:01, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5527, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5527, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5539, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8393, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8393, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  82%|████████▏ | 727/890 [2:40:11<37:04, 13.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7322, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7391e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7322, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7331, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9705, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7493e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9705, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  82%|████████▏ | 728/890 [2:40:29<39:37, 14.67s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6861, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.4326e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6861, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6866, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  82%|████████▏ | 729/890 [2:40:42<38:01, 14.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6040, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3819e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6040, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6049, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7251, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5759e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7251, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  82%|████████▏ | 730/890 [2:40:56<37:46, 14.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7943, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6770e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7943, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7952, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0147, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.8193e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0147, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  82%|████████▏ | 731/890 [2:41:09<36:55, 13.94s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7711, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7711, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7723, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2034, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.2676e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2034, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  82%|████████▏ | 732/890 [2:41:23<36:52, 14.00s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5254, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1961e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5254, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5261, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5026, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2026e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5026, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  82%|████████▏ | 733/890 [2:41:37<36:19, 13.88s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6505, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6505, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6518, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4204, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4204, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  82%|████████▏ | 734/890 [2:41:50<35:12, 13.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0947e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7010, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1748, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.7991e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1748, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  83%|████████▎ | 735/890 [2:42:03<34:33, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6463, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9419e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6463, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6470, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4574, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4574, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  83%|████████▎ | 736/890 [2:42:16<34:33, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3719, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3719, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3735, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.5395, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5605e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5395, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  83%|████████▎ | 737/890 [2:42:29<34:04, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7408, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7408, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7422, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0065, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0035e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0065, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  83%|████████▎ | 738/890 [2:42:43<33:51, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8282, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2464e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8282, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8291, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.3184, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2275e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3184, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  83%|████████▎ | 739/890 [2:42:56<33:38, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0126e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2162, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8552e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  83%|████████▎ | 740/890 [2:43:10<33:53, 13.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8990, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8990, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9001, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0880e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  83%|████████▎ | 741/890 [2:43:23<33:28, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4713, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6040e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4713, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4719, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0022, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4219e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0022, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  83%|████████▎ | 742/890 [2:43:37<33:01, 13.39s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6488, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6488, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6502, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7239, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5576e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7239, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  83%|████████▎ | 743/890 [2:43:50<32:35, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7502, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3786e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7502, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7511, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5759, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6643e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5759, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  84%|████████▎ | 744/890 [2:44:03<32:28, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5356, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9983e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5356, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5360, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7936, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7936, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  84%|████████▎ | 745/890 [2:44:17<32:25, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2407e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5710, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2906, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2906, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  84%|████████▍ | 746/890 [2:44:30<32:05, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5736, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4312e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5736, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5742, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8503, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.1375e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8503, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  84%|████████▍ | 747/890 [2:44:43<31:42, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6311, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6311, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6324, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7846, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2081e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7846, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  84%|████████▍ | 748/890 [2:44:56<31:26, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7570, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7570, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7586, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  84%|████████▍ | 749/890 [2:45:11<31:54, 13.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6552, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6552, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6567, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2234e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  84%|████████▍ | 750/890 [2:45:24<31:15, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4399, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0806e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4399, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4405, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0193, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.8028e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0193, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  84%|████████▍ | 751/890 [2:45:37<30:43, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2613, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2613, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2623, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9388, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9604e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9388, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  84%|████████▍ | 752/890 [2:45:50<30:32, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6101, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6101, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6122, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2141, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7512e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2141, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  85%|████████▍ | 753/890 [2:46:03<30:13, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1893, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1893, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1905, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  85%|████████▍ | 754/890 [2:46:16<29:52, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6757, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9207e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6757, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6762, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8790, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1184e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8790, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  85%|████████▍ | 755/890 [2:46:29<29:39, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4435e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6174, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6085, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.2772e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6085, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  85%|████████▍ | 756/890 [2:46:42<29:20, 13.14s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4189, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4189, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4202, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.7155, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7155, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  85%|████████▌ | 757/890 [2:46:56<29:41, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5401, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5401, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5413, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7280, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5260e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7280, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  85%|████████▌ | 758/890 [2:47:09<29:11, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7324e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5188, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6924e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5188, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  85%|████████▌ | 759/890 [2:47:22<28:42, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2093, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2669e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2093, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2099, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8342e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  85%|████████▌ | 760/890 [2:47:35<28:38, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0763, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6193e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0763, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0772, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7364, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1059e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7364, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  86%|████████▌ | 761/890 [2:47:48<28:16, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4965, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4965, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4981, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9616, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7530e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9616, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  86%|████████▌ | 762/890 [2:48:02<28:14, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6675, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9654e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6675, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6685, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8634, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5202e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8634, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  86%|████████▌ | 763/890 [2:48:15<28:13, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7652, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7652, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7664, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6557, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5111e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6557, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  86%|████████▌ | 764/890 [2:48:28<27:47, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7701, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7701, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7714, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9831e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  86%|████████▌ | 765/890 [2:48:43<28:04, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6759, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.0120e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6759, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6763, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2703, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0362e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2703, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  86%|████████▌ | 766/890 [2:48:56<27:51, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4938, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5561e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4938, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4942, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2061e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  86%|████████▌ | 767/890 [2:49:10<27:45, 13.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6858, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6858, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6876, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6594, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4968e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6594, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  86%|████████▋ | 768/890 [2:49:23<27:36, 13.58s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5096, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.7319e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5096, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5100, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7363, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5209e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7363, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  86%|████████▋ | 769/890 [2:49:37<27:20, 13.56s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5068, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1817e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5068, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5076, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1645, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7675e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1645, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  87%|████████▋ | 770/890 [2:49:50<27:04, 13.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9606, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5054e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9606, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9615, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7883, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7883, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  87%|████████▋ | 771/890 [2:50:04<26:38, 13.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1008, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9884e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1008, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1018, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5588, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6873e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5588, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  87%|████████▋ | 772/890 [2:50:17<26:13, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5866e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8761, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6454, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6454, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  87%|████████▋ | 773/890 [2:50:30<26:00, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5298, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5074e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5298, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5304, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4560e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  87%|████████▋ | 774/890 [2:50:43<25:41, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8855, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8855, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0020, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8874, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8727, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8727, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  87%|████████▋ | 775/890 [2:50:56<25:20, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7346, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7346, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7359, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7098, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3872e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7098, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  87%|████████▋ | 776/890 [2:51:10<25:27, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6445, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5146e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6445, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6451, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7284, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9585e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7284, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  87%|████████▋ | 777/890 [2:51:24<25:20, 13.45s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7362, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2484e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7362, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7371, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7391, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2312e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7391, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  87%|████████▋ | 778/890 [2:51:37<25:08, 13.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5959, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5959, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5971, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9429, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.0854e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9429, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  88%|████████▊ | 779/890 [2:51:51<24:54, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6014, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1304e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6014, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6021, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7803e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  88%|████████▊ | 780/890 [2:52:04<24:43, 13.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8536, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.1832e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8536, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8541, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8691, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.5869e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8691, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  88%|████████▊ | 781/890 [2:52:18<24:34, 13.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9150, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.2429e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9150, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9155, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1809, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2615e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1809, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  88%|████████▊ | 782/890 [2:52:31<24:17, 13.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6722, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6722, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6736, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1078, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1078, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  88%|████████▊ | 783/890 [2:52:45<24:03, 13.49s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8986, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8986, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9001, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6277, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.2536e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6277, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  88%|████████▊ | 784/890 [2:52:59<24:02, 13.61s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5041, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9339e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5041, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5050, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3784, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3353e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3784, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  88%|████████▊ | 785/890 [2:53:12<23:37, 13.50s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1246, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2630e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1246, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1252, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0222, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9733e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0222, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  88%|████████▊ | 786/890 [2:53:25<23:20, 13.47s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9005, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9005, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9016, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4368, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4368, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  88%|████████▊ | 787/890 [2:53:38<23:02, 13.43s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8534, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8534, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8546, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6928, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.6278e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6928, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  89%|████████▊ | 788/890 [2:53:52<22:41, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8294, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8294, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8307, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6441, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.8721e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6441, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  89%|████████▊ | 789/890 [2:54:05<22:41, 13.48s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6151, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7439e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6151, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6157, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8762, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2012e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8762, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  89%|████████▉ | 790/890 [2:54:19<22:22, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9621, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.7706e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9621, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9625, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  89%|████████▉ | 791/890 [2:54:32<22:08, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8755, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0746e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8755, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7655, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5237e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7655, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  89%|████████▉ | 792/890 [2:54:46<22:15, 13.63s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9862, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7937e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9862, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9871, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1149, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.5879e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1149, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  89%|████████▉ | 793/890 [2:55:00<21:51, 13.52s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2811e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4125, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.4448e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  89%|████████▉ | 794/890 [2:55:13<21:39, 13.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.8951, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.5715e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.8951, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.8958, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2303, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3802e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2303, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  89%|████████▉ | 795/890 [2:55:27<21:26, 13.54s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.8805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7836e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.8810, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2927, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2033e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2927, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  89%|████████▉ | 796/890 [2:55:40<21:09, 13.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1418, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5818e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1418, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1422, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9296, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4667e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9296, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  90%|████████▉ | 797/890 [2:55:53<20:47, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5847, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5847, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5861, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4129, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1686e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4129, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  90%|████████▉ | 798/890 [2:56:07<20:34, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0744, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0744, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0760, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0031, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7487e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0031, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  90%|████████▉ | 799/890 [2:56:20<20:09, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.0967, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8306e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.0967, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.0973, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6119, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0805e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6119, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  90%|████████▉ | 800/890 [2:56:33<19:58, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.5223, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.4493e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5223, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5230, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6405, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6405, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  90%|█████████ | 801/890 [2:56:46<19:39, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2289, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2289, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2301, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3416e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  90%|█████████ | 802/890 [2:57:00<19:31, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.7462e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0573, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3613e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0573, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  90%|█████████ | 803/890 [2:57:13<19:17, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6824, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.9207e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6824, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6829, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8496, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5527e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8496, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  90%|█████████ | 804/890 [2:57:26<19:09, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7003, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7003, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7014, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7999, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9652e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7999, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  90%|█████████ | 805/890 [2:57:40<19:00, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2345, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2345, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2362, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7792, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7718e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7792, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  91%|█████████ | 806/890 [2:57:53<18:46, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.1941e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5767, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9462, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3319e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9462, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  91%|█████████ | 807/890 [2:58:06<18:25, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4111, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6204e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4111, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4119, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2946, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7394e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2946, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  91%|█████████ | 808/890 [2:58:20<18:11, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5553, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0691e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5553, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5561, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8413, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7704e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8413, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  91%|█████████ | 809/890 [2:58:33<17:55, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6624, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.6543e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6624, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6633, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6243, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9303e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6243, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  91%|█████████ | 810/890 [2:58:46<17:37, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7764, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7764, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7778, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7724, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.3529e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7724, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  91%|█████████ | 811/890 [2:59:00<17:30, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5480, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.9388e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5480, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5484, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7948, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.7238e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7948, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  91%|█████████ | 812/890 [2:59:13<17:13, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6695, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9206e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6695, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6703, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.9414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6221e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.9414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  91%|█████████▏| 813/890 [2:59:26<16:59, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.0603, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9001e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0603, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.0610, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6320, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0542e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6320, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  91%|█████████▏| 814/890 [2:59:39<16:40, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.3242e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4770, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(4.0025, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.2090e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0025, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  92%|█████████▏| 815/890 [2:59:52<16:26, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6118, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6118, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6130, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9604, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2091e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9604, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  92%|█████████▏| 816/890 [3:00:05<16:09, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7252, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3379e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7252, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7257, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6452, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6452, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  92%|█████████▏| 817/890 [3:00:18<15:56, 13.10s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5659, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.2525e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5659, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5667, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9723, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5456e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9723, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  92%|█████████▏| 818/890 [3:00:31<15:43, 13.11s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5211, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7382e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5211, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5220, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7352, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7352, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  92%|█████████▏| 819/890 [3:00:45<15:44, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7376, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6336e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7376, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7383, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7697, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.8403e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7697, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  92%|█████████▏| 820/890 [3:00:58<15:28, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.1281, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1281, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.1293, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5219, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4412e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5219, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  92%|█████████▏| 821/890 [3:01:11<15:14, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7776, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7776, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7787, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2985, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.4144e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2985, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  92%|█████████▏| 822/890 [3:01:24<14:57, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5217e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4670, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4009, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9075e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4009, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  92%|█████████▏| 823/890 [3:01:37<14:40, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7504, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0184e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7504, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7512, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.6974, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7702e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6974, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  93%|█████████▎| 824/890 [3:01:51<14:28, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7436, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7436, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0015, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7451, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9213, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4158e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9213, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  93%|█████████▎| 825/890 [3:02:04<14:10, 13.09s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5707, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5614e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5707, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5717, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.9109, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5288e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9109, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  93%|█████████▎| 826/890 [3:02:17<14:01, 13.15s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4590, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7813e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4590, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4598, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.2925, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9741e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2925, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  93%|█████████▎| 827/890 [3:02:33<14:53, 14.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6080, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6080, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6097, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6339, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7731e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6339, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  93%|█████████▎| 828/890 [3:02:47<14:21, 13.90s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9266, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9266, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9278, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3263, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3263, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  93%|█████████▎| 829/890 [3:03:00<13:58, 13.75s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9424, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5244e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9424, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9429, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3275, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.0117e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3275, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  93%|█████████▎| 830/890 [3:03:14<13:38, 13.65s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.6493, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.8927e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6493, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.6498, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1546, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0562e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1546, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  93%|█████████▎| 831/890 [3:03:27<13:16, 13.51s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9976, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6865e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9976, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9983, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6112, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9487e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6112, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  93%|█████████▎| 832/890 [3:03:40<12:57, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9568, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9568, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9582, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9067, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.4546e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9067, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  94%|█████████▎| 833/890 [3:03:53<12:39, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4323, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5855e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4323, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4328, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3786, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.0794e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3786, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  94%|█████████▎| 834/890 [3:04:06<12:26, 13.32s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(5.4104, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8169e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(5.4104, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(5.4111, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4989, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.4607e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4989, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  94%|█████████▍| 835/890 [3:04:20<12:20, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.9333, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4238e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9333, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.9342, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.4751, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4751, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  94%|█████████▍| 836/890 [3:04:33<11:59, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(4.0867, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4527e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(4.0867, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(4.0875, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0927, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.2878e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0927, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  94%|█████████▍| 837/890 [3:04:47<11:48, 13.36s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4618, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.6070e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4618, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4625, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6508, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6508, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  94%|█████████▍| 838/890 [3:05:00<11:29, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.2430, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2274e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.2430, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.2435, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7817, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5302e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7817, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  94%|█████████▍| 839/890 [3:05:14<11:26, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5816, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1337, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1337, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  94%|█████████▍| 840/890 [3:05:27<11:12, 13.46s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5166, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5166, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5177, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.5920, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.7158e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5920, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  94%|█████████▍| 841/890 [3:05:40<10:57, 13.42s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3366, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3366, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3377, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6765, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4439e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6765, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  95%|█████████▍| 842/890 [3:05:54<10:41, 13.37s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7416, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9638e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7416, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7426, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2382, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3786e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2382, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  95%|█████████▍| 843/890 [3:06:06<10:21, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2869, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7786e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2869, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2877, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0558, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7378e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0558, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  95%|█████████▍| 844/890 [3:06:20<10:08, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7379, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7379, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7392, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.6947, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.2003e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.6947, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  95%|█████████▍| 845/890 [3:06:33<09:56, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.5451, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.3320e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.5451, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.5458, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7665, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5711e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7665, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  95%|█████████▌| 846/890 [3:06:46<09:39, 13.18s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6145, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6145, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0019, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6164, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9726, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9726, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  95%|█████████▌| 847/890 [3:06:59<09:30, 13.26s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5597, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5597, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5610, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4767, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4767, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  95%|█████████▌| 848/890 [3:07:13<09:20, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0362e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7570, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9805, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.7520e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9805, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  95%|█████████▌| 849/890 [3:07:26<09:04, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6903, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6903, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6913, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9012, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8770e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9012, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  96%|█████████▌| 850/890 [3:07:40<08:53, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.4120, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.0664e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.4120, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.4129, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2068, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2068, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  96%|█████████▌| 851/890 [3:07:53<08:39, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.7006e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9241, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1171, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.1808e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1171, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  96%|█████████▌| 852/890 [3:08:06<08:23, 13.25s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0497, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9545e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0497, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0506, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.1138, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.7793e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.1138, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  96%|█████████▌| 853/890 [3:08:20<08:14, 13.35s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2841, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2841, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2858, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7820, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.3781e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7820, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  96%|█████████▌| 854/890 [3:08:33<07:56, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1796, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7181e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1796, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1804, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3772, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3772, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  96%|█████████▌| 855/890 [3:08:46<07:42, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9115, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9115, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9133, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8690, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8690, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
--------

(Epoch 8/8):  96%|█████████▌| 856/890 [3:08:59<07:28, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4840, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9924e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4840, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4848, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8750, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6400e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8750, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  96%|█████████▋| 857/890 [3:09:12<07:16, 13.21s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.7953, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.1901e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.7953, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.7960, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7026, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0808e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7026, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  96%|█████████▋| 858/890 [3:09:25<07:00, 13.13s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5775, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5775, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5785, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.1050, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.4344e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1050, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 859/890 [3:09:38<06:49, 13.20s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2367, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2367, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0013, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2380, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8699, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.1074e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8699, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 860/890 [3:09:52<06:39, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.8999, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.8999, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9017, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5607, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.3399e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5607, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 861/890 [3:10:05<06:25, 13.29s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6081, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6081, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6092, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7962, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2578e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7962, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 862/890 [3:10:18<06:11, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6866, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6866, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6887, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(3.7768, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0243e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7768, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 863/890 [3:10:31<05:55, 13.17s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.7414, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7414, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0021, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.7435, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0444, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9689e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0444, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 864/890 [3:10:45<05:43, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.2580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0023, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.2603, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6082, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9609e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6082, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 865/890 [3:10:58<05:29, 13.19s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9581, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9581, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9595, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5761, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(3.3950e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5761, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 866/890 [3:11:11<05:19, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3872, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3872, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0016, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3888, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8083, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4706e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8083, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  97%|█████████▋| 867/890 [3:11:25<05:06, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.6585, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.6585, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0018, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.6602, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0349, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3769e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0349, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  98%|█████████▊| 868/890 [3:11:38<04:55, 13.41s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2326, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.6569e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2326, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2332, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6561, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6561, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0011, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  98%|█████████▊| 869/890 [3:11:51<04:39, 13.31s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4124, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4068e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4124, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4133, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6580, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.1772e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6580, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  98%|█████████▊| 870/890 [3:12:05<04:24, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.5007, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.7103e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.5007, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.5011, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.6806, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.2672e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.6806, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  98%|█████████▊| 871/890 [3:12:18<04:11, 13.24s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6014, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6566e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6014, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6023, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2082, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.3413e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2082, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  98%|█████████▊| 872/890 [3:12:31<03:57, 13.22s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.7684, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.7684, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.7696, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5825, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9757e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5825, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  98%|█████████▊| 873/890 [3:12:45<03:46, 13.33s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.9011, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0002, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.9011, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0017, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.9028, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.4331, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2008e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4331, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  98%|█████████▊| 874/890 [3:12:58<03:31, 13.23s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.2249, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.9611e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.2249, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.2259, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5731, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5731, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  98%|█████████▊| 875/890 [3:13:11<03:17, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(3.5042, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.9111e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(3.5042, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(3.5051, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5667, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.8877e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5667, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  98%|█████████▊| 876/890 [3:13:24<03:06, 13.30s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.4336, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.9355e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.4336, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.4342, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7543, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(9.5146e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7543, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  99%|█████████▊| 877/890 [3:13:38<02:54, 13.40s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.3270, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.0331e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.3270, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.3275, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9483, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.4090e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9483, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  99%|█████████▊| 878/890 [3:13:51<02:39, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.9372, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.9372, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0014, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.9387, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(2.0203, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.7792e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0203, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  99%|█████████▉| 879/890 [3:14:04<02:26, 13.34s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.3880, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.5909e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3880, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.3888, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7233, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.6235e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7233, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  99%|█████████▉| 880/890 [3:14:18<02:13, 13.38s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.4887, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.4887, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0012, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.4900, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2102, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.8076e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2102, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0003, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  99%|█████████▉| 881/890 [3:14:31<01:59, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5473, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.5714e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5473, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5478, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.7564, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.2303e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.7564, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  99%|█████████▉| 882/890 [3:14:44<01:45, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(1.4578, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.9992e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.4578, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(1.4585, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9156, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.3640e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9156, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8):  99%|█████████▉| 883/890 [3:14:57<01:32, 13.27s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5592, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0003, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5592, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0025, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5617, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.2063, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.0996e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.2063, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0007, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  99%|█████████▉| 884/890 [3:15:10<01:19, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1518, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.6821e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1518, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0009, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1527, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.0857, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(0.0001, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.0857, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0010, device='cuda:1', grad_fn=<MulBackward0>)
----

(Epoch 8/8):  99%|█████████▉| 885/890 [3:15:24<01:06, 13.28s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.0737, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.2009e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.0737, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.0741, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.7022, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.9996e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7022, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8): 100%|█████████▉| 886/890 [3:15:37<00:52, 13.16s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.7972, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(5.8453e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.7972, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.7978, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(1.3374, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(8.4296e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(1.3374, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8): 100%|█████████▉| 887/890 [3:15:49<00:39, 13.02s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.6403, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.5273e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.6403, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0005, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.6407, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.9224, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0753e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.9224, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8): 100%|█████████▉| 888/890 [3:16:02<00:26, 13.01s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(2.1845, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(4.1154e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(2.1845, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0004, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(2.1849, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.5941, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(7.5128e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5941, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0008, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8): 100%|█████████▉| 889/890 [3:16:15<00:12, 12.89s/batch]

Loaded model: /kaggle/input/gliomateachernewlabels/Teacher_model_after_epoch_100_trainLoss_0.5972_valLoss_0.3019.pth
Seg loss:  tensor(0.5582, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(6.0327e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.5582, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0006, device='cuda:1', grad_fn=<MulBackward0>)
-------------Final student loss-------------
tensor(0.5588, device='cuda:1', grad_fn=<AddBackward0>)
-------------Final student loss-------------
Loaded model: /kaggle/input/africanewlabels/Teacher_model_after_epoch_67_trainLoss_1.1080_valLoss_0.5561.pth
Seg loss:  tensor(0.8950, device='cuda:1', grad_fn=<AddBackward0>)
KL loss with teacher:  tensor(2.2972e-05, device='cuda:1', grad_fn=<DivBackward0>)
Seg loss weighted:  tensor(0.8950, device='cuda:1', grad_fn=<MulBackward0>)
KL loss with teacher weighted:  tensor(0.0002, device='cuda:1', grad_fn=<MulBackward0>)


(Epoch 8/8): 100%|██████████| 890/890 [3:16:28<00:00, 13.25s/batch]

monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.


monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.

Validating:   1%|          | 1/126 [00:05<10:31,  5.06s/batch]

Validation dice loss per batch: 0.3709386885166168


Validating:   2%|▏         | 2/126 [00:05<04:52,  2.36s/batch]

Validation dice loss per batch: 0.25218844413757324


Validating:   2%|▏         | 3/126 [00:08<05:51,  2.86s/batch]

Validation dice loss per batch: 0.8252161741256714


Validating:   3%|▎         | 4/126 [00:09<03:53,  1.91s/batch]

Validation dice loss per batch: 0.6292084455490112


Validating:   4%|▍         | 5/126 [00:13<05:40,  2.82s/batch]

Validation dice loss per batch: 0.30110418796539307


Validating:   5%|▍         | 6/126 [00:14<04:02,  2.02s/batch]

Validation dice loss per batch: 0.32383570075035095


Validating:   6%|▌         | 7/126 [00:18<05:10,  2.61s/batch]

Validation dice loss per batch: 0.3537665009498596


Validating:   6%|▋         | 8/126 [00:18<03:47,  1.93s/batch]

Validation dice loss per batch: 0.1909436732530594


Validating:   7%|▋         | 9/126 [00:22<05:09,  2.64s/batch]

Validation dice loss per batch: 0.3226203918457031


Validating:   8%|▊         | 10/126 [00:23<03:49,  1.98s/batch]

Validation dice loss per batch: 0.4385148882865906


Validating:   9%|▊         | 11/126 [00:28<05:28,  2.86s/batch]

Validation dice loss per batch: 0.4813668131828308


Validating:  10%|▉         | 12/126 [00:28<04:03,  2.13s/batch]

Validation dice loss per batch: 0.23183852434158325


Validating:  10%|█         | 13/126 [00:32<04:57,  2.63s/batch]

Validation dice loss per batch: 0.5189620852470398


Validating:  11%|█         | 14/126 [00:32<03:42,  1.98s/batch]

Validation dice loss per batch: 0.30874836444854736


Validating:  12%|█▏        | 15/126 [00:37<05:10,  2.80s/batch]

Validation dice loss per batch: 0.37175190448760986


Validating:  13%|█▎        | 16/126 [00:38<03:51,  2.10s/batch]

Validation dice loss per batch: 0.219457745552063


Validating:  13%|█▎        | 17/126 [00:42<05:06,  2.81s/batch]

Validation dice loss per batch: 0.4447636604309082


Validating:  14%|█▍        | 18/126 [00:43<03:48,  2.11s/batch]

Validation dice loss per batch: 0.5040987730026245


Validating:  15%|█▌        | 19/126 [00:46<04:40,  2.62s/batch]

Validation dice loss per batch: 0.18981772661209106


Validating:  16%|█▌        | 20/126 [00:47<03:30,  1.98s/batch]

Validation dice loss per batch: 0.33904117345809937


Validating:  17%|█▋        | 21/126 [00:51<04:43,  2.70s/batch]

Validation dice loss per batch: 0.40293216705322266


Validating:  17%|█▋        | 22/126 [00:52<03:31,  2.04s/batch]

Validation dice loss per batch: 0.26538601517677307


Validating:  18%|█▊        | 23/126 [00:55<04:19,  2.52s/batch]

Validation dice loss per batch: 0.8815742135047913


Validating:  19%|█▉        | 24/126 [00:56<03:15,  1.91s/batch]

Validation dice loss per batch: 0.9569066166877747


Validating:  20%|█▉        | 25/126 [00:59<04:02,  2.40s/batch]

Validation dice loss per batch: 2.0199203491210938


Validating:  21%|██        | 26/126 [01:00<03:02,  1.83s/batch]

Validation dice loss per batch: 0.44998422265052795


Validating:  21%|██▏       | 27/126 [01:05<04:26,  2.69s/batch]

Validation dice loss per batch: 0.36925166845321655


Validating:  22%|██▏       | 28/126 [01:05<03:19,  2.03s/batch]

Validation dice loss per batch: 0.3343121409416199


Validating:  23%|██▎       | 29/126 [01:09<04:11,  2.60s/batch]

Validation dice loss per batch: 0.2567306160926819


Validating:  24%|██▍       | 30/126 [01:09<03:08,  1.97s/batch]

Validation dice loss per batch: 0.47677111625671387


Validating:  25%|██▍       | 31/126 [01:13<03:57,  2.50s/batch]

Validation dice loss per batch: 0.3249160647392273


Validating:  25%|██▌       | 32/126 [01:14<02:58,  1.90s/batch]

Validation dice loss per batch: 0.32970669865608215


Validating:  26%|██▌       | 33/126 [01:18<04:03,  2.62s/batch]

Validation dice loss per batch: 0.356804221868515


Validating:  27%|██▋       | 34/126 [01:19<03:02,  1.98s/batch]

Validation dice loss per batch: 0.38309210538864136


Validating:  28%|██▊       | 35/126 [01:22<03:53,  2.56s/batch]

Validation dice loss per batch: 0.6119353771209717


Validating:  29%|██▊       | 36/126 [01:23<02:55,  1.95s/batch]

Validation dice loss per batch: 0.4623178243637085


Validating:  29%|██▉       | 37/126 [01:27<03:41,  2.49s/batch]

Validation dice loss per batch: 0.3413517475128174


Validating:  30%|███       | 38/126 [01:27<02:46,  1.90s/batch]

Validation dice loss per batch: 0.6020255088806152


Validating:  31%|███       | 39/126 [01:31<03:33,  2.45s/batch]

Validation dice loss per batch: 0.4027827978134155


Validating:  32%|███▏      | 40/126 [01:31<02:40,  1.87s/batch]

Validation dice loss per batch: 0.6020124554634094


Validating:  33%|███▎      | 41/126 [01:36<03:36,  2.55s/batch]

Validation dice loss per batch: 0.4142138957977295


Validating:  33%|███▎      | 42/126 [01:36<02:42,  1.94s/batch]

Validation dice loss per batch: 0.5608171820640564


Validating:  34%|███▍      | 43/126 [01:40<03:23,  2.46s/batch]

Validation dice loss per batch: 0.5031864643096924


Validating:  35%|███▍      | 44/126 [01:40<02:33,  1.87s/batch]

Validation dice loss per batch: 0.6222155094146729


Validating:  36%|███▌      | 45/126 [01:44<03:17,  2.44s/batch]

Validation dice loss per batch: 0.35503867268562317


Validating:  37%|███▋      | 46/126 [01:45<02:29,  1.86s/batch]

Validation dice loss per batch: 0.2734736502170563


Validating:  37%|███▋      | 47/126 [01:48<03:15,  2.48s/batch]

Validation dice loss per batch: 0.5065562129020691


Validating:  38%|███▊      | 48/126 [01:49<02:27,  1.89s/batch]

Validation dice loss per batch: 0.7570017576217651


Validating:  39%|███▉      | 49/126 [01:53<03:07,  2.43s/batch]

Validation dice loss per batch: 0.3998187780380249


Validating:  40%|███▉      | 50/126 [01:53<02:21,  1.86s/batch]

Validation dice loss per batch: 0.3026638627052307


Validating:  40%|████      | 51/126 [01:57<03:06,  2.48s/batch]

Validation dice loss per batch: 0.3578827381134033


Validating:  41%|████▏     | 52/126 [01:58<02:20,  1.90s/batch]

Validation dice loss per batch: 0.38034626841545105


Validating:  42%|████▏     | 53/126 [02:02<03:20,  2.75s/batch]

Validation dice loss per batch: 0.47838038206100464


Validating:  43%|████▎     | 54/126 [02:03<02:30,  2.08s/batch]

Validation dice loss per batch: 1.40751051902771


Validating:  44%|████▎     | 55/126 [02:06<02:54,  2.46s/batch]

Validation dice loss per batch: 0.5640220642089844


Validating:  44%|████▍     | 56/126 [02:07<02:11,  1.88s/batch]

Validation dice loss per batch: 0.279031366109848


Validating:  45%|████▌     | 57/126 [02:11<02:52,  2.51s/batch]

Validation dice loss per batch: 0.6322179436683655


Validating:  46%|████▌     | 58/126 [02:11<02:10,  1.92s/batch]

Validation dice loss per batch: 0.838415801525116


Validating:  47%|████▋     | 59/126 [02:16<03:11,  2.85s/batch]

Validation dice loss per batch: 0.18627357482910156


Validating:  48%|████▊     | 60/126 [02:17<02:22,  2.16s/batch]

Validation dice loss per batch: 0.18208405375480652


Validating:  48%|████▊     | 61/126 [02:21<02:52,  2.65s/batch]

Validation dice loss per batch: 0.349536657333374


Validating:  49%|████▉     | 62/126 [02:21<02:09,  2.02s/batch]

Validation dice loss per batch: 0.5575556755065918


Validating:  50%|█████     | 63/126 [02:26<03:02,  2.89s/batch]

Validation dice loss per batch: 0.39512568712234497


Validating:  51%|█████     | 64/126 [02:27<02:15,  2.19s/batch]

Validation dice loss per batch: 1.105738639831543


Validating:  52%|█████▏    | 65/126 [02:31<02:43,  2.68s/batch]

Validation dice loss per batch: 1.0188026428222656


Validating:  52%|█████▏    | 66/126 [02:31<02:02,  2.04s/batch]

Validation dice loss per batch: 0.898064374923706


Validating:  53%|█████▎    | 67/126 [02:36<02:56,  3.00s/batch]

Validation dice loss per batch: 0.25699755549430847


Validating:  54%|█████▍    | 68/126 [02:37<02:10,  2.26s/batch]

Validation dice loss per batch: 0.2940253019332886


Validating:  55%|█████▍    | 69/126 [02:43<03:07,  3.30s/batch]

Validation dice loss per batch: 0.8858116269111633


Validating:  56%|█████▌    | 70/126 [02:43<02:18,  2.47s/batch]

Validation dice loss per batch: 0.7724735736846924


Validating:  56%|█████▋    | 71/126 [02:47<02:44,  2.99s/batch]

Validation dice loss per batch: 0.31711140275001526


Validating:  57%|█████▋    | 72/126 [02:48<02:01,  2.26s/batch]

Validation dice loss per batch: 0.19535627961158752


Validating:  58%|█████▊    | 73/126 [02:52<02:30,  2.84s/batch]

Validation dice loss per batch: 0.19670706987380981


Validating:  59%|█████▊    | 74/126 [02:53<01:51,  2.15s/batch]

Validation dice loss per batch: 0.3352263867855072


Validating:  60%|█████▉    | 75/126 [02:56<02:15,  2.66s/batch]

Validation dice loss per batch: 0.14352114498615265


Validating:  60%|██████    | 76/126 [02:57<01:41,  2.03s/batch]

Validation dice loss per batch: 0.3510321080684662


Validating:  61%|██████    | 77/126 [03:01<02:05,  2.55s/batch]

Validation dice loss per batch: 0.18376950919628143


Validating:  62%|██████▏   | 78/126 [03:01<01:33,  1.95s/batch]

Validation dice loss per batch: 0.2429182529449463


Validating:  63%|██████▎   | 79/126 [03:05<01:55,  2.46s/batch]

Validation dice loss per batch: 0.20766471326351166


Validating:  63%|██████▎   | 80/126 [03:06<01:26,  1.88s/batch]

Validation dice loss per batch: 0.16663846373558044


Validating:  64%|██████▍   | 81/126 [03:10<01:54,  2.55s/batch]

Validation dice loss per batch: 0.5606046915054321


Validating:  65%|██████▌   | 82/126 [03:10<01:25,  1.95s/batch]

Validation dice loss per batch: 0.22818976640701294


Validating:  66%|██████▌   | 83/126 [03:14<01:45,  2.45s/batch]

Validation dice loss per batch: 0.45051729679107666


Validating:  67%|██████▋   | 84/126 [03:14<01:18,  1.88s/batch]

Validation dice loss per batch: 0.2108297497034073


Validating:  67%|██████▋   | 85/126 [03:19<01:57,  2.86s/batch]

Validation dice loss per batch: 0.23867148160934448


Validating:  68%|██████▊   | 86/126 [03:20<01:26,  2.16s/batch]

Validation dice loss per batch: 0.176962748169899


Validating:  69%|██████▉   | 87/126 [03:24<01:44,  2.68s/batch]

Validation dice loss per batch: 0.20903567969799042


Validating:  70%|██████▉   | 88/126 [03:24<01:17,  2.04s/batch]

Validation dice loss per batch: 0.4989639222621918


Validating:  71%|███████   | 89/126 [03:30<01:51,  3.00s/batch]

Validation dice loss per batch: 0.20878107845783234


Validating:  71%|███████▏  | 90/126 [03:30<01:21,  2.26s/batch]

Validation dice loss per batch: 0.3550580143928528


Validating:  72%|███████▏  | 91/126 [03:34<01:37,  2.80s/batch]

Validation dice loss per batch: 0.4858056902885437


Validating:  73%|███████▎  | 92/126 [03:35<01:12,  2.12s/batch]

Validation dice loss per batch: 0.24237927794456482


Validating:  74%|███████▍  | 93/126 [03:39<01:30,  2.75s/batch]

Validation dice loss per batch: 0.6613876819610596


Validating:  75%|███████▍  | 94/126 [03:40<01:06,  2.09s/batch]

Validation dice loss per batch: 0.27837610244750977


Validating:  75%|███████▌  | 95/126 [03:43<01:20,  2.61s/batch]

Validation dice loss per batch: 0.21066071093082428


Validating:  76%|███████▌  | 96/126 [03:44<00:59,  1.99s/batch]

Validation dice loss per batch: 0.3146088123321533


Validating:  77%|███████▋  | 97/126 [03:50<01:33,  3.21s/batch]

Validation dice loss per batch: 0.3365583121776581


Validating:  78%|███████▊  | 98/126 [03:51<01:07,  2.41s/batch]

Validation dice loss per batch: 0.3484492003917694


Validating:  79%|███████▊  | 99/126 [03:54<01:15,  2.81s/batch]

Validation dice loss per batch: 0.15987470746040344


Validating:  79%|███████▉  | 100/126 [03:55<00:55,  2.13s/batch]

Validation dice loss per batch: 0.11896786093711853


Validating:  80%|████████  | 101/126 [04:00<01:14,  2.99s/batch]

Validation dice loss per batch: 0.2426041215658188


Validating:  81%|████████  | 102/126 [04:00<00:54,  2.26s/batch]

Validation dice loss per batch: 0.4750029146671295


Validating:  82%|████████▏ | 103/126 [04:04<01:02,  2.70s/batch]

Validation dice loss per batch: 0.2628283202648163


Validating:  83%|████████▎ | 104/126 [04:05<00:45,  2.06s/batch]

Validation dice loss per batch: 0.4739486277103424


Validating:  83%|████████▎ | 105/126 [04:10<01:01,  2.92s/batch]

Validation dice loss per batch: 1.363182544708252


Validating:  84%|████████▍ | 106/126 [04:10<00:44,  2.20s/batch]

Validation dice loss per batch: 0.14841172099113464


Validating:  85%|████████▍ | 107/126 [04:14<00:50,  2.63s/batch]

Validation dice loss per batch: 0.288280189037323


Validating:  86%|████████▌ | 108/126 [04:14<00:36,  2.01s/batch]

Validation dice loss per batch: 0.5159603357315063


Validating:  87%|████████▋ | 109/126 [04:18<00:44,  2.65s/batch]

Validation dice loss per batch: 0.20096994936466217


Validating:  87%|████████▋ | 110/126 [04:19<00:32,  2.02s/batch]

Validation dice loss per batch: 0.25840288400650024


Validating:  88%|████████▊ | 111/126 [04:23<00:38,  2.57s/batch]

Validation dice loss per batch: 0.1497316062450409


Validating:  89%|████████▉ | 112/126 [04:23<00:27,  1.96s/batch]

Validation dice loss per batch: 0.6022126078605652


Validating:  90%|████████▉ | 113/126 [04:29<00:38,  2.97s/batch]

Validation dice loss per batch: 0.7432289123535156


Validating:  90%|█████████ | 114/126 [04:29<00:26,  2.24s/batch]

Validation dice loss per batch: 0.732255220413208


Validating:  91%|█████████▏| 115/126 [04:33<00:30,  2.74s/batch]

Validation dice loss per batch: 1.4062236547470093


Validating:  92%|█████████▏| 116/126 [04:34<00:20,  2.08s/batch]

Validation dice loss per batch: 0.7413257956504822


Validating:  93%|█████████▎| 117/126 [04:39<00:26,  2.91s/batch]

Validation dice loss per batch: 0.721588134765625


Validating:  94%|█████████▎| 118/126 [04:39<00:17,  2.20s/batch]

Validation dice loss per batch: 0.8430908918380737


Validating:  94%|█████████▍| 119/126 [04:43<00:18,  2.64s/batch]

Validation dice loss per batch: 0.6443632245063782


Validating:  95%|█████████▌| 120/126 [04:43<00:12,  2.01s/batch]

Validation dice loss per batch: 0.29536452889442444


Validating:  96%|█████████▌| 121/126 [04:49<00:15,  3.01s/batch]

Validation dice loss per batch: 0.5145354270935059


Validating:  97%|█████████▋| 122/126 [04:49<00:09,  2.27s/batch]

Validation dice loss per batch: 0.48262888193130493


Validating:  98%|█████████▊| 123/126 [04:54<00:09,  3.07s/batch]

Validation dice loss per batch: 0.8391975164413452


Validating:  98%|█████████▊| 124/126 [04:55<00:04,  2.31s/batch]

Validation dice loss per batch: 0.42722123861312866


Validating:  99%|█████████▉| 125/126 [04:59<00:02,  2.76s/batch]

Validation dice loss per batch: 0.3740164637565613


                                                                

Validation dice loss per batch: 0.522917628288269
------Final validation dice loss after epoch 8: 0.4594309329986572-------




Learning rate after epoch 8: 0.001
Model saved after epoch 8
Training completed.
