In [4]:
print("test")

test


### global

In [2]:
import os
import tensorflow as tf
import torch
print(tf.__version__)
print(torch.__version__)

2.15.0
2.1.0+cu121


In [3]:
import numpy as np
print(np.__version__)
# setting random_state
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
tf.random.set_seed(RANDOM_STATE)
torch.manual_seed(RANDOM_STATE)

1.25.2


<torch._C.Generator at 0x7d5a71a47450>

### some libraries and functions

In [5]:
# libraries
import sys, math
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import sklearn

In [71]:
# fix random_state
def fixRandomState(fixed_state: int=RANDOM_STATE):
  """Seed NumPy, TensorFlow and PyTorch with one shared value.

  :param fixed_state: Seed passed to each library (defaults to RANDOM_STATE)
  """
  # Same order as the notebook's seeding cell: numpy, then tensorflow, then torch.
  seeders = (np.random.seed, tf.random.set_seed, torch.manual_seed)
  for seed_fn in seeders:
    seed_fn(fixed_state)

# exception
def exception(requirement: bool, content):
  """Raise ValueError(content) when `requirement` compares equal to False.

  :param requirement: Condition that must hold
  :param content: Payload forwarded to ValueError
  """
  # Deliberately `== False` (not `not requirement`): None or [] must not raise.
  violated = (requirement == False)
  if not violated:
    return
  raise ValueError(content)
def catchException(ex: Exception):
  """Print the exception's type and args, then raise a ValueError wrapping it.

  :param ex: Exception instance to report
  """
  ex_type, ex_args = type(ex), ex.args
  print(ex_type, ex_args)
  # requirement=False makes `exception` raise unconditionally.
  exception(False, ex)

# message
def mesVerbose(flag: bool, verbose, func_dir: str=""):
  """Print a `__verbose__:` line unless `flag` compares equal to False.

  :param flag: Switch; nothing is printed when it equals False
  :param verbose: Payload to print
  :param func_dir: Optional label describing where the message comes from
  """
  if not (flag == False):
    print("__verbose__:", func_dir, verbose)
def mesWarningToUser(note, func_dir: str=""):
  """Print a `__warning__:` line; the note is stringified and suffixed with "&&&".

  :param note: Warning payload (converted with str())
  :param func_dir: Optional label describing where the warning comes from
  """
  tagged_note = str(note) + "&&&"
  print("__warning__:", func_dir, tagged_note)

In [8]:
def over(val, name="") -> None:
  """Print a one-line overview of `val`: its type, its shape and its size.

  :param val: Any object; `val.shape` is reported when the attribute exists,
              otherwise the literal "no-shape"
  :param name: Label printed in front of the overview
  :return: None (the overview is only printed)
  """
  # getattr with a default replaces the former bare `except:`, which also
  # swallowed unrelated errors (even KeyboardInterrupt).
  shape = getattr(val, "shape", "no-shape")
  # sys.getsizeof is shallow: it does not follow references to underlying buffers.
  mesVerbose(True, (type(val), shape, str(sys.getsizeof(val)) + "Bytes"), name)

### model architecture

In [74]:
from torch import nn, optim
from torch.utils import data

BATCH_SIZE = 4
IN_SHAPE = (BATCH_SIZE, 3, 224, 224)  # (batch, channels, height, width)

# Backbone layout, interpreted by YoloBackbone entry by entry:
#   tuple (out_channels, kernel_size, stride, padding_tag) -> ConvWithBatchNorm
#   str ('M')                                              -> 2x2 max-pool, stride 2
#   list [(out_1x1, out_3x3), num_repeat]                  -> BottleNeckBlock
# NOTE: YoloBackbone unpacks the padding tag but does not use it; ConvWithBatchNorm
# always pads with k_size // 2.
YOLO_BACKBONE_ARCHITECTURE = [(64, 7, 2, 'same'), 'M',
                                (192, 3, 1, 'same'), 'M',
                                (128, 1, 1, 'valid'),
                                [(128, 256), 1],
                                [(256, 512), 1], 'M',
                                [(256, 512), 4],
                                [(512, 1024), 1], 'M',
                                [(512, 1024), 2]]

GRID_SIZE = 7
NUM_BOXES = 2
NUM_CLASSES = 3
# Target layout per grid cell: x, y, w, h, objectness, then one slot per class.
# Derived from the constants above so it cannot drift from them.
OUT_SHAPE = (BATCH_SIZE, GRID_SIZE, GRID_SIZE, 5 + NUM_CLASSES)

In [10]:
# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
DEVICE

device(type='cpu')

##### nnModule

In [81]:
class nnModule(nn.Module):
  """Shared base class for the networks in this notebook.

  Stores the expected input shape, the resulting output shape and the ordered
  layer container (an `nn.ModuleList`), and provides two smoke tests.
  `forward` is not defined here; the smoke tests rely on the subclass's.
  """

  def __init__(self) -> None:
    super(nnModule, self).__init__()
    self.in_shape = ()
    self.out_shape = ()
    self.model = nn.ModuleList()

  def getInShape(self): return self.in_shape
  def getOutShape(self): return self.out_shape
  def getModel(self): return self.model
  def setInShape(self, in_shape): self.in_shape = in_shape
  def setOutShape(self, out_shape): self.out_shape = out_shape
  def setModel(self, model): self.model = model

  def unittest_backward(self):
    """Run one forward/backward/optimizer step on random data.

    The step uses a random MSE target of the output's shape. It really
    updates this instance's weights, then prints every parameter's type/shape.
    """
    mesVerbose(True, "@@@ test backward", "nnModule > unittest_backward:")
    model = self.getModel()
    # Unpack the stored shape instead of indexing four fixed positions.
    x = torch.rand(*self.getInShape())
    out = self(x)
    loss = nn.MSELoss()(out, torch.rand(*out.shape))

    print("example_mse_loss:", type(loss), loss)
    optimizer = optim.Adam(list(model.parameters()), lr=2e-5, weight_decay=0)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print("backward finish, the weights (state) of current instance CAN changed!")
    params = list(model.parameters())
    print("parameters():")
    for p in params: print(type(p), p.shape)

  def unittest_summary(self):
    """Push a random input through the layers one by one, printing each shape."""
    mesVerbose(True, "@@@ test summary", "nnModule > unittest_summary:")
    x = torch.rand(*self.getInShape())
    # nn.ModuleList has no .copy(); iterating it directly is enough because
    # the container itself is not modified here.
    for layer in self.getModel():
      print("\tin_shape:", type(x), x.shape)
      print(type(layer), sys.getsizeof(layer))
      x = layer(x)
    print("out_shape:", type(x), x.shape)

##### blocks

In [82]:
class ConvWithBatchNorm(nn.Module):
  """Bias-free Conv2d followed by BatchNorm2d and LeakyReLU, in that order."""

  def __init__(self, in_c: int, out_c: int, k_size: int, stride=1, negative_slope=0.1):
    super(ConvWithBatchNorm, self).__init__()

    # Padding of k_size // 2 keeps the spatial size for odd kernels at stride 1.
    conv = nn.Conv2d(in_c, out_c, k_size, stride=stride, padding=k_size // 2, bias=False)
    norm = nn.BatchNorm2d(num_features=out_c)
    activation = nn.LeakyReLU(negative_slope=negative_slope)
    self.layers = nn.ModuleList([conv, norm, activation])

  def forward(self, x):
    out = x
    for stage in self.layers:
      out = stage(out)
    return out

In [83]:
class BottleNeckBlock(nn.Module):
  """`num_repeat` pairs of (1x1 reduction conv, 3x3 conv), all bias-free.

  :param in_c: Channels of the tensor entering the block
  :param out_ces: (channels after each 1x1 conv, channels after each 3x3 conv)
  :param num_repeat: Number of (1x1, 3x3) pairs to stack
  """

  def __init__(self, in_c: int, out_ces: tuple, num_repeat: int):
    super(BottleNeckBlock, self).__init__()

    out_1x1, out_3x3 = out_ces[0], out_ces[1]
    layers = nn.ModuleList()
    pair_in = in_c
    for _ in range(num_repeat):
      layers.append(nn.Conv2d(pair_in, out_1x1, 1, stride=1, padding=0, bias=False))
      layers.append(nn.Conv2d(out_1x1, out_3x3, 3, stride=1, padding=1, bias=False))
      # Every pair after the first consumes the previous pair's 3x3 output,
      # not the block's original input width.
      pair_in = out_3x3
    self.layers = layers

  def forward(self, x):
    for layer in self.layers:
      x = layer(x)
    return x

##### YoloBackbone

In [84]:
class YoloBackbone(nnModule):
  """YOLO backbone: extracts a feature map from the input image batch.

  The layer list is built from `backbone_config`; each entry's type selects the layer:
    * tuple (out_channels, kernel_size, stride, padding_tag) -> ConvWithBatchNorm
      (the padding tag is unpacked but not used)
    * str                                                    -> 2x2 max-pool, stride 2
    * list [(out_1x1, out_3x3), num_repeat]                  -> BottleNeckBlock

  :param in_shpae: Input shape (batch, channels, height, width)
  :param backbone_config: Layer description as listed above
  :raises ValueError: If an entry is neither a tuple, a str nor a list
  """

  def __init__(self, in_shpae: tuple, backbone_config=YOLO_BACKBONE_ARCHITECTURE):
    super(YoloBackbone, self).__init__()
    self.setInShape(in_shpae)
    model = self.getModel()
    # A random probe tensor is pushed through each new layer so the next
    # layer's input channel count can be read from its shape.
    x = torch.rand(*in_shpae)
    for config in backbone_config:
      in_c = x.shape[1]
      if isinstance(config, tuple):
        out_c, k_size, stride, _ = config
        layer = ConvWithBatchNorm(in_c=in_c, out_c=out_c, k_size=k_size, stride=stride, negative_slope=0.1)
      elif isinstance(config, str):
        layer = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
      elif isinstance(config, list):
        out_ces, num_repeat = config
        layer = BottleNeckBlock(in_c, out_ces, num_repeat)
      else:
        # An unknown entry used to be skipped silently, yielding a different network.
        raise ValueError(f"unsupported backbone config entry: {config!r}")
      model.append(layer)

      # Probe in eval mode without autograd: the random batch must not update
      # BatchNorm running statistics or build a graph during construction.
      layer.eval()
      with torch.no_grad():
        x = layer(x)
      layer.train()
    self.setOutShape(x.shape)
    self.setModel(model=model)

  def forward(self, x):
    for layer in self.getModel():
      x = layer(x)
    return x

In [62]:
YoloBackbone((4, 3, 224, 224)).unittest_backward()

__verbose__: nnModule > unittest_backward: @@@ test backward
example_mse_loss: <class 'torch.Tensor'> tensor(0.3335, grad_fn=<MseLossBackward0>)
backward finish, the weights (state) of instance CAN changed!
parameters():
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 7, 7])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([192])
<class 'torch.nn.parameter.Parameter'> torch.Size([192])
<class 'torch.nn.parameter.Parameter'> torch.Size([128, 192, 1, 1])
<class 'torch.nn.parameter.Parameter'> torch.Size([128])
<class 'torch.nn.parameter.Parameter'> torch.Size([128])
<class 'torch.nn.parameter.Parameter'> torch.Size([128, 128, 1, 1])
<class 'torch.nn.parameter.Parameter'> torch.Size([256, 128, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 1, 1])
<class 'torch.nn.parameter.Paramete

##### YoloOutput

In [63]:
# Fully connected head layout read by YoloOutput:
#   tuple (out_features, leaky_relu_slope) -> Linear followed by LeakyReLU
#   bare float                             -> a Dropout layer (value meant as its probability)
# The last entry yields GRID_SIZE * GRID_SIZE * (NUM_BOXES * 5 + NUM_CLASSES) values per sample.
# NOTE(review): 2040 may be a typo for 2048 — confirm before changing.
YOLO_OUT_ARCHITECTURE = [(4096, 0.1), 0.5, (2040, 0.1), 0.5, (1024, 0.1), 0.5, (GRID_SIZE * GRID_SIZE * (NUM_BOXES * 5 + NUM_CLASSES), 0.1)]

In [85]:
class YoloOutput(nnModule):
  """YOLO head: four 3x3 conv blocks, a flatten, then the fully connected
  stack described by YOLO_OUT_ARCHITECTURE.

  :param in_shape: Shape (batch, channels, height, width) of the incoming feature map
  """

  def __init__(self, in_shape: tuple):
    super(YoloOutput, self).__init__()
    self.setInShape(in_shape=in_shape)
    # Random probe tensor, used only to discover the flattened feature size.
    x = torch.rand(*in_shape)
    model = nn.ModuleList()
    model += [ConvWithBatchNorm(in_shape[1], out_c=1024, k_size=3),
              ConvWithBatchNorm(1024, out_c=1024, k_size=3),
              ConvWithBatchNorm(1024, out_c=1024, k_size=3),
              ConvWithBatchNorm(1024, out_c=1024, k_size=3),
              nn.Flatten()]

    # No autograd graph is needed while probing shapes.
    with torch.no_grad():
      # Eval mode keeps the random probe out of the BatchNorm running statistics.
      model.eval()
      for layer in model: x = layer(x)
      model.train()

      for config in YOLO_OUT_ARCHITECTURE:
        if isinstance(config, tuple):
          out_f, slop = config
          model += [nn.Linear(in_features=x.shape[1], out_features=out_f), nn.LeakyReLU(negative_slope=slop)]
          x = model[-1](model[-2](x))
        else:
          # Use the configured probability (it was previously hard-coded to 0.5).
          model += [nn.Dropout(p=config)]
          x = model[-1](x)

    self.setOutShape(x.shape)
    self.setModel(model)

  def forward(self, x):
    for layer in self.getModel():
      x = layer(x)
    return x

In [65]:
YoloOutput((16, 1024, 7, 7)).unittest_backward()

__verbose__: nnModule > unittest_backward: @@@ test backward
example_mse_loss: <class 'torch.Tensor'> tensor(0.3051, grad_fn=<MseLossBackward0>)
backward finish, the weights (state) of instance CAN changed!
parameters():
<class 'torch.nn.parameter.Parameter'> torch.Size([1024, 1024, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024, 1024, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024, 1024, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024, 1024, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Parameter'> torch.Size([1024])
<class 'torch.nn.parameter.Paramete

##### YoloV1

In [86]:
class YoloV1(nnModule):
  """End-to-end YOLO network: YoloBackbone followed by YoloOutput.

  :param in_shape: Input shape (batch, channels, height, width)
  """

  def __init__(self, in_shape: tuple):
    super(YoloV1, self).__init__()
    self.setInShape(in_shape)

    # Both sub-networks record their output shape while being built, so no
    # extra forward pass is needed here to chain them.
    yolo_backbone = YoloBackbone(in_shape)
    yolo_output = YoloOutput(in_shape=yolo_backbone.getOutShape())

    self.setOutShape(yolo_output.getOutShape())
    self.setModel(nn.ModuleList([yolo_backbone, yolo_output]))

  def forward(self, x):
    for layer in self.getModel():
      x = layer(x)
    return x

In [67]:
YoloV1((16, 3, 224, 224)).unittest_backward()

__verbose__: nnModule > unittest_backward: @@@ test backward
example_mse_loss: <class 'torch.Tensor'> tensor(0.3054, grad_fn=<MseLossBackward0>)
backward finish, the weights (state) of instance CAN changed!
parameters():
<class 'torch.nn.parameter.Parameter'> torch.Size([64, 3, 7, 7])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([64])
<class 'torch.nn.parameter.Parameter'> torch.Size([192, 64, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([192])
<class 'torch.nn.parameter.Parameter'> torch.Size([192])
<class 'torch.nn.parameter.Parameter'> torch.Size([128, 192, 1, 1])
<class 'torch.nn.parameter.Parameter'> torch.Size([128])
<class 'torch.nn.parameter.Parameter'> torch.Size([128])
<class 'torch.nn.parameter.Parameter'> torch.Size([128, 128, 1, 1])
<class 'torch.nn.parameter.Parameter'> torch.Size([256, 128, 3, 3])
<class 'torch.nn.parameter.Parameter'> torch.Size([256, 256, 1, 1])
<class 'torch.nn.parameter.Paramete

### YoloLoss

In [87]:
def intersection_over_union(boxes_preds, boxes_labels, box_format='midpoint'):
  """
  Calculates intersection over union

  Parameters:
      boxes_preds (tensor): Predicted boxes, shape (..., 4)
      boxes_labels (tensor): Ground-truth boxes, shape (..., 4)
      box_format (str): 'midpoint' if boxes are (x, y, w, h),
          'corners' if boxes are (x1, y1, x2, y2)

  Returns:
      tensor: IoU per box pair, shape (..., 1)

  Raises:
      ValueError: If box_format is neither 'midpoint' nor 'corners'
  """
  # Width-1 slices (0:1, 1:2, ...) keep the last axis, so the result is (..., 1).
  if box_format == 'midpoint':
    box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
    box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
    box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
    box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2

    box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
    box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
    box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
    box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
  elif box_format == 'corners':
    box1_x1 = boxes_preds[..., 0:1]
    box1_y1 = boxes_preds[..., 1:2]
    box1_x2 = boxes_preds[..., 2:3]
    box1_y2 = boxes_preds[..., 3:4]

    box2_x1 = boxes_labels[..., 0:1]
    box2_y1 = boxes_labels[..., 1:2]
    box2_x2 = boxes_labels[..., 2:3]
    box2_y2 = boxes_labels[..., 3:4]
  else:
    # Previously this fell through and failed later with UnboundLocalError.
    raise ValueError(f"box_format must be 'midpoint' or 'corners', got {box_format!r}")

  # Corners of the overlap rectangle.
  x1 = torch.max(box1_x1, box2_x1)
  y1 = torch.max(box1_y1, box2_y1)
  x2 = torch.min(box1_x2, box2_x2)
  y2 = torch.min(box1_y2, box2_y2)

  # clamp(0): when the boxes do not overlap, a side length is negative and must count as 0.
  intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
  box1_area = torch.abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
  box2_area = torch.abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
  # 1e-6 guards against division by zero for two degenerate boxes.
  return intersection / (box1_area + box2_area - intersection + 1e-6)


In [69]:
EPS = 1e-6
def sign_sqrt(pred):
  """Signed square root: sign(pred) * sqrt(|pred| + EPS).

  :param pred: Tensor of any shape
  :return: Tensor of the same shape
  """
  # EPS is added after abs() so the sqrt argument is never exactly 0. With
  # abs(pred + EPS), pred == -EPS gave sqrt(0) and a NaN in the backward pass.
  return torch.sign(pred) * torch.sqrt(torch.abs(pred) + EPS)
# Note (translated from the original comment): abs has no derivative at 0.

In [109]:
class YoloLoss(nn.Module):
  """YOLO v1 style sum-of-squares loss for two predicted boxes per cell.

  Layouts implied by the indexing in `forward`:
    * predictions, last axis: [x, y, w, h, conf] for box 1 (0..4),
      [x, y, w, h, conf] for box 2 (5..9), class scores from index 10
    * target, last axis: [x, y, w, h, objectness, class one-hot ...]

  :param coord_c: Weight of the coordinate term
  :param noobj_c: Weight of the no-object confidence term
  """

  def __init__(self, coord_c=5, noobj_c=0.5):
    super(YoloLoss, self).__init__()
    self.COORD = coord_c
    self.NOOBJ = noobj_c
    self.mse = nn.MSELoss(reduction="sum")
    # Filled by forward(); set here so getLoss() before the first call
    # returns None instead of raising AttributeError.
    self.some_loss = None

  def setLoss(self, some_loss): self.some_loss = some_loss
  def getLoss(self): return self.some_loss

  def forward(self, predictions: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """Compute the weighted total loss; `target` is left unmodified.

    The (class, coord, object, no-object) components are stored for getLoss().
    """
    predictions = predictions.reshape((-1, GRID_SIZE, GRID_SIZE, NUM_BOXES * 5 + NUM_CLASSES))
    exists_box = target[..., [4]]  # 1 where the target marks an object, kept as (..., 1)
    iou_b1 = intersection_over_union(
        predictions[..., 0:4], target[..., 0:4])
    iou_b2 = intersection_over_union(
        predictions[..., 5:9], target[..., 0:4])
    # 0 selects box 1, 1 selects box 2 (the one with the higher IoU; ties go to box 1).
    bestbox = torch.where(iou_b1 >= iou_b2, 0, 1)

    # class loss
    class_loss = self.mse(
      exists_box * predictions[..., 10:],
      exists_box * target[..., 5:])

    # obj loss
    pred_conf = (
        (1 - bestbox) * predictions[..., [4]] + bestbox * predictions[..., [9]]
    )
    object_loss = self.mse(
      exists_box * pred_conf,
      exists_box * target[..., [4]]
    )

    # coor loss
    pred_xywh = (
        (1 - bestbox) * predictions[..., 0:4] + bestbox * predictions[..., 5:9]
    )
    # New tensors are built with cat instead of writing into slices:
    # target[..., 0:4] is a view, so an in-place sqrt would overwrite the
    # caller's target tensor.
    pred_box = torch.cat((pred_xywh[..., 0:2], sign_sqrt(pred_xywh[..., 2:4])), dim=-1)
    true_box = torch.cat((target[..., 0:2], sign_sqrt(target[..., 2:4])), dim=-1)
    coor_loss = self.mse(
      exists_box * pred_box, exists_box * true_box
    )

    # no obj loss: both predicted confidences are penalised in empty cells
    no_obj_loss = self.mse(
      (1 - exists_box) * predictions[..., [4]], (1 - exists_box) * target[..., [4]]
    )
    no_obj_loss += self.mse(
      (1 - exists_box) * predictions[..., [9]], (1 - exists_box) * target[..., [4]]
    )
    self.setLoss((class_loss, coor_loss, object_loss, no_obj_loss))
    return class_loss + object_loss + self.COORD * coor_loss + self.NOOBJ * no_obj_loss

  def unittest_loss_backloss(self):
    """Smoke test: one loss evaluation and optimizer step on a fresh YoloV1 with random data."""
    mesVerbose(True, "@@@ test loss and backloss", "YoloLoss > unittest_loss_backloss:")
    model = YoloV1(in_shape=IN_SHAPE)
    x = torch.rand(*IN_SHAPE)
    out = model(x)
    y = torch.rand(*OUT_SHAPE)
    loss = self(out, y)

    print("example_loss:", type(loss), loss)
    optimizer = optim.Adam(list(model.parameters()), lr=2e-5, weight_decay=0)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print("loss and backloss finish")


In [91]:
YoloLoss().unittest_loss_backloss()

__verbose__: YoloLoss > unittest_loss_backloss: @@@ test loss and backloss
example_loss: <class 'torch.Tensor'> tensor(595.8547, grad_fn=<AddBackward0>)
loss and backloss finish


### DataLoad

In [101]:
import os
from xml.etree import ElementTree
import tensorflow as tf
from tqdm import tqdm
from functools import partial
from keras.preprocessing.image import load_img, img_to_array

class_names = ['apple', 'banana', 'orange']

class DataLoad(data.Dataset):
  """Map-style dataset that loads every image/annotation pair into memory.

  Images are stored channel-first (see `prepare_image`), scaled to [0, 1];
  labels are (grid_size, grid_size, 5 + num_classes) target grids built by
  `convert_bboxes_to_tensor`.
  """

  def utest_loaddata(self):
    """Print the repeat/augmentation settings and an overview of the loaded arrays."""
    mesVerbose(True, "@@@ test load data", "DataLoad(nn.Dataset) > utest_loaddata:")
    print("repeat:", self.repeat)
    print("is aug:", self.aug)
    over(self.imgs, "imgs = ")
    over(self.labels, "labels = ")

  def utest_getdata(self):
    """Fetch sample 0 and print an overview of the returned tensors."""
    mesVerbose(True, "@@@ test get data", "DataLoad(nn.Dataset) > utest_getdata:")
    x, y = self.__getitem__(0)
    over(x, "x = ")
    over(y, "y = ")

  def __init__(self, file_dir, repeat, aug=False) -> None:
    """
    :param file_dir: Directory holding `.jpg` images and same-named `.xml` annotations
    :param repeat: Number of doubling passes applied to the loaded arrays
    :param aug: If True, every stored image is augmented once at load time
    """
    super().__init__()
    self.repeat, self.aug = repeat, aug
    dataframe = self.get_dataframe(file_dir=file_dir)
    self.imgs, self.labels = self.load_dataset(dataframe, input_shape=(224, 224, 3), #!!!
                                               grid_size=GRID_SIZE) # np.ndarray
    # repeat
    # NOTE(review): each pass concatenates the arrays with themselves, so the
    # dataset grows to 2**repeat times its size (repeat=4 -> 16x). Confirm this
    # is intended rather than `repeat` plain copies.
    for _ in range(repeat):
      self.imgs = np.concatenate((self.imgs, self.imgs), axis=0)
      self.labels = np.concatenate((self.labels, self.labels), axis=0)
    # aug
    if aug == True:
      for i, img in enumerate(self.imgs):
        label = self.labels[i]
        self.imgs[i], self.labels[i] = self._apply_augmentation(img, label, seed=RANDOM_STATE)

  def __len__(self):
    return len(self.imgs)

  def __getitem__(self, idx):
    """Return sample `idx` as a pair of float32 torch tensors (image, target)."""
    # torch.tensor copies the numpy data directly; no TensorFlow round-trip is needed.
    x = torch.tensor(self.imgs[idx], dtype=torch.float32)
    y = torch.tensor(self.labels[idx], dtype=torch.float32)
    return x, y

  def get_dataframe(self, file_dir):
    """
    Build a dataframe pairing every `.jpg` in a directory with the
    annotation file of the same stem (`.xml`).
    :param file_dir: File directory to create dataframe
    :return file_df: Dataframe with columns `Image_file` and `Annotation_file`
    """

    img_files = [os.path.join(file_dir, img_file) for img_file
                 in sorted(os.listdir(file_dir)) if img_file[-4:] == '.jpg']
    annot_files = [img_file[:-4] + '.xml' for img_file in img_files]

    img_file_series = pd.Series(img_files, name='Image_file')
    annot_file_series = pd.Series(annot_files, name='Annotation_file')
    file_df = pd.DataFrame(pd.concat([img_file_series, annot_file_series], axis=1))
    return file_df

  def prepare_image(self, filename, input_shape):
    """
    Load an image, resize it and convert it to a channel-first array.
    :param filename: File name
    :param input_shape: Shape expected by the model, (height, width[, channels])
    :return : 3D image array (channels, height, width), pixel values from [0., 1.]
    """

    # Only height and width are meaningful as a resize target.
    img = img_to_array(load_img(filename, target_size=input_shape[:2])) / 255.
    # (H, W, C) -> (C, H, W)
    img = np.einsum('ijk->kij', img)
    return img

  def convert_to_xywh(self, bboxes):
    """
    Convert list of (xmin, ymin, xmax, ymax) to
    (x_center, y_center, box_width, box_height)
    :param bboxes: List of bounding boxes, each has 4
    values (xmin, ymin, xmax, ymax)
    :return boxes: List of bounding boxes, each has 4
    values (x_center, y_center, box_width, box_height)
    """

    boxes = list()
    for box in bboxes:
      xmin, ymin, xmax, ymax = box

      # Compute width and height of box
      box_width = xmax - xmin
      box_height = ymax - ymin

      # Compute x, y center (truncated to whole pixels)
      x_center = int(xmin + (box_width / 2))
      y_center = int(ymin + (box_height / 2))

      boxes.append((x_center, y_center, box_width, box_height))

    return boxes

  def extract_annotation_file(self, filename):
    """
    Extract bounding boxes from an annotation file
    :param filename: Annotation file name
    :return boxes: List of bounding boxes in image, each box has
    4 values (x_center, y_center, box_width, box_height)
    :return classes: List of classes in image (indices into `class_names`)
    :return width: Width of image
    :return height: Height of image
    """

    # Load and parse the file
    tree = ElementTree.parse(filename)
    # Get the root of the document
    root = tree.getroot()
    boxes = list()
    classes = list()

    # Extract each bounding box
    for box in root.findall('.//object'):
      cls = class_names.index(box.find('name').text)
      xmin = int(box.find('bndbox/xmin').text)
      ymin = int(box.find('bndbox/ymin').text)
      xmax = int(box.find('bndbox/xmax').text)
      ymax = int(box.find('bndbox/ymax').text)
      coors = (xmin, ymin, xmax, ymax)
      boxes.append(coors)
      classes.append(cls)

    boxes = self.convert_to_xywh(boxes)

    # Get width and height of an image
    width = int(root.find('.//size/width').text)
    height = int(root.find('.//size/height').text)

    # Some annotation files have set width and height by 0,
    # so we need to load image and get it width and height
    if (width == 0) or (height == 0):
      img = load_img(filename[:-4] + '.jpg')
      width, height = img.width, img.height

    return boxes, classes, width, height

  def convert_bboxes_to_tensor(self, bboxes, classes, img_width, img_height, grid_size=7):
    """
    Convert list of bounding boxes to tensor target
    :param bboxes: List of bounding boxes in image, each box has
    4 values (x_center, y_center, box_width, box_height)
    :param classes: List of class in image
    :param img_width: Image's width
    :param img_height: Image's height
    :param grid_size: Grid size
    :return target: Target tensor (grid_size x grid_size x (5 + num_classes))
    """

    num_classes = len(class_names)
    target = np.zeros(shape=(grid_size, grid_size, 5 + num_classes), dtype=np.float32)

    # Size of each cell in the grid (identical for every box)
    cell_w, cell_h = img_width / grid_size, img_height / grid_size

    for idx, bbox in enumerate(bboxes):
      x_center, y_center, width, height = bbox

      # Determine cell i, j of bounding box; a center lying exactly on the
      # right/bottom image edge is clamped into the last cell.
      i = min(int(y_center / cell_h), grid_size - 1)
      j = min(int(x_center / cell_w), grid_size - 1)

      # A cell describes a single object: keep the first box assigned to it
      # instead of summing the coordinates of several boxes.
      if target[i, j, 4] == 1.:
        continue

      # Compute value of x_center and y_center in cell
      x, y = (x_center / cell_w) - j, (y_center / cell_h) - i

      # Normalize width and height of bounding box
      w_norm, h_norm = width / img_width, height / img_height

      # Set x, y, w, h
      target[i, j, :4] = (x, y, w_norm, h_norm)
      # Set obj score
      target[i, j, 4] = 1.
      # Set class dist.
      target[i, j, 5 + classes[idx]] = 1.
    return target

  def load_dataset(self, dataframe, input_shape, grid_size=7):
    """
    Load img and target tensor
    :param dataframe: Dataframe contains img files and annotation files
    :param input_shape: Shape expected by the model (image will be resize accordingly)
    :param grid_size: Grid size
    :return : Tuple (imgs, targets) of numpy arrays
    """

    imgs, targets = list(), list()

    for _, row in tqdm(dataframe.iterrows(), total=len(dataframe)):
      img = self.prepare_image(row.Image_file, input_shape)
      target = self.extract_annotation_file(row.Annotation_file)
      target = self.convert_bboxes_to_tensor(*target, grid_size)
      imgs.append(img)
      targets.append(target)

    imgs = np.array(imgs)
    targets = np.array(targets)
    return imgs, targets

  def _apply_augmentation(self, image, target, seed=None):
    """
    Apply random brightness and saturation on image
    :param image: Channel-first image (channels, height, width), as produced by `prepare_image`
    :param target: Target tensor (returned unchanged)
    :param seed: Seed for random operation
    :return : (augmented channel-first image, target)
    """

    # tf.image.random_saturation requires the channel axis to be last.
    image = tf.transpose(image, perm=[1, 2, 0])

    # Random bright & saturation change
    image = tf.image.random_brightness(image, max_delta=0.1, seed=seed)
    image = tf.image.random_saturation(image, lower=0.5, upper=1.5, seed=seed)

    # Keeping pixel values in check
    image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=1.0)

    # Back to the channel-first layout used everywhere else in this class.
    image = tf.transpose(image, perm=[2, 0, 1])

    return image, target

  def load_dataset_from_df(self, dataframe, batch_size=32, num_repeat=None, shuffle=False,
                           input_shape=(448, 448, 3), grid_size=7, augment=False,
                           seed=None):
    """
    Instantiate a tf.data pipeline over the loaded arrays
    :param dataframe: Dataframe contains img files and annotation files
    :param batch_size: Batch size
    :param num_repeat: Number of times to repeat the data (infinite if None)
    :param shuffle: Flag to shuffle the dataset (if True)
    :param input_shape: Shape of the processed image
    :param grid_size: Grid size
    :param augment: Flag to apply some random augmentations to the image
    :param seed: Random seed for operation
    :return : Iterable dataset
    """

    apply_augmentation = partial(self._apply_augmentation, seed=seed)
    imgs, targets = self.load_dataset(dataframe, input_shape, grid_size)
    # load_dataset returns plain arrays; wrap them so the tf.data calls below exist.
    dataset = tf.data.Dataset.from_tensor_slices((imgs, targets))
    dataset = dataset.repeat(num_repeat)
    if shuffle:
      dataset = dataset.shuffle(1000, seed)
    if augment:
      dataset = dataset.map(apply_augmentation, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

In [95]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [102]:
# Load the training split from the mounted Google Drive folder.
# NOTE: DataLoad doubles its arrays once per `repeat` pass, so repeat=4 gives
# 16x the images found on disk (240 files -> 3840 samples in the output below).
train_dir = '/content/drive/MyDrive/Colab Notebooks/My_Laptop_Data/fruits_dataset/train'
dataload = DataLoad(train_dir, aug=False, repeat=4)
# NOTE(review): train_df is not used by any cell visible in this notebook.
train_df = dataload.get_dataframe(train_dir)

240it [00:04, 49.59it/s]


In [104]:
dataload.utest_loaddata()
dataload.utest_getdata()

__verbose__: DataLoad(nn.Dataset) > utest_loaddata: @@@ test load data
repeat: 4
is aug: False
__verbose__: imgs =  (<class 'numpy.ndarray'>, (3840, 3, 224, 224), '2312110240Bytes')
__verbose__: labels =  (<class 'numpy.ndarray'>, (3840, 7, 7, 8), '6021280Bytes')
__verbose__: DataLoad(nn.Dataset) > utest_getdata: @@@ test get data
__verbose__: x =  (<class 'torch.Tensor'>, torch.Size([3, 224, 224]), '80Bytes')
__verbose__: y =  (<class 'torch.Tensor'>, torch.Size([7, 7, 8]), '80Bytes')


In [31]:
# Assuming train_dataset is your training dataset
# train_loader = DataLoader(dataset=dataload, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True, drop_last=True, prefetch_factor=2)


In [105]:
# Assuming train_dataset is your training dataset
train_loader = data.DataLoader(dataset=dataload, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)

In [106]:
over(train_loader, "train_loader=")

__verbose__: train_loader= (<class 'torch.utils.data.dataloader.DataLoader'>, 'no-shape', '48Bytes')


### training and testing

In [110]:
def train_fn(train_loader, model, optimizer, loss_fn):
  """
    Run one optimisation pass (one epoch) over ``train_loader``.

    For every batch the inputs and targets are moved to ``DEVICE``, the model
    output is scored with ``loss_fn`` and a single optimizer step is taken.
    The scalar loss of each batch is recorded so the epoch mean can be
    reported once the loop has finished.

    :param train_loader: Iterable yielding ``(x, y)`` batches
    :param model: Callable mapping ``x`` to predictions
    :param optimizer: Optimizer exposing ``zero_grad()`` and ``step()``
    :param loss_fn: Callable ``loss_fn(out, y)`` returning a scalar tensor; must also expose ``getLoss()``
    :return : None (progress and losses are printed)
  """
  loop = tqdm(train_loader, leave=True)
  mean_loss = []

  for x, y in loop:
    mesVerbose(True, "@@@ --- training loop ---", "train_fn:")
    x, y = x.to(DEVICE), y.to(DEVICE)
    out = model(x).to(DEVICE)
    loss = loss_fn(out, y).to(DEVICE)
    # Store a plain Python float: it feeds the epoch mean below and, unlike the
    # tensor itself, does not keep the autograd graph of the batch alive.
    mean_loss.append(loss.item())

    print("some_loss = ", loss_fn.getLoss())
    print("loss = ", loss)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  # An empty loader yields no batches; report that instead of dividing by zero.
  if not mean_loss:
    print("Mean loss of epoch was nan (train_loader yielded no batches)")
    return

  print(f"Mean loss of epoch was {sum(mean_loss) / len(mean_loss)}")

In [111]:
# Rebind train_loader with the same arguments as the earlier DataLoader cell.
train_loader = data.DataLoader(dataset=dataload, batch_size=BATCH_SIZE, shuffle=True, drop_last=False)
# The model is built for IN_SHAPE and moved to DEVICE.
model = YoloV1(in_shape=IN_SHAPE).to(DEVICE)
# Adam over every model parameter; weight_decay=0 means no L2 penalty is applied.
optimizer = optim.Adam(list(model.parameters()), lr=2e-5, weight_decay=0)
# The loss module is moved to DEVICE as well.
loss_fn = YoloLoss().to(DEVICE)

In [None]:
# Run a single pass over train_loader (one epoch); per-batch losses are printed.
train_fn(train_loader, model, optimizer, loss_fn)

  0%|          | 0/960 [00:00<?, ?it/s]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.1005, grad_fn=<MseLossBackward0>), tensor(12.1832, grad_fn=<MseLossBackward0>), tensor(6.6087, grad_fn=<MseLossBackward0>), tensor(0.9023, grad_fn=<AddBackward0>))
loss =  tensor(75.0766, grad_fn=<AddBackward0>)


  0%|          | 1/960 [00:10<2:44:04, 10.27s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(9.8425, grad_fn=<MseLossBackward0>), tensor(12.2627, grad_fn=<MseLossBackward0>), tensor(9.5611, grad_fn=<MseLossBackward0>), tensor(1.4174, grad_fn=<AddBackward0>))
loss =  tensor(81.4257, grad_fn=<AddBackward0>)


  0%|          | 2/960 [00:17<2:16:15,  8.53s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(15.5544, grad_fn=<MseLossBackward0>), tensor(16.9474, grad_fn=<MseLossBackward0>), tensor(14.3654, grad_fn=<MseLossBackward0>), tensor(1.8594, grad_fn=<AddBackward0>))
loss =  tensor(115.5863, grad_fn=<AddBackward0>)


  0%|          | 3/960 [00:24<2:05:11,  7.85s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.4622, grad_fn=<MseLossBackward0>), tensor(5.7498, grad_fn=<MseLossBackward0>), tensor(3.4173, grad_fn=<MseLossBackward0>), tensor(2.9412, grad_fn=<AddBackward0>))
loss =  tensor(37.0993, grad_fn=<AddBackward0>)


  0%|          | 4/960 [00:31<2:00:17,  7.55s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.4199, grad_fn=<MseLossBackward0>), tensor(7.1768, grad_fn=<MseLossBackward0>), tensor(6.6364, grad_fn=<MseLossBackward0>), tensor(3.7139, grad_fn=<AddBackward0>))
loss =  tensor(50.7973, grad_fn=<AddBackward0>)


  1%|          | 5/960 [00:38<1:55:59,  7.29s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.5051, grad_fn=<MseLossBackward0>), tensor(9.6283, grad_fn=<MseLossBackward0>), tensor(5.2060, grad_fn=<MseLossBackward0>), tensor(4.9820, grad_fn=<AddBackward0>))
loss =  tensor(61.3437, grad_fn=<AddBackward0>)


  1%|          | 6/960 [00:45<1:56:42,  7.34s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.6361, grad_fn=<MseLossBackward0>), tensor(6.4325, grad_fn=<MseLossBackward0>), tensor(6.3931, grad_fn=<MseLossBackward0>), tensor(6.1146, grad_fn=<AddBackward0>))
loss =  tensor(48.2488, grad_fn=<AddBackward0>)


  1%|          | 7/960 [00:52<1:54:37,  7.22s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(11.3606, grad_fn=<MseLossBackward0>), tensor(14.7051, grad_fn=<MseLossBackward0>), tensor(10.6731, grad_fn=<MseLossBackward0>), tensor(6.0892, grad_fn=<AddBackward0>))
loss =  tensor(98.6039, grad_fn=<AddBackward0>)


  1%|          | 8/960 [01:00<1:54:28,  7.21s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.8289, grad_fn=<MseLossBackward0>), tensor(8.4887, grad_fn=<MseLossBackward0>), tensor(6.4916, grad_fn=<MseLossBackward0>), tensor(6.4910, grad_fn=<AddBackward0>))
loss =  tensor(59.0098, grad_fn=<AddBackward0>)


  1%|          | 9/960 [01:07<1:53:34,  7.17s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.7151, grad_fn=<MseLossBackward0>), tensor(4.4179, grad_fn=<MseLossBackward0>), tensor(7.2053, grad_fn=<MseLossBackward0>), tensor(8.3654, grad_fn=<AddBackward0>))
loss =  tensor(40.1925, grad_fn=<AddBackward0>)


  1%|          | 10/960 [01:14<1:53:48,  7.19s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.2671, grad_fn=<MseLossBackward0>), tensor(5.3679, grad_fn=<MseLossBackward0>), tensor(3.8683, grad_fn=<MseLossBackward0>), tensor(12.0098, grad_fn=<AddBackward0>))
loss =  tensor(39.9797, grad_fn=<AddBackward0>)


  1%|          | 11/960 [01:21<1:52:05,  7.09s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.4200, grad_fn=<MseLossBackward0>), tensor(1.6826, grad_fn=<MseLossBackward0>), tensor(3.3464, grad_fn=<MseLossBackward0>), tensor(10.4831, grad_fn=<AddBackward0>))
loss =  tensor(20.4210, grad_fn=<AddBackward0>)


  1%|▏         | 12/960 [01:29<1:59:04,  7.54s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.7593, grad_fn=<MseLossBackward0>), tensor(5.7892, grad_fn=<MseLossBackward0>), tensor(4.0791, grad_fn=<MseLossBackward0>), tensor(9.4731, grad_fn=<AddBackward0>))
loss =  tensor(43.5208, grad_fn=<AddBackward0>)


  1%|▏         | 13/960 [01:37<1:57:30,  7.45s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.9574, grad_fn=<MseLossBackward0>), tensor(7.0805, grad_fn=<MseLossBackward0>), tensor(6.7521, grad_fn=<MseLossBackward0>), tensor(10.9639, grad_fn=<AddBackward0>))
loss =  tensor(54.5941, grad_fn=<AddBackward0>)


  1%|▏         | 14/960 [01:44<1:54:56,  7.29s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(2.9030, grad_fn=<MseLossBackward0>), tensor(2.9617, grad_fn=<MseLossBackward0>), tensor(3.4661, grad_fn=<MseLossBackward0>), tensor(11.7590, grad_fn=<AddBackward0>))
loss =  tensor(27.0572, grad_fn=<AddBackward0>)


  2%|▏         | 15/960 [01:51<1:53:39,  7.22s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(12.0976, grad_fn=<MseLossBackward0>), tensor(12.0223, grad_fn=<MseLossBackward0>), tensor(8.8133, grad_fn=<MseLossBackward0>), tensor(8.8743, grad_fn=<AddBackward0>))
loss =  tensor(85.4593, grad_fn=<AddBackward0>)


  2%|▏         | 16/960 [01:58<1:52:30,  7.15s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(16.6057, grad_fn=<MseLossBackward0>), tensor(18.2832, grad_fn=<MseLossBackward0>), tensor(15.4628, grad_fn=<MseLossBackward0>), tensor(10.3314, grad_fn=<AddBackward0>))
loss =  tensor(128.6501, grad_fn=<AddBackward0>)


  2%|▏         | 17/960 [02:05<1:51:46,  7.11s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.5495, grad_fn=<MseLossBackward0>), tensor(9.8846, grad_fn=<MseLossBackward0>), tensor(7.4567, grad_fn=<MseLossBackward0>), tensor(11.7607, grad_fn=<AddBackward0>))
loss =  tensor(70.3097, grad_fn=<AddBackward0>)


  2%|▏         | 18/960 [02:12<1:51:35,  7.11s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(10.4467, grad_fn=<MseLossBackward0>), tensor(8.9702, grad_fn=<MseLossBackward0>), tensor(8.0823, grad_fn=<MseLossBackward0>), tensor(10.0928, grad_fn=<AddBackward0>))
loss =  tensor(68.4264, grad_fn=<AddBackward0>)


  2%|▏         | 19/960 [02:20<1:56:28,  7.43s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(11.6881, grad_fn=<MseLossBackward0>), tensor(11.8008, grad_fn=<MseLossBackward0>), tensor(8.3630, grad_fn=<MseLossBackward0>), tensor(10.0998, grad_fn=<AddBackward0>))
loss =  tensor(84.1052, grad_fn=<AddBackward0>)


  2%|▏         | 20/960 [02:27<1:54:13,  7.29s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.7680, grad_fn=<MseLossBackward0>), tensor(2.6611, grad_fn=<MseLossBackward0>), tensor(2.2824, grad_fn=<MseLossBackward0>), tensor(10.4533, grad_fn=<AddBackward0>))
loss =  tensor(24.5825, grad_fn=<AddBackward0>)


  2%|▏         | 21/960 [02:34<1:53:19,  7.24s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.8554, grad_fn=<MseLossBackward0>), tensor(6.2886, grad_fn=<MseLossBackward0>), tensor(6.4888, grad_fn=<MseLossBackward0>), tensor(12.9257, grad_fn=<AddBackward0>))
loss =  tensor(52.2502, grad_fn=<AddBackward0>)


  2%|▏         | 22/960 [02:43<2:00:14,  7.69s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.3443, grad_fn=<MseLossBackward0>), tensor(4.1024, grad_fn=<MseLossBackward0>), tensor(3.8092, grad_fn=<MseLossBackward0>), tensor(10.8305, grad_fn=<AddBackward0>))
loss =  tensor(34.0808, grad_fn=<AddBackward0>)


  2%|▏         | 23/960 [02:50<1:58:23,  7.58s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.8026, grad_fn=<MseLossBackward0>), tensor(2.8924, grad_fn=<MseLossBackward0>), tensor(4.0461, grad_fn=<MseLossBackward0>), tensor(13.2471, grad_fn=<AddBackward0>))
loss =  tensor(28.9341, grad_fn=<AddBackward0>)


  2%|▎         | 24/960 [02:57<1:54:44,  7.35s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.9271, grad_fn=<MseLossBackward0>), tensor(2.6806, grad_fn=<MseLossBackward0>), tensor(2.1391, grad_fn=<MseLossBackward0>), tensor(10.9879, grad_fn=<AddBackward0>))
loss =  tensor(24.9632, grad_fn=<AddBackward0>)


  3%|▎         | 25/960 [03:04<1:54:16,  7.33s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.7840, grad_fn=<MseLossBackward0>), tensor(9.7662, grad_fn=<MseLossBackward0>), tensor(5.9846, grad_fn=<MseLossBackward0>), tensor(12.0241, grad_fn=<AddBackward0>))
loss =  tensor(68.6118, grad_fn=<AddBackward0>)


  3%|▎         | 26/960 [03:11<1:52:04,  7.20s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.5689, grad_fn=<MseLossBackward0>), tensor(1.2356, grad_fn=<MseLossBackward0>), tensor(3.3488, grad_fn=<MseLossBackward0>), tensor(14.1934, grad_fn=<AddBackward0>))
loss =  tensor(21.1922, grad_fn=<AddBackward0>)


  3%|▎         | 27/960 [03:18<1:51:57,  7.20s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(9.1278, grad_fn=<MseLossBackward0>), tensor(7.9704, grad_fn=<MseLossBackward0>), tensor(7.8128, grad_fn=<MseLossBackward0>), tensor(11.6307, grad_fn=<AddBackward0>))
loss =  tensor(62.6077, grad_fn=<AddBackward0>)


  3%|▎         | 28/960 [03:25<1:49:47,  7.07s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.6286, grad_fn=<MseLossBackward0>), tensor(8.0336, grad_fn=<MseLossBackward0>), tensor(6.2711, grad_fn=<MseLossBackward0>), tensor(13.9957, grad_fn=<AddBackward0>))
loss =  tensor(59.0655, grad_fn=<AddBackward0>)


  3%|▎         | 29/960 [03:32<1:48:38,  7.00s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.6161, grad_fn=<MseLossBackward0>), tensor(1.6525, grad_fn=<MseLossBackward0>), tensor(2.6201, grad_fn=<MseLossBackward0>), tensor(11.6006, grad_fn=<AddBackward0>))
loss =  tensor(20.2989, grad_fn=<AddBackward0>)


  3%|▎         | 30/960 [03:39<1:49:00,  7.03s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.8029, grad_fn=<MseLossBackward0>), tensor(5.2918, grad_fn=<MseLossBackward0>), tensor(4.6656, grad_fn=<MseLossBackward0>), tensor(13.0920, grad_fn=<AddBackward0>))
loss =  tensor(43.4735, grad_fn=<AddBackward0>)


  3%|▎         | 31/960 [03:46<1:49:07,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.6738, grad_fn=<MseLossBackward0>), tensor(6.5697, grad_fn=<MseLossBackward0>), tensor(7.0899, grad_fn=<MseLossBackward0>), tensor(11.4306, grad_fn=<AddBackward0>))
loss =  tensor(53.3272, grad_fn=<AddBackward0>)


  3%|▎         | 32/960 [03:53<1:49:25,  7.08s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(11.4431, grad_fn=<MseLossBackward0>), tensor(8.6464, grad_fn=<MseLossBackward0>), tensor(9.5685, grad_fn=<MseLossBackward0>), tensor(9.9500, grad_fn=<AddBackward0>))
loss =  tensor(69.2185, grad_fn=<AddBackward0>)


  3%|▎         | 33/960 [04:00<1:48:29,  7.02s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.7722, grad_fn=<MseLossBackward0>), tensor(3.0848, grad_fn=<MseLossBackward0>), tensor(4.0640, grad_fn=<MseLossBackward0>), tensor(9.3592, grad_fn=<AddBackward0>))
loss =  tensor(28.9396, grad_fn=<AddBackward0>)


  4%|▎         | 34/960 [04:07<1:49:44,  7.11s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(9.5617, grad_fn=<MseLossBackward0>), tensor(7.9692, grad_fn=<MseLossBackward0>), tensor(9.4646, grad_fn=<MseLossBackward0>), tensor(10.1204, grad_fn=<AddBackward0>))
loss =  tensor(63.9327, grad_fn=<AddBackward0>)


  4%|▎         | 35/960 [04:14<1:48:44,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.8890, grad_fn=<MseLossBackward0>), tensor(1.5291, grad_fn=<MseLossBackward0>), tensor(2.2672, grad_fn=<MseLossBackward0>), tensor(9.3205, grad_fn=<AddBackward0>))
loss =  tensor(19.4618, grad_fn=<AddBackward0>)


  4%|▍         | 36/960 [04:21<1:49:08,  7.09s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(9.6910, grad_fn=<MseLossBackward0>), tensor(6.8340, grad_fn=<MseLossBackward0>), tensor(9.0305, grad_fn=<MseLossBackward0>), tensor(12.2163, grad_fn=<AddBackward0>))
loss =  tensor(58.9996, grad_fn=<AddBackward0>)


  4%|▍         | 37/960 [04:28<1:46:17,  6.91s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.7607, grad_fn=<MseLossBackward0>), tensor(6.0595, grad_fn=<MseLossBackward0>), tensor(5.2308, grad_fn=<MseLossBackward0>), tensor(9.0927, grad_fn=<AddBackward0>))
loss =  tensor(46.8353, grad_fn=<AddBackward0>)


  4%|▍         | 38/960 [04:35<1:49:00,  7.09s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.1077, grad_fn=<MseLossBackward0>), tensor(6.0342, grad_fn=<MseLossBackward0>), tensor(4.2094, grad_fn=<MseLossBackward0>), tensor(10.3251, grad_fn=<AddBackward0>))
loss =  tensor(44.6508, grad_fn=<AddBackward0>)


  4%|▍         | 39/960 [04:42<1:46:14,  6.92s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.7246, grad_fn=<MseLossBackward0>), tensor(4.0813, grad_fn=<MseLossBackward0>), tensor(3.8147, grad_fn=<MseLossBackward0>), tensor(7.6571, grad_fn=<AddBackward0>))
loss =  tensor(31.7741, grad_fn=<AddBackward0>)


  4%|▍         | 40/960 [04:50<1:49:51,  7.16s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.7436, grad_fn=<MseLossBackward0>), tensor(7.8431, grad_fn=<MseLossBackward0>), tensor(7.7992, grad_fn=<MseLossBackward0>), tensor(9.0631, grad_fn=<AddBackward0>))
loss =  tensor(59.2900, grad_fn=<AddBackward0>)


  4%|▍         | 41/960 [04:56<1:47:22,  7.01s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(13.5066, grad_fn=<MseLossBackward0>), tensor(11.6634, grad_fn=<MseLossBackward0>), tensor(10.7775, grad_fn=<MseLossBackward0>), tensor(12.7442, grad_fn=<AddBackward0>))
loss =  tensor(88.9733, grad_fn=<AddBackward0>)


  4%|▍         | 42/960 [05:04<1:51:12,  7.27s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.4516, grad_fn=<MseLossBackward0>), tensor(6.3792, grad_fn=<MseLossBackward0>), tensor(5.8584, grad_fn=<MseLossBackward0>), tensor(11.2956, grad_fn=<AddBackward0>))
loss =  tensor(48.8538, grad_fn=<AddBackward0>)


  4%|▍         | 43/960 [05:11<1:46:41,  6.98s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.4311, grad_fn=<MseLossBackward0>), tensor(4.5930, grad_fn=<MseLossBackward0>), tensor(5.8003, grad_fn=<MseLossBackward0>), tensor(10.6373, grad_fn=<AddBackward0>))
loss =  tensor(40.5151, grad_fn=<AddBackward0>)


  5%|▍         | 44/960 [05:18<1:50:01,  7.21s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.6755, grad_fn=<MseLossBackward0>), tensor(7.9056, grad_fn=<MseLossBackward0>), tensor(7.3496, grad_fn=<MseLossBackward0>), tensor(12.7761, grad_fn=<AddBackward0>))
loss =  tensor(59.9413, grad_fn=<AddBackward0>)


  5%|▍         | 45/960 [05:25<1:46:34,  6.99s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.2376, grad_fn=<MseLossBackward0>), tensor(4.5857, grad_fn=<MseLossBackward0>), tensor(3.9524, grad_fn=<MseLossBackward0>), tensor(11.2016, grad_fn=<AddBackward0>))
loss =  tensor(39.7195, grad_fn=<AddBackward0>)


  5%|▍         | 46/960 [05:32<1:49:09,  7.17s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.2148, grad_fn=<MseLossBackward0>), tensor(1.7056, grad_fn=<MseLossBackward0>), tensor(5.1784, grad_fn=<MseLossBackward0>), tensor(9.8340, grad_fn=<AddBackward0>))
loss =  tensor(22.8384, grad_fn=<AddBackward0>)


  5%|▍         | 47/960 [05:39<1:46:08,  6.97s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.4315, grad_fn=<MseLossBackward0>), tensor(4.6080, grad_fn=<MseLossBackward0>), tensor(5.0381, grad_fn=<MseLossBackward0>), tensor(8.5540, grad_fn=<AddBackward0>))
loss =  tensor(37.7867, grad_fn=<AddBackward0>)


  5%|▌         | 48/960 [05:47<1:49:05,  7.18s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.4777, grad_fn=<MseLossBackward0>), tensor(3.7224, grad_fn=<MseLossBackward0>), tensor(6.5737, grad_fn=<MseLossBackward0>), tensor(7.1673, grad_fn=<AddBackward0>))
loss =  tensor(36.2473, grad_fn=<AddBackward0>)


  5%|▌         | 49/960 [05:53<1:45:46,  6.97s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(10.9086, grad_fn=<MseLossBackward0>), tensor(10.5664, grad_fn=<MseLossBackward0>), tensor(10.4974, grad_fn=<MseLossBackward0>), tensor(11.1694, grad_fn=<AddBackward0>))
loss =  tensor(79.8224, grad_fn=<AddBackward0>)


  5%|▌         | 50/960 [06:01<1:48:49,  7.18s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.8300, grad_fn=<MseLossBackward0>), tensor(5.3253, grad_fn=<MseLossBackward0>), tensor(6.7186, grad_fn=<MseLossBackward0>), tensor(8.2121, grad_fn=<AddBackward0>))
loss =  tensor(45.2811, grad_fn=<AddBackward0>)


  5%|▌         | 51/960 [06:07<1:46:42,  7.04s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.4340, grad_fn=<MseLossBackward0>), tensor(4.3308, grad_fn=<MseLossBackward0>), tensor(7.3770, grad_fn=<MseLossBackward0>), tensor(10.2239, grad_fn=<AddBackward0>))
loss =  tensor(42.5771, grad_fn=<AddBackward0>)


  5%|▌         | 52/960 [06:15<1:48:53,  7.20s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(10.2879, grad_fn=<MseLossBackward0>), tensor(5.6794, grad_fn=<MseLossBackward0>), tensor(7.5539, grad_fn=<MseLossBackward0>), tensor(10.3796, grad_fn=<AddBackward0>))
loss =  tensor(51.4286, grad_fn=<AddBackward0>)


  6%|▌         | 53/960 [06:22<1:46:16,  7.03s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.4067, grad_fn=<MseLossBackward0>), tensor(3.5784, grad_fn=<MseLossBackward0>), tensor(4.1186, grad_fn=<MseLossBackward0>), tensor(8.2017, grad_fn=<AddBackward0>))
loss =  tensor(32.5183, grad_fn=<AddBackward0>)


  6%|▌         | 54/960 [06:29<1:48:43,  7.20s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(12.4448, grad_fn=<MseLossBackward0>), tensor(8.4181, grad_fn=<MseLossBackward0>), tensor(12.2789, grad_fn=<MseLossBackward0>), tensor(8.0166, grad_fn=<AddBackward0>))
loss =  tensor(70.8223, grad_fn=<AddBackward0>)


  6%|▌         | 55/960 [06:38<1:54:37,  7.60s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.8780, grad_fn=<MseLossBackward0>), tensor(1.8749, grad_fn=<MseLossBackward0>), tensor(3.7662, grad_fn=<MseLossBackward0>), tensor(7.3712, grad_fn=<AddBackward0>))
loss =  tensor(20.7042, grad_fn=<AddBackward0>)


  6%|▌         | 56/960 [06:45<1:50:59,  7.37s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.1837, grad_fn=<MseLossBackward0>), tensor(3.9074, grad_fn=<MseLossBackward0>), tensor(4.5518, grad_fn=<MseLossBackward0>), tensor(6.4701, grad_fn=<AddBackward0>))
loss =  tensor(32.5074, grad_fn=<AddBackward0>)


  6%|▌         | 57/960 [06:52<1:49:49,  7.30s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.7737, grad_fn=<MseLossBackward0>), tensor(2.3063, grad_fn=<MseLossBackward0>), tensor(3.9026, grad_fn=<MseLossBackward0>), tensor(6.2125, grad_fn=<AddBackward0>))
loss =  tensor(23.3141, grad_fn=<AddBackward0>)


  6%|▌         | 58/960 [06:58<1:47:19,  7.14s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.9429, grad_fn=<MseLossBackward0>), tensor(7.1064, grad_fn=<MseLossBackward0>), tensor(8.9982, grad_fn=<MseLossBackward0>), tensor(4.8011, grad_fn=<AddBackward0>))
loss =  tensor(53.8733, grad_fn=<AddBackward0>)


  6%|▌         | 59/960 [07:05<1:46:38,  7.10s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.8287, grad_fn=<MseLossBackward0>), tensor(3.6674, grad_fn=<MseLossBackward0>), tensor(6.9544, grad_fn=<MseLossBackward0>), tensor(6.4754, grad_fn=<AddBackward0>))
loss =  tensor(35.3576, grad_fn=<AddBackward0>)


  6%|▋         | 60/960 [07:12<1:44:54,  6.99s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.9438, grad_fn=<MseLossBackward0>), tensor(4.6432, grad_fn=<MseLossBackward0>), tensor(5.7989, grad_fn=<MseLossBackward0>), tensor(5.7133, grad_fn=<AddBackward0>))
loss =  tensor(36.8153, grad_fn=<AddBackward0>)


  6%|▋         | 61/960 [07:19<1:45:34,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.8458, grad_fn=<MseLossBackward0>), tensor(2.2270, grad_fn=<MseLossBackward0>), tensor(2.1332, grad_fn=<MseLossBackward0>), tensor(6.0474, grad_fn=<AddBackward0>))
loss =  tensor(21.1377, grad_fn=<AddBackward0>)


  6%|▋         | 62/960 [07:26<1:44:30,  6.98s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.1194, grad_fn=<MseLossBackward0>), tensor(8.3978, grad_fn=<MseLossBackward0>), tensor(9.1226, grad_fn=<MseLossBackward0>), tensor(5.5068, grad_fn=<AddBackward0>))
loss =  tensor(60.9844, grad_fn=<AddBackward0>)


  7%|▋         | 63/960 [07:33<1:45:09,  7.03s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.5866, grad_fn=<MseLossBackward0>), tensor(2.8270, grad_fn=<MseLossBackward0>), tensor(4.8881, grad_fn=<MseLossBackward0>), tensor(5.8638, grad_fn=<AddBackward0>))
loss =  tensor(27.5418, grad_fn=<AddBackward0>)


  7%|▋         | 64/960 [07:40<1:43:17,  6.92s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.2876, grad_fn=<MseLossBackward0>), tensor(6.2332, grad_fn=<MseLossBackward0>), tensor(5.8896, grad_fn=<MseLossBackward0>), tensor(4.2930, grad_fn=<AddBackward0>))
loss =  tensor(45.4895, grad_fn=<AddBackward0>)


  7%|▋         | 65/960 [07:47<1:44:02,  6.97s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.2628, grad_fn=<MseLossBackward0>), tensor(0.7718, grad_fn=<MseLossBackward0>), tensor(3.2535, grad_fn=<MseLossBackward0>), tensor(6.1333, grad_fn=<AddBackward0>))
loss =  tensor(13.4419, grad_fn=<AddBackward0>)


  7%|▋         | 66/960 [07:54<1:42:56,  6.91s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(13.1749, grad_fn=<MseLossBackward0>), tensor(8.4898, grad_fn=<MseLossBackward0>), tensor(11.8241, grad_fn=<MseLossBackward0>), tensor(8.7053, grad_fn=<AddBackward0>))
loss =  tensor(71.8006, grad_fn=<AddBackward0>)


  7%|▋         | 67/960 [08:01<1:43:20,  6.94s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.2495, grad_fn=<MseLossBackward0>), tensor(4.4076, grad_fn=<MseLossBackward0>), tensor(6.3701, grad_fn=<MseLossBackward0>), tensor(6.3255, grad_fn=<AddBackward0>))
loss =  tensor(38.8203, grad_fn=<AddBackward0>)


  7%|▋         | 68/960 [08:08<1:42:52,  6.92s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.4904, grad_fn=<MseLossBackward0>), tensor(4.6505, grad_fn=<MseLossBackward0>), tensor(6.0179, grad_fn=<MseLossBackward0>), tensor(5.8304, grad_fn=<AddBackward0>))
loss =  tensor(37.6762, grad_fn=<AddBackward0>)


  7%|▋         | 69/960 [08:15<1:43:41,  6.98s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.8522, grad_fn=<MseLossBackward0>), tensor(3.6173, grad_fn=<MseLossBackward0>), tensor(4.5294, grad_fn=<MseLossBackward0>), tensor(8.4871, grad_fn=<AddBackward0>))
loss =  tensor(32.7119, grad_fn=<AddBackward0>)


  7%|▋         | 70/960 [08:22<1:42:51,  6.93s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.6427, grad_fn=<MseLossBackward0>), tensor(6.8342, grad_fn=<MseLossBackward0>), tensor(7.1877, grad_fn=<MseLossBackward0>), tensor(6.6042, grad_fn=<AddBackward0>))
loss =  tensor(53.3034, grad_fn=<AddBackward0>)


  7%|▋         | 71/960 [08:29<1:44:15,  7.04s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(2.5027, grad_fn=<MseLossBackward0>), tensor(2.5331, grad_fn=<MseLossBackward0>), tensor(2.8515, grad_fn=<MseLossBackward0>), tensor(5.9897, grad_fn=<AddBackward0>))
loss =  tensor(21.0144, grad_fn=<AddBackward0>)


  8%|▊         | 72/960 [08:36<1:42:42,  6.94s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.0637, grad_fn=<MseLossBackward0>), tensor(2.7221, grad_fn=<MseLossBackward0>), tensor(2.3089, grad_fn=<MseLossBackward0>), tensor(9.5386, grad_fn=<AddBackward0>))
loss =  tensor(23.7526, grad_fn=<AddBackward0>)


  8%|▊         | 73/960 [08:43<1:44:51,  7.09s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(10.1100, grad_fn=<MseLossBackward0>), tensor(10.7198, grad_fn=<MseLossBackward0>), tensor(11.7164, grad_fn=<MseLossBackward0>), tensor(7.6676, grad_fn=<AddBackward0>))
loss =  tensor(79.2593, grad_fn=<AddBackward0>)


  8%|▊         | 74/960 [08:50<1:42:38,  6.95s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(2.5845, grad_fn=<MseLossBackward0>), tensor(3.0105, grad_fn=<MseLossBackward0>), tensor(2.2000, grad_fn=<MseLossBackward0>), tensor(8.9313, grad_fn=<AddBackward0>))
loss =  tensor(24.3028, grad_fn=<AddBackward0>)


  8%|▊         | 75/960 [08:57<1:45:33,  7.16s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.7922, grad_fn=<MseLossBackward0>), tensor(6.0769, grad_fn=<MseLossBackward0>), tensor(5.9722, grad_fn=<MseLossBackward0>), tensor(6.3764, grad_fn=<AddBackward0>))
loss =  tensor(47.3372, grad_fn=<AddBackward0>)


  8%|▊         | 76/960 [09:04<1:42:53,  6.98s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.1056, grad_fn=<MseLossBackward0>), tensor(2.3160, grad_fn=<MseLossBackward0>), tensor(2.4352, grad_fn=<MseLossBackward0>), tensor(7.3423, grad_fn=<AddBackward0>))
loss =  tensor(21.7920, grad_fn=<AddBackward0>)


  8%|▊         | 77/960 [09:12<1:45:28,  7.17s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.8297, grad_fn=<MseLossBackward0>), tensor(3.2039, grad_fn=<MseLossBackward0>), tensor(2.9471, grad_fn=<MseLossBackward0>), tensor(11.0192, grad_fn=<AddBackward0>))
loss =  tensor(28.3060, grad_fn=<AddBackward0>)


  8%|▊         | 78/960 [09:18<1:42:26,  6.97s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.8314, grad_fn=<MseLossBackward0>), tensor(9.4914, grad_fn=<MseLossBackward0>), tensor(8.7063, grad_fn=<MseLossBackward0>), tensor(8.9236, grad_fn=<AddBackward0>))
loss =  tensor(69.4563, grad_fn=<AddBackward0>)


  8%|▊         | 79/960 [09:26<1:44:51,  7.14s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.9109, grad_fn=<MseLossBackward0>), tensor(5.7593, grad_fn=<MseLossBackward0>), tensor(4.4014, grad_fn=<MseLossBackward0>), tensor(6.3199, grad_fn=<AddBackward0>))
loss =  tensor(40.2687, grad_fn=<AddBackward0>)


  8%|▊         | 80/960 [09:32<1:41:19,  6.91s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.5546, grad_fn=<MseLossBackward0>), tensor(3.0854, grad_fn=<MseLossBackward0>), tensor(5.5748, grad_fn=<MseLossBackward0>), tensor(6.4832, grad_fn=<AddBackward0>))
loss =  tensor(28.7979, grad_fn=<AddBackward0>)


  8%|▊         | 81/960 [09:39<1:43:41,  7.08s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(10.5672, grad_fn=<MseLossBackward0>), tensor(6.7676, grad_fn=<MseLossBackward0>), tensor(10.3054, grad_fn=<MseLossBackward0>), tensor(6.3069, grad_fn=<AddBackward0>))
loss =  tensor(57.8638, grad_fn=<AddBackward0>)


  9%|▊         | 82/960 [09:46<1:40:13,  6.85s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.1023, grad_fn=<MseLossBackward0>), tensor(1.4599, grad_fn=<MseLossBackward0>), tensor(2.3084, grad_fn=<MseLossBackward0>), tensor(9.9274, grad_fn=<AddBackward0>))
loss =  tensor(17.6739, grad_fn=<AddBackward0>)


  9%|▊         | 83/960 [09:53<1:43:03,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.5465, grad_fn=<MseLossBackward0>), tensor(4.3883, grad_fn=<MseLossBackward0>), tensor(4.7967, grad_fn=<MseLossBackward0>), tensor(7.6416, grad_fn=<AddBackward0>))
loss =  tensor(34.1056, grad_fn=<AddBackward0>)


  9%|▉         | 84/960 [10:00<1:39:50,  6.84s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.4018, grad_fn=<MseLossBackward0>), tensor(2.8247, grad_fn=<MseLossBackward0>), tensor(4.5256, grad_fn=<MseLossBackward0>), tensor(10.9971, grad_fn=<AddBackward0>))
loss =  tensor(29.5495, grad_fn=<AddBackward0>)


  9%|▉         | 85/960 [10:07<1:42:59,  7.06s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.1036, grad_fn=<MseLossBackward0>), tensor(2.7430, grad_fn=<MseLossBackward0>), tensor(4.6562, grad_fn=<MseLossBackward0>), tensor(7.0613, grad_fn=<AddBackward0>))
loss =  tensor(28.0054, grad_fn=<AddBackward0>)


  9%|▉         | 86/960 [10:14<1:40:04,  6.87s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(9.0066, grad_fn=<MseLossBackward0>), tensor(5.8365, grad_fn=<MseLossBackward0>), tensor(6.7470, grad_fn=<MseLossBackward0>), tensor(6.3933, grad_fn=<AddBackward0>))
loss =  tensor(48.1325, grad_fn=<AddBackward0>)


  9%|▉         | 87/960 [10:21<1:43:27,  7.11s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.7435, grad_fn=<MseLossBackward0>), tensor(5.3456, grad_fn=<MseLossBackward0>), tensor(3.8574, grad_fn=<MseLossBackward0>), tensor(6.2370, grad_fn=<AddBackward0>))
loss =  tensor(38.4476, grad_fn=<AddBackward0>)


  9%|▉         | 88/960 [10:28<1:40:02,  6.88s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.3999, grad_fn=<MseLossBackward0>), tensor(6.9632, grad_fn=<MseLossBackward0>), tensor(9.1782, grad_fn=<MseLossBackward0>), tensor(5.4781, grad_fn=<AddBackward0>))
loss =  tensor(55.1331, grad_fn=<AddBackward0>)


  9%|▉         | 89/960 [10:37<1:49:27,  7.54s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.8196, grad_fn=<MseLossBackward0>), tensor(3.4281, grad_fn=<MseLossBackward0>), tensor(3.7831, grad_fn=<MseLossBackward0>), tensor(6.7163, grad_fn=<AddBackward0>))
loss =  tensor(29.1014, grad_fn=<AddBackward0>)


  9%|▉         | 90/960 [10:43<1:44:50,  7.23s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.4143, grad_fn=<MseLossBackward0>), tensor(7.5514, grad_fn=<MseLossBackward0>), tensor(5.7832, grad_fn=<MseLossBackward0>), tensor(8.9085, grad_fn=<AddBackward0>))
loss =  tensor(56.4090, grad_fn=<AddBackward0>)


  9%|▉         | 91/960 [10:51<1:47:42,  7.44s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.1200, grad_fn=<MseLossBackward0>), tensor(3.1577, grad_fn=<MseLossBackward0>), tensor(5.4017, grad_fn=<MseLossBackward0>), tensor(7.6497, grad_fn=<AddBackward0>))
loss =  tensor(30.1351, grad_fn=<AddBackward0>)


 10%|▉         | 92/960 [10:58<1:43:20,  7.14s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.9071, grad_fn=<MseLossBackward0>), tensor(4.5782, grad_fn=<MseLossBackward0>), tensor(5.6217, grad_fn=<MseLossBackward0>), tensor(6.3130, grad_fn=<AddBackward0>))
loss =  tensor(37.5763, grad_fn=<AddBackward0>)


 10%|▉         | 93/960 [11:05<1:45:51,  7.33s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(7.0289, grad_fn=<MseLossBackward0>), tensor(7.4431, grad_fn=<MseLossBackward0>), tensor(6.9462, grad_fn=<MseLossBackward0>), tensor(7.7002, grad_fn=<AddBackward0>))
loss =  tensor(55.0406, grad_fn=<AddBackward0>)


 10%|▉         | 94/960 [11:12<1:41:44,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(11.0107, grad_fn=<MseLossBackward0>), tensor(7.0781, grad_fn=<MseLossBackward0>), tensor(9.8581, grad_fn=<MseLossBackward0>), tensor(5.4145, grad_fn=<AddBackward0>))
loss =  tensor(58.9666, grad_fn=<AddBackward0>)


 10%|▉         | 95/960 [11:20<1:44:51,  7.27s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.7591, grad_fn=<MseLossBackward0>), tensor(4.6628, grad_fn=<MseLossBackward0>), tensor(5.0511, grad_fn=<MseLossBackward0>), tensor(6.5233, grad_fn=<AddBackward0>))
loss =  tensor(37.3860, grad_fn=<AddBackward0>)


 10%|█         | 96/960 [11:26<1:41:03,  7.02s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(10.3877, grad_fn=<MseLossBackward0>), tensor(8.9363, grad_fn=<MseLossBackward0>), tensor(9.1651, grad_fn=<MseLossBackward0>), tensor(5.2081, grad_fn=<AddBackward0>))
loss =  tensor(66.8386, grad_fn=<AddBackward0>)


 10%|█         | 97/960 [11:34<1:44:45,  7.28s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(2.7704, grad_fn=<MseLossBackward0>), tensor(2.4430, grad_fn=<MseLossBackward0>), tensor(2.9698, grad_fn=<MseLossBackward0>), tensor(7.8039, grad_fn=<AddBackward0>))
loss =  tensor(21.8572, grad_fn=<AddBackward0>)


 10%|█         | 98/960 [11:41<1:41:32,  7.07s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.3573, grad_fn=<MseLossBackward0>), tensor(3.9644, grad_fn=<MseLossBackward0>), tensor(4.0065, grad_fn=<MseLossBackward0>), tensor(6.6689, grad_fn=<AddBackward0>))
loss =  tensor(31.5204, grad_fn=<AddBackward0>)


 10%|█         | 99/960 [11:48<1:44:57,  7.31s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(11.4678, grad_fn=<MseLossBackward0>), tensor(7.5915, grad_fn=<MseLossBackward0>), tensor(9.1564, grad_fn=<MseLossBackward0>), tensor(4.3406, grad_fn=<AddBackward0>))
loss =  tensor(60.7519, grad_fn=<AddBackward0>)


 10%|█         | 100/960 [11:55<1:40:59,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.9293, grad_fn=<MseLossBackward0>), tensor(2.3770, grad_fn=<MseLossBackward0>), tensor(3.6361, grad_fn=<MseLossBackward0>), tensor(5.7540, grad_fn=<AddBackward0>))
loss =  tensor(23.3276, grad_fn=<AddBackward0>)


 11%|█         | 101/960 [12:03<1:44:17,  7.29s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(9.5948, grad_fn=<MseLossBackward0>), tensor(8.4576, grad_fn=<MseLossBackward0>), tensor(9.1352, grad_fn=<MseLossBackward0>), tensor(5.0769, grad_fn=<AddBackward0>))
loss =  tensor(63.5567, grad_fn=<AddBackward0>)


 11%|█         | 102/960 [12:09<1:41:04,  7.07s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.6574, grad_fn=<MseLossBackward0>), tensor(3.2424, grad_fn=<MseLossBackward0>), tensor(8.1967, grad_fn=<MseLossBackward0>), tensor(4.6258, grad_fn=<AddBackward0>))
loss =  tensor(32.3787, grad_fn=<AddBackward0>)


 11%|█         | 103/960 [12:17<1:44:25,  7.31s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.2032, grad_fn=<MseLossBackward0>), tensor(1.4546, grad_fn=<MseLossBackward0>), tensor(2.7303, grad_fn=<MseLossBackward0>), tensor(6.1247, grad_fn=<AddBackward0>))
loss =  tensor(17.2688, grad_fn=<AddBackward0>)


 11%|█         | 104/960 [12:24<1:41:32,  7.12s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.5166, grad_fn=<MseLossBackward0>), tensor(3.3635, grad_fn=<MseLossBackward0>), tensor(6.9716, grad_fn=<MseLossBackward0>), tensor(5.4154, grad_fn=<AddBackward0>))
loss =  tensor(32.0134, grad_fn=<AddBackward0>)


 11%|█         | 105/960 [12:32<1:44:12,  7.31s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.3960, grad_fn=<MseLossBackward0>), tensor(3.0746, grad_fn=<MseLossBackward0>), tensor(4.0887, grad_fn=<MseLossBackward0>), tensor(6.0785, grad_fn=<AddBackward0>))
loss =  tensor(28.8971, grad_fn=<AddBackward0>)


 11%|█         | 106/960 [12:38<1:39:49,  7.01s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.1122, grad_fn=<MseLossBackward0>), tensor(5.3081, grad_fn=<MseLossBackward0>), tensor(5.9876, grad_fn=<MseLossBackward0>), tensor(6.0460, grad_fn=<AddBackward0>))
loss =  tensor(43.6634, grad_fn=<AddBackward0>)


 11%|█         | 107/960 [12:46<1:42:58,  7.24s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.2245, grad_fn=<MseLossBackward0>), tensor(1.8792, grad_fn=<MseLossBackward0>), tensor(4.3225, grad_fn=<MseLossBackward0>), tensor(5.5186, grad_fn=<AddBackward0>))
loss =  tensor(22.7022, grad_fn=<AddBackward0>)


 11%|█▏        | 108/960 [12:52<1:39:50,  7.03s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.3145, grad_fn=<MseLossBackward0>), tensor(2.3373, grad_fn=<MseLossBackward0>), tensor(2.9825, grad_fn=<MseLossBackward0>), tensor(7.0577, grad_fn=<AddBackward0>))
loss =  tensor(23.5124, grad_fn=<AddBackward0>)


 11%|█▏        | 109/960 [13:00<1:43:10,  7.27s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.4157, grad_fn=<MseLossBackward0>), tensor(1.1371, grad_fn=<MseLossBackward0>), tensor(2.6622, grad_fn=<MseLossBackward0>), tensor(8.8457, grad_fn=<AddBackward0>))
loss =  tensor(18.1865, grad_fn=<AddBackward0>)


 11%|█▏        | 110/960 [13:07<1:40:20,  7.08s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.8121, grad_fn=<MseLossBackward0>), tensor(5.3345, grad_fn=<MseLossBackward0>), tensor(5.4557, grad_fn=<MseLossBackward0>), tensor(6.4731, grad_fn=<AddBackward0>))
loss =  tensor(41.1769, grad_fn=<AddBackward0>)


 12%|█▏        | 111/960 [13:14<1:42:59,  7.28s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.2600, grad_fn=<MseLossBackward0>), tensor(3.2180, grad_fn=<MseLossBackward0>), tensor(3.2404, grad_fn=<MseLossBackward0>), tensor(4.7867, grad_fn=<AddBackward0>))
loss =  tensor(25.9837, grad_fn=<AddBackward0>)


 12%|█▏        | 112/960 [13:21<1:40:03,  7.08s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.5752, grad_fn=<MseLossBackward0>), tensor(3.3063, grad_fn=<MseLossBackward0>), tensor(2.8121, grad_fn=<MseLossBackward0>), tensor(5.4162, grad_fn=<AddBackward0>))
loss =  tensor(26.6272, grad_fn=<AddBackward0>)


 12%|█▏        | 113/960 [13:29<1:42:45,  7.28s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.1483, grad_fn=<MseLossBackward0>), tensor(4.3824, grad_fn=<MseLossBackward0>), tensor(3.6237, grad_fn=<MseLossBackward0>), tensor(6.0917, grad_fn=<AddBackward0>))
loss =  tensor(32.7300, grad_fn=<AddBackward0>)


 12%|█▏        | 114/960 [13:35<1:40:11,  7.11s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(9.1782, grad_fn=<MseLossBackward0>), tensor(3.9565, grad_fn=<MseLossBackward0>), tensor(7.2767, grad_fn=<MseLossBackward0>), tensor(5.9837, grad_fn=<AddBackward0>))
loss =  tensor(39.2293, grad_fn=<AddBackward0>)


 12%|█▏        | 115/960 [13:43<1:41:17,  7.19s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.8790, grad_fn=<MseLossBackward0>), tensor(2.4557, grad_fn=<MseLossBackward0>), tensor(2.5811, grad_fn=<MseLossBackward0>), tensor(5.3420, grad_fn=<AddBackward0>))
loss =  tensor(21.4094, grad_fn=<AddBackward0>)


 12%|█▏        | 116/960 [13:50<1:39:09,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(12.5185, grad_fn=<MseLossBackward0>), tensor(7.6561, grad_fn=<MseLossBackward0>), tensor(9.4949, grad_fn=<MseLossBackward0>), tensor(5.5775, grad_fn=<AddBackward0>))
loss =  tensor(63.0828, grad_fn=<AddBackward0>)


 12%|█▏        | 117/960 [13:57<1:40:38,  7.16s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(11.0092, grad_fn=<MseLossBackward0>), tensor(7.0123, grad_fn=<MseLossBackward0>), tensor(10.0882, grad_fn=<MseLossBackward0>), tensor(5.3367, grad_fn=<AddBackward0>))
loss =  tensor(58.8272, grad_fn=<AddBackward0>)


 12%|█▏        | 118/960 [14:04<1:38:56,  7.05s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.8297, grad_fn=<MseLossBackward0>), tensor(3.2087, grad_fn=<MseLossBackward0>), tensor(4.4584, grad_fn=<MseLossBackward0>), tensor(5.1691, grad_fn=<AddBackward0>))
loss =  tensor(28.9163, grad_fn=<AddBackward0>)


 12%|█▏        | 119/960 [14:11<1:40:00,  7.13s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.7607, grad_fn=<MseLossBackward0>), tensor(4.1780, grad_fn=<MseLossBackward0>), tensor(4.3662, grad_fn=<MseLossBackward0>), tensor(5.5343, grad_fn=<AddBackward0>))
loss =  tensor(34.7843, grad_fn=<AddBackward0>)


 12%|█▎        | 120/960 [14:18<1:38:23,  7.03s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(10.8619, grad_fn=<MseLossBackward0>), tensor(9.7161, grad_fn=<MseLossBackward0>), tensor(10.7138, grad_fn=<MseLossBackward0>), tensor(4.9190, grad_fn=<AddBackward0>))
loss =  tensor(72.6158, grad_fn=<AddBackward0>)


 13%|█▎        | 121/960 [14:26<1:41:33,  7.26s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.3729, grad_fn=<MseLossBackward0>), tensor(3.0904, grad_fn=<MseLossBackward0>), tensor(6.1279, grad_fn=<MseLossBackward0>), tensor(4.6186, grad_fn=<AddBackward0>))
loss =  tensor(30.2621, grad_fn=<AddBackward0>)


 13%|█▎        | 122/960 [14:33<1:42:38,  7.35s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.7150, grad_fn=<MseLossBackward0>), tensor(4.8754, grad_fn=<MseLossBackward0>), tensor(6.8039, grad_fn=<MseLossBackward0>), tensor(5.4012, grad_fn=<AddBackward0>))
loss =  tensor(40.5966, grad_fn=<AddBackward0>)


 13%|█▎        | 123/960 [14:40<1:39:39,  7.14s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.0472, grad_fn=<MseLossBackward0>), tensor(2.0225, grad_fn=<MseLossBackward0>), tensor(2.9045, grad_fn=<MseLossBackward0>), tensor(7.7059, grad_fn=<AddBackward0>))
loss =  tensor(21.9173, grad_fn=<AddBackward0>)


 13%|█▎        | 124/960 [14:47<1:39:14,  7.12s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.3634, grad_fn=<MseLossBackward0>), tensor(2.8385, grad_fn=<MseLossBackward0>), tensor(3.5993, grad_fn=<MseLossBackward0>), tensor(4.4468, grad_fn=<AddBackward0>))
loss =  tensor(25.3786, grad_fn=<AddBackward0>)


 13%|█▎        | 125/960 [14:54<1:37:24,  7.00s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(6.4146, grad_fn=<MseLossBackward0>), tensor(1.9396, grad_fn=<MseLossBackward0>), tensor(4.1993, grad_fn=<MseLossBackward0>), tensor(5.6979, grad_fn=<AddBackward0>))
loss =  tensor(23.1607, grad_fn=<AddBackward0>)


 13%|█▎        | 126/960 [15:01<1:38:46,  7.11s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(5.4592, grad_fn=<MseLossBackward0>), tensor(1.9571, grad_fn=<MseLossBackward0>), tensor(4.3621, grad_fn=<MseLossBackward0>), tensor(6.7974, grad_fn=<AddBackward0>))
loss =  tensor(23.0054, grad_fn=<AddBackward0>)


 13%|█▎        | 127/960 [15:08<1:37:14,  7.00s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.5831, grad_fn=<MseLossBackward0>), tensor(0.3152, grad_fn=<MseLossBackward0>), tensor(2.7089, grad_fn=<MseLossBackward0>), tensor(6.3324, grad_fn=<AddBackward0>))
loss =  tensor(11.0340, grad_fn=<AddBackward0>)


 13%|█▎        | 128/960 [15:15<1:38:41,  7.12s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.4541, grad_fn=<MseLossBackward0>), tensor(5.7055, grad_fn=<MseLossBackward0>), tensor(4.8401, grad_fn=<MseLossBackward0>), tensor(7.2186, grad_fn=<AddBackward0>))
loss =  tensor(45.4308, grad_fn=<AddBackward0>)


 13%|█▎        | 129/960 [15:22<1:36:21,  6.96s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(2.0927, grad_fn=<MseLossBackward0>), tensor(2.3312, grad_fn=<MseLossBackward0>), tensor(2.4470, grad_fn=<MseLossBackward0>), tensor(5.0877, grad_fn=<AddBackward0>))
loss =  tensor(18.7393, grad_fn=<AddBackward0>)


 14%|█▎        | 130/960 [15:29<1:37:41,  7.06s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(4.8014, grad_fn=<MseLossBackward0>), tensor(3.7430, grad_fn=<MseLossBackward0>), tensor(5.0586, grad_fn=<MseLossBackward0>), tensor(5.5609, grad_fn=<AddBackward0>))
loss =  tensor(31.3556, grad_fn=<AddBackward0>)


 14%|█▎        | 131/960 [15:36<1:35:00,  6.88s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.0165, grad_fn=<MseLossBackward0>), tensor(5.1103, grad_fn=<MseLossBackward0>), tensor(6.1276, grad_fn=<MseLossBackward0>), tensor(6.4891, grad_fn=<AddBackward0>))
loss =  tensor(42.9400, grad_fn=<AddBackward0>)


 14%|█▍        | 132/960 [15:43<1:36:53,  7.02s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.2119, grad_fn=<MseLossBackward0>), tensor(4.7470, grad_fn=<MseLossBackward0>), tensor(4.2139, grad_fn=<MseLossBackward0>), tensor(8.0792, grad_fn=<AddBackward0>))
loss =  tensor(35.2002, grad_fn=<AddBackward0>)


 14%|█▍        | 133/960 [15:49<1:34:18,  6.84s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.2580, grad_fn=<MseLossBackward0>), tensor(7.5886, grad_fn=<MseLossBackward0>), tensor(7.5619, grad_fn=<MseLossBackward0>), tensor(5.0213, grad_fn=<AddBackward0>))
loss =  tensor(56.2736, grad_fn=<AddBackward0>)


 14%|█▍        | 134/960 [15:57<1:37:20,  7.07s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(3.0253, grad_fn=<MseLossBackward0>), tensor(1.5843, grad_fn=<MseLossBackward0>), tensor(1.6620, grad_fn=<MseLossBackward0>), tensor(6.8294, grad_fn=<AddBackward0>))
loss =  tensor(16.0234, grad_fn=<AddBackward0>)


 14%|█▍        | 135/960 [16:03<1:35:06,  6.92s/it]

__verbose__: train_fn: @@@ --- training loop ---
some_loss =  (tensor(8.1957, grad_fn=<MseLossBackward0>), tensor(4.2775, grad_fn=<MseLossBackward0>), tensor(6.0589, grad_fn=<MseLossBackward0>), tensor(9.0396, grad_fn=<AddBackward0>))
loss =  tensor(40.1619, grad_fn=<AddBackward0>)


### End