In [1]:
import numpy as np

In [2]:
def create_grid(grid_size):
    """Build integer index grids for a square ``grid_size`` x ``grid_size`` map.

    Returns:
        A pair ``(columns, rows)`` of 2-D arrays. ``columns[i, j] == j`` and
        ``rows[i, j] == i`` (NumPy's default Cartesian ``'xy'`` meshgrid).
    """
    horizontal = np.arange(grid_size)
    vertical = np.arange(grid_size)
    columns, rows = np.meshgrid(horizontal, vertical)
    return columns, rows

def set_grid_xy_np(grid_size):
    """Return per-cell x/y offsets shaped ``(1, grid_size, grid_size, 3)``.

    The 2-D index grids from ``create_grid`` get a leading batch axis and a
    trailing axis that is broadcast to length 3, so every slot along the last
    axis carries the same cell offset. ``np.broadcast_to`` returns read-only
    views, so the results must not be written to in place.

    Returns:
        ``(grid_x, grid_y)``: column index and row index of each cell.
    """
    columns, rows = create_grid(grid_size)
    cell_shape = (1, grid_size, grid_size, 1)
    full_shape = (1, grid_size, grid_size, 3)
    grid_x = np.broadcast_to(columns.reshape(cell_shape), full_shape)
    grid_y = np.broadcast_to(rows.reshape(cell_shape), full_shape)
    return grid_x, grid_y

In [3]:
set_grid_xy_np(19)

(array([[[[ 0,  0,  0],
          [ 1,  1,  1],
          [ 2,  2,  2],
          ...,
          [16, 16, 16],
          [17, 17, 17],
          [18, 18, 18]],
 
         [[ 0,  0,  0],
          [ 1,  1,  1],
          [ 2,  2,  2],
          ...,
          [16, 16, 16],
          [17, 17, 17],
          [18, 18, 18]],
 
         [[ 0,  0,  0],
          [ 1,  1,  1],
          [ 2,  2,  2],
          ...,
          [16, 16, 16],
          [17, 17, 17],
          [18, 18, 18]],
 
         ...,
 
         [[ 0,  0,  0],
          [ 1,  1,  1],
          [ 2,  2,  2],
          ...,
          [16, 16, 16],
          [17, 17, 17],
          [18, 18, 18]],
 
         [[ 0,  0,  0],
          [ 1,  1,  1],
          [ 2,  2,  2],
          ...,
          [16, 16, 16],
          [17, 17, 17],
          [18, 18, 18]],
 
         [[ 0,  0,  0],
          [ 1,  1,  1],
          [ 2,  2,  2],
          ...,
          [16, 16, 16],
          [17, 17, 17],
          [18, 18, 18]]]]),
 array([[[[

In [4]:
import torch

In [5]:
def set_grid(grid_size:int):
  """Build integer index grids for a square ``grid_size`` x ``grid_size`` map.

  Matrix (``'ij'``) indexing is used, so the FIRST returned tensor holds the
  row index (``out[0][i, j] == i``) and the SECOND holds the column index
  (``out[1][i, j] == j``).
  """
  row_values = torch.arange(grid_size)
  col_values = torch.arange(grid_size)
  row_index, col_index = torch.meshgrid(row_values, col_values, indexing='ij')
  return row_index, col_index

def set_grid_xy(grid_size:int):
  """Return per-cell x/y offsets, each shaped ``[1, grid_size, grid_size, 3]``.

  ``set_grid`` uses 'ij' indexing, so its first output is the row index and
  its second the column index. The two are swapped on return so that callers
  receive ``(x_offsets, y_offsets)``: column index first, row index second.
  """
  row_index, col_index = set_grid(grid_size=grid_size)
  cell_shape = (1, grid_size, grid_size, 1)
  full_shape = (1, grid_size, grid_size, 3)
  # Add a batch axis and a trailing axis, then repeat (as a view) over the 3 slots of the last axis.
  row_index = row_index.contiguous().view(cell_shape).expand(full_shape)
  col_index = col_index.contiguous().view(cell_shape).expand(full_shape)
  return col_index, row_index

In [6]:
set_grid_xy(19)

(tensor([[[[ 0,  0,  0],
           [ 1,  1,  1],
           [ 2,  2,  2],
           ...,
           [16, 16, 16],
           [17, 17, 17],
           [18, 18, 18]],
 
          [[ 0,  0,  0],
           [ 1,  1,  1],
           [ 2,  2,  2],
           ...,
           [16, 16, 16],
           [17, 17, 17],
           [18, 18, 18]],
 
          [[ 0,  0,  0],
           [ 1,  1,  1],
           [ 2,  2,  2],
           ...,
           [16, 16, 16],
           [17, 17, 17],
           [18, 18, 18]],
 
          ...,
 
          [[ 0,  0,  0],
           [ 1,  1,  1],
           [ 2,  2,  2],
           ...,
           [16, 16, 16],
           [17, 17, 17],
           [18, 18, 18]],
 
          [[ 0,  0,  0],
           [ 1,  1,  1],
           [ 2,  2,  2],
           ...,
           [16, 16, 16],
           [17, 17, 17],
           [18, 18, 18]],
 
          [[ 0,  0,  0],
           [ 1,  1,  1],
           [ 2,  2,  2],
           ...,
           [16, 16, 16],
           [17, 17, 17

In [7]:
from board_inference.tools import sigmoid
# Anchor boxes, one row per detection scale. Each row is a flattened list of
# (width, height) pairs: decode() reads the even positions as widths and the
# odd positions as heights, and selects the row with its `scale` index.
anchors = [
            [116,90, 156,198, 373,326],
            [30,61, 62,45, 59,119],
            [10,13, 16,30, 33,23]]

In [8]:
def decode(pred_boxes, grids, scale):
    """Decode the raw head output of one detection scale (NumPy path).

    Parameters:
        pred_boxes: array shaped ``(batch, num_anchors * num_attrs, grid, grid)``.
            Per anchor, the attributes are ordered
            ``[tx, ty, tw, th, objectness, class scores...]``.
        grids: sequence of feature-map side lengths, indexed by ``scale``.
        scale: integer index selecting both ``grids[scale]`` and the row of the
            module-level ``anchors`` table.

    Returns:
        Array shaped ``(batch, grid, grid, num_anchors, num_attrs)`` whose last
        axis is ``[x_center, y_center, width, height, confidence, class scores...]``.
        Centres are ``sigmoid(t) + cell offset``; sizes are ``exp(t) * anchor``.
        Class scores are passed through unchanged (no sigmoid is applied).

    Note:
        ``set_grid_xy_np`` produces offsets for 3 anchor slots, so the anchor
        row must hold 3 (width, height) pairs.
    """
    pred_boxes = np.asarray(pred_boxes)
    grid_size = grids[scale]

    # Anchor row is a flattened list of (w, h) pairs: even positions = widths, odd = heights.
    scale_anchor = np.array(anchors[scale])
    num_anchors = scale_anchor.size // 2
    anchor_width = scale_anchor[0::2].reshape(1, 1, 1, num_anchors)
    anchor_height = scale_anchor[1::2].reshape(1, 1, 1, num_anchors)

    # Derive batch size and per-anchor attribute count from the input instead of
    # hard-coding 1 and 85; reshape raises ValueError if the channel count does not fit.
    batch_size, channels = pred_boxes.shape[0], pred_boxes.shape[1]
    num_attrs = channels // num_anchors

    # Channels-last, then split the channel axis into (anchor, attribute).
    trans_pred = np.transpose(pred_boxes, (0, 2, 3, 1)).reshape(
        batch_size, grid_size, grid_size, num_anchors, num_attrs)

    grid_x, grid_y = set_grid_xy_np(grid_size=grid_size)

    x_center = sigmoid(trans_pred[..., 0]) + grid_x
    y_center = sigmoid(trans_pred[..., 1]) + grid_y
    # The (1, 1, 1, num_anchors) anchor arrays broadcast over batch and grid axes.
    width = np.exp(trans_pred[..., 2]) * anchor_width
    height = np.exp(trans_pred[..., 3]) * anchor_height
    conf = sigmoid(trans_pred[..., 4])
    cls_prob = trans_pred[..., 5:]

    coordinates = np.stack((x_center, y_center, width, height, conf), axis=-1)
    return np.concatenate((coordinates, cls_prob), axis=-1)

In [9]:
def transform_predicted_txtytwth(bboxes:torch.Tensor, grid_size:list, device:torch.device, scale:str):
  """
  Decode the raw per-cell box predictions of one detection scale (PyTorch path).

  Uses the box parameterisation of https://arxiv.org/abs/1612.08242 (Figure 3):
  the centre is sigmoid(t) plus the grid-cell offset and the size is exp(t)
  times the anchor size.

  Parameters/Arguments:
    bboxes: Raw predictions whose last axis is [tx, ty, tw, th, to, class scores...].
            The leading axes must broadcast against [1, grid, grid, 3].
    grid_size: Feature-map side length per scale, ordered [large, medium, small].
    device: Device the predictions live on; grid offsets and anchors are moved there.
    scale: One of "large", "medium" or "small".

  Returns a tensor whose last axis is
  [x_center, y_center, width, height, objectness, class scores...]. Class scores
  are passed through unchanged.

  Raises:
    ValueError: if `scale` is not one of the supported names.
  """
  # Each row: flattened (width, height) pairs for one scale, in the order large, medium, small.
  anchors = [
            [116,90, 156,198, 373,326],
            [30,61, 62,45, 59,119],
            [10,13, 16,30, 33,23]]
  scale_to_index = {"large": 0, "medium": 1, "small": 2}
  if scale not in scale_to_index:
    raise ValueError(f"Unknown scale {scale!r}; expected one of {list(scale_to_index)}")
  index = scale_to_index[scale]

  # Split the last axis into its components.
  tx = bboxes[..., 0]
  ty = bboxes[..., 1]
  tw = bboxes[..., 2]
  th = bboxes[..., 3]
  to = bboxes[..., 4]
  class_scores = bboxes[..., 5:]

  # Only the anchors of the requested scale are needed. Shape [1, 1, 1, 3] broadcasts over
  # batch and grid axes, so the grid size comes solely from `grid_size`.
  scale_anchor = torch.tensor(anchors[index])
  anchor_w = scale_anchor[::2].view(1, 1, 1, 3).to(device)
  anchor_h = scale_anchor[1::2].view(1, 1, 1, 3).to(device)

  grid_x, grid_y = set_grid_xy(grid_size=grid_size[index])

  objectness = torch.sigmoid(to)
  x_c = torch.sigmoid(tx) + grid_x.to(device)
  y_c = torch.sigmoid(ty) + grid_y.to(device)
  w = torch.exp(tw) * anchor_w
  h = torch.exp(th) * anchor_h

  # Stack the five decoded values on a new last axis, then append the untouched class scores.
  coordinates = torch.stack((x_c, y_c, w, h, objectness), dim=-1)
  return torch.cat((coordinates, class_scores), dim=-1)

In [10]:
# Feature-map side length per detection scale; index 0 is used together with the first anchor row.
grid = [19,38,76]

In [11]:
import pickle
# Load previously saved raw predictions for the 19x19 scale.
# NOTE: pickle.load can execute arbitrary code, so only open files from a trusted source.
with open('yolov3_model_preds_large.pkl', 'rb') as f:
    large_boxes = pickle.load(f)

In [12]:
# Tensor wrapper around the same predictions; torch.from_numpy shares memory with the NumPy array.
large_box_torch = torch.from_numpy(large_boxes)

In [13]:
large_boxes.shape, large_box_torch.shape

((1, 255, 19, 19), torch.Size([1, 255, 19, 19]))

In [14]:
# Move channels last, then split the 255 channels into 3 anchors x 85 values: (1, 19, 19, 3, 85).
stage1_out = large_box_torch.permute(0,2,3,1).view(1, grid[0], grid[0], 3, 85)
# NOTE(review): these names label index 0 as confidence and indices 1-4 as x/y/w/h, but
# transform_predicted_txtytwth reads indices 0-3 as tx/ty/w/h and index 4 as the objectness score.
# The names look mislabeled — confirm against the model's output layout.
stage1_confidence = stage1_out[..., 0:1]
stage1_x_center = stage1_out[..., 1:2]
stage1_y_center = stage1_out[..., 2:3]
stage1_width = stage1_out[..., 3:4]
stage1_height = stage1_out[..., 4:5]
stage1_class = stage1_out[..., 5:]
# The slices are concatenated back in their original order, so the values equal those of stage1_out.
large_scale_prediction = torch.cat((stage1_confidence, stage1_x_center, stage1_y_center, stage1_width, stage1_height, stage1_class), dim=-1)

In [15]:
large_scale_prediction

tensor([[[[[-6.6131e-01,  1.2269e-01,  2.0438e-01,  ..., -6.3020e+00,
            -6.7725e+00, -6.5205e+00],
           [-1.1413e-01, -4.5040e-01, -4.1537e-01,  ..., -7.4727e+00,
            -6.3311e+00, -5.7697e+00],
           [ 9.0114e-02, -1.5121e-01, -4.4365e-02,  ..., -5.9864e+00,
            -5.1295e+00, -4.6727e+00]],

          [[ 1.4814e+00,  8.3989e-01, -1.9034e-02,  ..., -7.3016e+00,
            -8.9832e+00, -9.2903e+00],
           [ 9.6497e-01, -8.7730e-01, -2.2197e-01,  ..., -8.3006e+00,
            -8.4210e+00, -8.1073e+00],
           [ 8.1820e-01,  2.8035e-02, -1.5133e-01,  ..., -8.1420e+00,
            -6.9334e+00, -6.6426e+00]],

          [[ 4.3651e-01,  3.6127e-01,  3.7698e-01,  ..., -8.4452e+00,
            -1.1298e+01, -1.2068e+01],
           [ 8.4074e-01, -8.2581e-01, -1.3653e-02,  ..., -9.6648e+00,
            -1.0813e+01, -1.0271e+01],
           [ 1.2284e+00, -2.6527e-02,  8.8607e-02,  ..., -1.0332e+01,
            -8.9867e+00, -9.0961e+00]],

          ...

In [16]:
# Decode the 19x19 ("large") scale with the PyTorch implementation on the CPU.
tens = transform_predicted_txtytwth(large_scale_prediction, grid, torch.device('cpu'),'large')

In [17]:
tens

tensor([[[[[ 3.4045e-01,  5.3063e-01,  1.4230e+02,  ..., -6.3020e+00,
            -6.7725e+00, -6.5205e+00],
           [ 4.7150e-01,  3.8927e-01,  1.0297e+02,  ..., -7.4727e+00,
            -6.3311e+00, -5.7697e+00],
           [ 5.2251e-01,  4.6227e-01,  3.5681e+02,  ..., -5.9864e+00,
            -5.1295e+00, -4.6727e+00]],

          [[ 1.8148e+00,  6.9844e-01,  1.1381e+02,  ..., -7.3016e+00,
            -8.9832e+00, -9.2903e+00],
           [ 1.7241e+00,  2.9374e-01,  1.2495e+02,  ..., -8.3006e+00,
            -8.4210e+00, -8.1073e+00],
           [ 1.6939e+00,  5.0701e-01,  3.2062e+02,  ..., -8.1420e+00,
            -6.9334e+00, -6.6426e+00]],

          [[ 2.6074e+00,  5.8935e-01,  1.6911e+02,  ..., -8.4452e+00,
            -1.1298e+01, -1.2068e+01],
           [ 2.6986e+00,  3.0453e-01,  1.5388e+02,  ..., -9.6648e+00,
            -1.0813e+01, -1.0271e+01],
           [ 2.7735e+00,  4.9337e-01,  4.0756e+02,  ..., -1.0332e+01,
            -8.9867e+00, -9.0961e+00]],

          ...

In [18]:
# Decode the same predictions with the NumPy implementation; scale index 0 selects grid[0] and anchors[0].
arr = decode(large_boxes, grid, 0)

In [19]:
arr

array([[[[[ 3.40446204e-01,  5.30634701e-01,  1.42304924e+02, ...,
           -6.30198288e+00, -6.77245331e+00, -6.52050352e+00],
          [ 4.71499294e-01,  3.89265090e-01,  1.02974891e+02, ...,
           -7.47267342e+00, -6.33111763e+00, -5.76968050e+00],
          [ 5.22513330e-01,  4.62268561e-01,  3.56813649e+02, ...,
           -5.98641205e+00, -5.12946844e+00, -4.67272663e+00]],

         [[ 1.81478554e+00,  6.98442221e-01,  1.13812982e+02, ...,
           -7.30158138e+00, -8.98322010e+00, -9.29034901e+00],
          [ 1.72411513e+00,  2.93737382e-01,  1.24946748e+02, ...,
           -8.30055523e+00, -8.42095089e+00, -8.10733604e+00],
          [ 1.69385350e+00,  5.07008374e-01,  3.20617730e+02, ...,
           -8.14195633e+00, -6.93343925e+00, -6.64258575e+00]],

         [[ 2.60742730e+00,  5.89348555e-01,  1.69112907e+02, ...,
           -8.44515038e+00, -1.12982712e+01, -1.20682898e+01],
          [ 2.69862086e+00,  3.04531097e-01,  1.53884566e+02, ...,
           -9.66483

In [20]:
# Small probe inputs used to compare the project's NumPy sigmoid with torch.sigmoid.
ar = np.array([0.65, 0.36])
te = torch.tensor([0.65, 0.36])

In [21]:
torch.sigmoid(te)

tensor([0.6570, 0.5890])

In [22]:
sigmoid(ar)

array([0.65701046, 0.58904043])

In [26]:
def calculate_ious(bbox1, bbox2):
    """Intersection-over-union of boxes given as ``[..., x_center, y_center, width, height]``.

    Both inputs are NumPy arrays; slicing keeps the last axis, so the result
    has a trailing axis of length 1. A zero union (two zero-area boxes)
    produces ``nan``.
    """
    def _as_corners(box):
        # Convert centre/size to (left, top, right, bottom), each with a trailing length-1 axis.
        left = box[..., 0:1] - box[..., 2:3] / 2
        top = box[..., 1:2] - box[..., 3:4] / 2
        right = box[..., 0:1] + box[..., 2:3] / 2
        bottom = box[..., 1:2] + box[..., 3:4] / 2
        return left, top, right, bottom

    left_a, top_a, right_a, bottom_a = _as_corners(bbox1)
    left_b, top_b, right_b, bottom_b = _as_corners(bbox2)

    # Overlap extent per axis, floored at zero for disjoint boxes.
    shared_width = np.maximum(np.minimum(right_a, right_b) - np.maximum(left_a, left_b), 0)
    shared_height = np.maximum(np.minimum(bottom_a, bottom_b) - np.maximum(top_a, top_b), 0)
    shared_area = shared_width * shared_height

    area_a = abs((right_a - left_a) * (bottom_a - top_a))
    area_b = abs((right_b - left_b) * (bottom_b - top_b))

    return shared_area / (area_a + area_b - shared_area)

def nms_np(decoded_bboxes, conf_threshold=0.5, iou_threshold=0.3):
    """Class-aware non-maximum suppression over decoded boxes (NumPy path).

    Parameters:
        decoded_bboxes: array whose last axis is
            ``[x_center, y_center, width, height, confidence, class scores...]``;
            all leading axes are flattened. At least one class score is required.
        conf_threshold: boxes with confidence not greater than this are dropped
            (default 0.5, the value previously hard-coded).
        iou_threshold: a box of the same class as an already selected box is
            kept only if their IoU is below this value (default 0.3, the value
            previously hard-coded).

    Returns:
        List of selected boxes (each a plain Python list), ordered by
        descending confidence.
    """
    # shape[-1] instead of shape[4] so inputs of any rank are accepted.
    flat_boxes = decoded_bboxes.reshape(-1, decoded_bboxes.shape[-1]).tolist()

    # Pair every surviving box with its predicted class once, instead of
    # recomputing the argmax inside the suppression loop.
    candidates = [(box, int(np.argmax(box[5:]))) for box in flat_boxes if box[4] > conf_threshold]
    candidates.sort(key=lambda item: item[0][4], reverse=True)

    selected_bboxes = []
    while candidates:
        (chosen_bbox, chosen_class), remaining = candidates[0], candidates[1:]
        chosen_xywh = np.array(chosen_bbox[0:4])
        # Keep a box if it belongs to another class or overlaps the chosen box only slightly.
        candidates = [
            (bbox, bbox_class) for bbox, bbox_class in remaining
            if bbox_class != chosen_class
            or calculate_ious(bbox1=np.array(bbox[0:4]), bbox2=chosen_xywh) < iou_threshold
        ]
        selected_bboxes.append(chosen_bbox)

    return selected_bboxes

In [29]:
def _box_corners(bbox, format):
  """Return (x_min, y_min, x_max, y_max) slices of `bbox`, each keeping a trailing axis of length 1."""
  if format == "corners":
    return bbox[..., 0:1], bbox[..., 1:2], bbox[..., 2:3], bbox[..., 3:4]
  if format == "center":
    half_w = bbox[..., 2:3] / 2
    half_h = bbox[..., 3:4] / 2
    return (bbox[..., 0:1] - half_w, bbox[..., 1:2] - half_h,
            bbox[..., 0:1] + half_w, bbox[..., 1:2] + half_h)
  raise ValueError(f"Unsupported bbox format {format!r}; expected 'corners' or 'center'")


def calculate_iou(bbox1, bbox2, format="corners"):
  """Intersection over union between two (batches of) bounding boxes.

  Parameters/Arguments:
    bbox1: tensor of box coordinates, last axis of length >= 4.
    bbox2: tensor of box coordinates, broadcastable against bbox1.
    format: layout of the last axis:
      "corners": [..., x_min, y_min, x_max, y_max]
      "center":  [..., x_center, y_center, width, height]

  Returns a tensor of IoU scores with a trailing axis of length 1
  (e.g. [num_boxes, 1]). A zero union (two zero-area boxes) yields nan.

  Raises:
    ValueError: if `format` is neither "corners" nor "center".
  """
  # Normalise both boxes to corner coordinates regardless of the input format.
  x1, y1, x2, y2 = _box_corners(bbox1, format)  # (x1, y1) upper-left, (x2, y2) bottom-right of bbox1
  X1, Y1, X2, Y2 = _box_corners(bbox2, format)  # (X1, Y1) upper-left, (X2, Y2) bottom-right of bbox2

  # Corners of the intersection rectangle.
  inter_left = torch.max(x1, X1)
  inter_top = torch.max(y1, Y1)
  inter_right = torch.min(x2, X2)
  inter_bottom = torch.min(y2, Y2)

  # Disjoint boxes give negative extents; clamp them to zero.
  inter_w = (inter_right - inter_left).clamp(0)
  inter_h = (inter_bottom - inter_top).clamp(0)
  intersection_area = inter_w * inter_h

  bbox1_area = abs((x2 - x1) * (y2 - y1))
  bbox2_area = abs((X2 - X1) * (Y2 - Y1))
  union_area = bbox1_area + bbox2_area - intersection_area

  return intersection_area / union_area

# if __name__ =='__main__':
#   a1 = torch.tensor([[4.0, 6.0, 4.0, 4.0],
#                    [4.0, 5.0, 4.0, 4.0]]) # x, y, w, h
#   a2 = torch.tensor([[6.0, 4.0, 6.0, 2.0],
#                    [6.0, 3.0, 6.0, 2.0]])
#   iou = calculate_iou(a1, a2, format="center")
#   print(f"The iou  is : {iou} and shape of outputed iou is: {iou.shape}")


def nms(pred_bboxes:list,
        prob_threshold:float,
        iou_threshold:float,
        format:str):
  """ Class-aware non-maximum suppression over predicted bounding boxes (post-processing).

  Parameters/Arguments:
  pred_bboxes: list of boxes; each box is an indexable sequence laid out as
               [c0, c1, c2, c3, confidence_score, class scores...], where c0..c3 are the
               box coordinates in the layout named by `format`.
  prob_threshold: boxes whose confidence score is not greater than this value are discarded.
  iou_threshold: a box of the same class as an already selected box is kept only if their
                 IoU is below this value.
  format: layout of the four box coordinates, 'corners' or 'center'; forwarded to calculate_iou.

  Returns the list of selected bounding boxes, ordered by descending confidence score.
  """
  assert type(pred_bboxes)==list, f"The given pred bboxes are not list, instead they are {type(pred_bboxes)}"
  # Keep only confident boxes, highest confidence first.
  bboxes = [box for box in pred_bboxes if box[4] > prob_threshold]
  bboxes = sorted(bboxes, key=lambda x:x[4], reverse=True)
  selected_bboxes = []

  while bboxes:
    choosen_box = bboxes.pop(0)
    # Computed once per selected box rather than once per remaining candidate.
    choosen_class = torch.argmax(torch.tensor(choosen_box[5:]))
    choosen_coords = torch.tensor(choosen_box[0:4])

    # A remaining box survives if it predicts a different class, or if it overlaps the
    # selected box by less than iou_threshold. Survivors are re-examined in the next iteration.
    bboxes = [box for box in bboxes
              if torch.argmax(torch.tensor(box[5:])) != choosen_class
              or calculate_iou(torch.tensor(box[0:4]), choosen_coords, format=format) < iou_threshold]
    selected_bboxes.append(choosen_box)
  return selected_bboxes

In [28]:
# Run NMS on the NumPy-decoded boxes.
nms_arr = nms_np(arr)

In [30]:
tens.shape

torch.Size([1, 19, 19, 3, 85])

In [32]:
# Merge the first four axes (shape [1, 19, 19, 3, 85] -> [1083, 85]) and convert to a Python list, which nms() requires.
tens_pred  = tens.flatten(0,3).tolist()

In [34]:
# Run NMS on the PyTorch-decoded boxes with 0.5 confidence / 0.3 IoU, the same values nms_np uses.
nms_tens = nms(tens_pred, 0.5,0.3, 'center')

In [35]:
nms_arr

[[8.632639706134796,
  10.453214257955551,
  94.86082863807678,
  413.07774925231934,
  0.9994925260543823,
  10.880245208740234,
  -14.100337028503418,
  -14.091873168945312,
  -13.969831466674805,
  -13.115316390991211,
  -13.502264976501465,
  -14.150325775146484,
  -13.85133171081543,
  -14.244987487792969,
  -14.325185775756836,
  -12.964098930358887,
  -13.669536590576172,
  -13.734960556030273,
  -13.53000545501709,
  -15.028631210327148,
  -13.472461700439453,
  -14.453124046325684,
  -14.484792709350586,
  -13.720623970031738,
  -14.387433052062988,
  -14.368809700012207,
  -13.88200855255127,
  -12.172685623168945,
  -13.736736297607422,
  -14.186943054199219,
  -15.43814468383789,
  -11.733830451965332,
  -14.723088264465332,
  -15.266780853271484,
  -14.813467025756836,
  -13.936478614807129,
  -13.402120590209961,
  -13.923203468322754,
  -14.789435386657715,
  -14.698049545288086,
  -13.884147644042969,
  -12.768237113952637,
  -13.563483238220215,
  -15.692556381225586,


In [36]:
nms_tens

[[8.63263988494873,
  10.453214645385742,
  94.86083221435547,
  413.07769775390625,
  0.9994925260543823,
  10.880245208740234,
  -14.100337028503418,
  -14.091873168945312,
  -13.969831466674805,
  -13.115316390991211,
  -13.502264976501465,
  -14.150325775146484,
  -13.85133171081543,
  -14.244987487792969,
  -14.325185775756836,
  -12.964098930358887,
  -13.669536590576172,
  -13.734960556030273,
  -13.53000545501709,
  -15.028631210327148,
  -13.472461700439453,
  -14.453124046325684,
  -14.484792709350586,
  -13.720623970031738,
  -14.387433052062988,
  -14.368809700012207,
  -13.88200855255127,
  -12.172685623168945,
  -13.736736297607422,
  -14.186943054199219,
  -15.43814468383789,
  -11.733830451965332,
  -14.723088264465332,
  -15.266780853271484,
  -14.813467025756836,
  -13.936478614807129,
  -13.402120590209961,
  -13.923203468322754,
  -14.789435386657715,
  -14.698049545288086,
  -13.884147644042969,
  -12.768237113952637,
  -13.563483238220215,
  -15.692556381225586,
 

In [38]:
len(nms_arr[0]), len(nms_tens[0])

(85, 85)