In [14]:
import pandas as pd
import numpy as np

from PIL import Image
from PIL import ImageDraw

In [15]:
#mount Google Drive (where the data currently lives) at /gdrive so the paths used below resolve
from google.colab import drive 
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [16]:
#load the space-separated annotation file, name its columns and discard the
#three unused ones ("dr1", "dr2", "dr3") in a single chained expression
annot = (
    pd.read_csv('/gdrive/My Drive/giro1_1_TXT.txt_moving.txt', header=None, sep=' ')
    .rename(
        columns={
            0: "dr1",
            1: "frame",
            2: "ID",
            3: "X1",
            4: "Y1",
            5: "X2",
            6: "Y2",
            7: "dr2",
            8: "object",
            9: "dr3",
        }
    )
    .drop(columns=["dr1", "dr2", "dr3"])
)
annot.head()


Unnamed: 0,frame,ID,X1,Y1,X2,Y2,object
0,215,1,538,503,608,705,1F
1,215,2,638,506,713,705,1F
2,215,3,742,508,810,708,1F
3,215,4,857,507,920,708,1F
4,215,5,967,509,1027,707,1F


In [17]:
#the classes: maps the code stored in the "object" column to a readable label
class_ = {
    '1F': 'Front View',
    '1B': 'Back View',
    '1L': 'Left View',
    '1R': 'Right View',
    '2': 'Bicycle Crowd',  # stray leading space removed so the label matches the others
    '5H': 'High-Density Human Crowd',
    '5L': 'Low-Density Human Crowd',
    '0': 'irrelevant TV graphics'
}
#every frame number that has at least one annotation row
af = list(annot["frame"].unique())

#every frame number that has at least one rear-view ('1B') annotation row
af_backview = list(annot.loc[annot["object"] == '1B', "frame"].unique())



In [33]:
import torch

# Flip identifiers accepted by ``BoxList.transpose``. The integer values are
# the same as ``PIL.Image.FLIP_LEFT_RIGHT`` (0) and ``PIL.Image.FLIP_TOP_BOTTOM``
# (1), so the PIL constants can be passed directly.
FLIP_LEFT_RIGHT = 0
FLIP_TOP_BOTTOM = 1


class BoxList(object):
    """
    Bounding boxes of a single image, stored as an (N, 4) float32 tensor.

    Each row is either (xmin, ymin, xmax, ymax) when ``mode == "xyxy"`` or
    (xmin, ymin, width, height) when ``mode == "xywh"``. The size of the image
    the boxes refer to is kept alongside them, together with a dictionary of
    arbitrary extra fields (labels, masks, ...) that travel with the boxes.
    """

    def __init__(self, bbox, image_size, mode="xyxy"):
        """
        :param bbox: tensor or nested sequence that converts to an (N, 4) tensor.
        :param image_size: (image_width, image_height) of the annotated image.
        :param mode: "xyxy" or "xywh".
        :raises ValueError: if ``bbox`` is not 2-dimensional, its last
            dimension is not 4, or ``mode`` is unknown.
        """
        # Keep tensors on the device they already live on; anything else
        # (e.g. a list of lists) is materialised on the CPU.
        device = bbox.device if isinstance(bbox, torch.Tensor) else torch.device("cpu")
        bbox = torch.as_tensor(bbox, dtype=torch.float32, device=device)
        if bbox.ndimension() != 2:
            raise ValueError(
                "bbox should have 2 dimensions, got {}".format(bbox.ndimension())
            )
        if bbox.size(-1) != 4:
            raise ValueError(
                "last dimension of bbox should have a "
                "size of 4, got {}".format(bbox.size(-1))
            )
        if mode not in ("xyxy", "xywh"):
            raise ValueError("mode should be 'xyxy' or 'xywh'")

        self.bbox = bbox  # the annotation matrix as a tensor
        self.size = image_size  # (image_width, image_height)
        self.mode = mode
        self.extra_fields = {}

    def add_field(self, field, field_data):
        """Attach ``field_data`` to the boxes under the name ``field``."""
        self.extra_fields[field] = field_data

    def get_field(self, field):
        """Return the data stored under ``field`` (KeyError if absent)."""
        return self.extra_fields[field]

    def has_field(self, field):
        """Return True when an extra field named ``field`` is stored."""
        return field in self.extra_fields

    def fields(self):
        """Return the names of all stored extra fields as a list."""
        return list(self.extra_fields.keys())

    def _copy_extra_fields(self, bbox):
        """Copy every extra field of ``bbox`` (another BoxList) onto this one."""
        for k, v in bbox.extra_fields.items():
            self.extra_fields[k] = v

    @staticmethod
    def _transform_field(value, method_name, *args, **kwargs):
        """
        Apply the geometric transform ``method_name`` to one extra-field value.

        Tensors are forwarded untouched. Other values are transformed through
        their own method of that name when they have one; values without such
        a method (for example ``None`` or plain labels) are forwarded as-is
        instead of raising AttributeError.
        """
        if isinstance(value, torch.Tensor):
            return value
        transform = getattr(value, method_name, None)
        if transform is None:
            return value
        return transform(*args, **kwargs)

    def convert(self, mode):
        """
        Return the boxes expressed in ``mode`` ("xyxy" or "xywh").

        Returns ``self`` when the boxes are already in that mode, otherwise a
        new BoxList carrying the same extra fields.

        :raises ValueError: if ``mode`` is unknown.
        """
        if mode not in ("xyxy", "xywh"):
            raise ValueError("mode should be 'xyxy' or 'xywh'")
        if mode == self.mode:
            return self
        # Only two modes exist, so reaching this point means the other one
        # is requested; go through the xyxy column representation.
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        if mode == "xyxy":
            bbox = torch.cat((xmin, ymin, xmax, ymax), dim=-1)
            bbox = BoxList(bbox, self.size, mode=mode)
        else:
            # Widths/heights count both border pixels, hence the +1.
            TO_REMOVE = 1
            bbox = torch.cat(
                (xmin, ymin, xmax - xmin + TO_REMOVE, ymax - ymin + TO_REMOVE), dim=-1
            )
            bbox = BoxList(bbox, self.size, mode=mode)
        bbox._copy_extra_fields(self)
        return bbox

    def _split_into_xyxy(self):
        """Return the (N, 1) columns xmin, ymin, xmax, ymax whatever the mode."""
        if self.mode == "xyxy":
            xmin, ymin, xmax, ymax = self.bbox.split(1, dim=-1)
            return xmin, ymin, xmax, ymax
        elif self.mode == "xywh":
            TO_REMOVE = 1
            xmin, ymin, w, h = self.bbox.split(1, dim=-1)
            return (
                xmin,
                ymin,
                xmin + (w - TO_REMOVE).clamp(min=0),
                ymin + (h - TO_REMOVE).clamp(min=0),
            )
        else:
            raise RuntimeError("Should not be here")

    def resize(self, size, *args, **kwargs):
        """
        Returns a resized copy of this bounding box
        :param size: The requested size in pixels, as a 2-tuple:
            (width, height).
        """
        ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(size, self.size))
        if ratios[0] == ratios[1]:
            # Uniform scaling: every coordinate (and width/height) scales alike,
            # so the stored mode can be kept without converting.
            ratio = ratios[0]
            scaled_box = self.bbox * ratio
            bbox = BoxList(scaled_box, size, mode=self.mode)
            for k, v in self.extra_fields.items():
                bbox.add_field(k, self._transform_field(v, "resize", size, *args, **kwargs))
            return bbox

        ratio_width, ratio_height = ratios
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        scaled_xmin = xmin * ratio_width
        scaled_xmax = xmax * ratio_width
        scaled_ymin = ymin * ratio_height
        scaled_ymax = ymax * ratio_height
        scaled_box = torch.cat(
            (scaled_xmin, scaled_ymin, scaled_xmax, scaled_ymax), dim=-1
        )
        bbox = BoxList(scaled_box, size, mode="xyxy")
        for k, v in self.extra_fields.items():
            bbox.add_field(k, self._transform_field(v, "resize", size, *args, **kwargs))

        return bbox.convert(self.mode)

    def transpose(self, method):
        """
        Transpose bounding box (flip or rotate in 90 degree steps)
        :param method: One of :py:attr:`PIL.Image.FLIP_LEFT_RIGHT`,
          :py:attr:`PIL.Image.FLIP_TOP_BOTTOM`, :py:attr:`PIL.Image.ROTATE_90`,
          :py:attr:`PIL.Image.ROTATE_180`, :py:attr:`PIL.Image.ROTATE_270`,
          :py:attr:`PIL.Image.TRANSPOSE` or :py:attr:`PIL.Image.TRANSVERSE`.
        :raises NotImplementedError: for anything other than the two flips.
        """
        if method not in (FLIP_LEFT_RIGHT, FLIP_TOP_BOTTOM):
            raise NotImplementedError(
                "Only FLIP_LEFT_RIGHT and FLIP_TOP_BOTTOM implemented"
            )

        image_width, image_height = self.size
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        if method == FLIP_LEFT_RIGHT:
            TO_REMOVE = 1
            transposed_xmin = image_width - xmax - TO_REMOVE
            transposed_xmax = image_width - xmin - TO_REMOVE
            transposed_ymin = ymin
            transposed_ymax = ymax
        elif method == FLIP_TOP_BOTTOM:
            # NOTE(review): no 1-pixel offset is applied here, unlike the
            # horizontal flip above; kept as-is so results do not change.
            transposed_xmin = xmin
            transposed_xmax = xmax
            transposed_ymin = image_height - ymax
            transposed_ymax = image_height - ymin

        transposed_boxes = torch.cat(
            (transposed_xmin, transposed_ymin, transposed_xmax, transposed_ymax), dim=-1
        )
        bbox = BoxList(transposed_boxes, self.size, mode="xyxy")
        for k, v in self.extra_fields.items():
            bbox.add_field(k, self._transform_field(v, "transpose", method))
        return bbox.convert(self.mode)

    def crop(self, box):
        """
        Crops a rectangular region from this bounding box. The box is a
        4-tuple defining the left, upper, right, and lower pixel
        coordinate.

        Coordinates are shifted into the crop's frame and clamped to it; the
        returned BoxList has the crop's (width, height) as its size.
        """
        xmin, ymin, xmax, ymax = self._split_into_xyxy()
        w, h = box[2] - box[0], box[3] - box[1]
        cropped_xmin = (xmin - box[0]).clamp(min=0, max=w)
        cropped_ymin = (ymin - box[1]).clamp(min=0, max=h)
        cropped_xmax = (xmax - box[0]).clamp(min=0, max=w)
        cropped_ymax = (ymax - box[1]).clamp(min=0, max=h)

        # TODO should I filter empty boxes here? (boxes that fall completely
        # outside the crop collapse to zero width or height)

        cropped_box = torch.cat(
            (cropped_xmin, cropped_ymin, cropped_xmax, cropped_ymax), dim=-1
        )
        bbox = BoxList(cropped_box, (w, h), mode="xyxy")
        for k, v in self.extra_fields.items():
            bbox.add_field(k, self._transform_field(v, "crop", box))
        return bbox.convert(self.mode)

In [18]:
#read an image from drive referenced by the frame number
from PIL import Image

def load_image(image):
  """Open the saved picture of frame number ``image`` from the mounted drive.

  :param image: frame number; it is converted with ``str`` to build the file name.
  :return: the object returned by ``Image.open`` for that file.
  """
  frame_path = '/gdrive/My Drive/bicycle/giro1_' + str(image) + '.jpg'
  return Image.open(frame_path)


In [19]:
#peek at the first five annotation rows
annot.head(5)

Unnamed: 0,frame,ID,X1,Y1,X2,Y2,object
0,215,1,538,503,608,705,1F
1,215,2,638,506,713,705,1F
2,215,3,742,508,810,708,1F
3,215,4,857,507,920,708,1F
4,215,5,967,509,1027,707,1F


In [38]:
class MyDataset(object):
    """
    Minimal dataset wrapper that resizes a picture, tints it and rescales its
    bounding boxes to match the resized picture.

    The picture and its boxes are handed in by the caller of ``__getitem__``
    rather than being loaded from ``idx``.
    """

    def __init__(self, size, annotation_scale=1.5):
        """
        :param size: (width, height) every returned picture is resized to.
        :param annotation_scale: how much larger the annotated pictures are
            than the saved ones; the annotations in this notebook were made on
            pictures 1.5 times the size of the saved files.
        """
        self.name = "test"
        self.size = size
        self.annotation_scale = annotation_scale

    @staticmethod
    def _rescale_boxes(boxlist, scale_x, scale_y):
        """Divide, in place, the x columns by ``scale_x`` and the y columns by ``scale_y``."""
        # Columns 0 and 2 hold x coordinates, columns 1 and 3 hold y
        # coordinates, for every box (row) alike.
        boxlist.bbox[:, 0::2] /= scale_x
        boxlist.bbox[:, 1::2] /= scale_y

    def transforms(self, psize, image, boxlist):
        """
        Resize ``image`` to ``psize``, tint it and rescale ``boxlist`` to match.

        :param psize: target (width, height).
        :param image: PIL image to process.
        :param boxlist: BoxList in "xyxy" mode; modified in place.
        :return: (processed RGB image, the rescaled ``boxlist``).
        """
        # Factor from annotation coordinates to resized-picture coordinates:
        # shrink by the resize ratio and by the annotation/saved-size ratio.
        scale_x = (image.size[0] / psize[0]) * self.annotation_scale
        scale_y = (image.size[1] / psize[1]) * self.annotation_scale

        # Force three channels so the split below also works for pictures
        # that were not saved as RGB.
        image = image.resize(psize).convert('RGB')

        r, g, b = image.split()
        b = b.point(lambda i: i * 1.2)  # boost the blue channel
        r = r.point(lambda i: i * 0.9)  # dampen the red channel
        result = Image.merge('RGB', (r, g, b))

        self._rescale_boxes(boxlist, scale_x, scale_y)
        # The boxes now refer to the resized picture.
        boxlist.size = tuple(psize)

        return result, boxlist

    def __getitem__(self, idx, im, boxes):
        """
        Build the processed picture and its BoxList for one frame.

        :param idx: identifier of the sample; returned unchanged.
        :param im: PIL image of the frame.
        :param boxes: list of [x1, y1, x2, y2] lists for that frame.
        :return: (processed image, BoxList, idx).
        """
        image = im

        # Labels are not needed (as of now); the field is still added so the
        # BoxList always has a "labels" entry.
        labels = None

        boxlist = BoxList(boxes, image.size, mode="xyxy")
        boxlist.add_field("labels", labels)

        if self.transforms:
            image, boxlist = self.transforms(self.size, image, boxlist)

        return image, boxlist, idx

    def get_img_info(self, idx):
        """
        Return the height and width of the pictures this dataset yields.

        Every picture is resized to ``self.size`` (width, height), so the
        answer is the same for every ``idx`` and no file has to be read.
        """
        img_width, img_height = self.size
        return {"height": img_height, "width": img_width}

In [40]:
# Target (width, height) every picture is resized to by the dataset.
size_of_data = (240, 240)
data = MyDataset(size_of_data)

# Frame number of every annotation row (one entry per row of `annot`).
A = annot.frame.values

# Alternative: restrict to the back-view annotations only.
#A= annot[annot["object"] == '1B']

for a in af[0:3]: # first three annotated frames; use af_backview for rear views only
  p = load_image(a) # picture of frame `a`
  B = A == a # boolean mask over the annotation rows that belong to frame `a`
  M = annot.loc[B,["X1", "Y1", "X2", "Y2"]].values.tolist() # list of [X1, Y1, X2, Y2] lists, the form passed to BoxList
  processed_pic, boxlist, idx = data.__getitem__(a, p, M ) # called explicitly because this __getitem__ takes extra arguments

  # TODO: forward the result to the network here, or collect it in a variable
  # (as written, each iteration overwrites the previous result)