In [1]:
# default_exp core

In [2]:
#hide
# Make Google Drive available to this runtime under /content/drive.
# NOTE(review): `google.colab` is imported here, so this cell presumably only
# works when the notebook is run on Colab — confirm before running elsewhere.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
#hide
# FIXME: unfinished cell. `%ru` is not an IPython magic (it looks like a
# truncated `%run <script>`), so executing it only produces a UsageError.
# It is commented out until the intended script is known.
# %ru

In [3]:
#hide
import os
# File names of the trained weight files, keyed by the detector they belong to.
weight_filenames = dict(
    lcd="21-2-20-94-universal-lcd.pt",
    digits="21-2-25 1k-digits YOLOv5-weights.pt",
)



# detectors = []
# for filename in weight_filenames:
# weights_path = os.path.join(resource_folder, weights_filename)



In [5]:
import os

# safety for re-executions
if not os.path.exists("yolov5"):
  # clone YOLOv5 and reset to a specific git checkpoint that has been verified working
  !git clone https://github.com/ultralytics/yolov5  # clone repo
  !git reset --hard 68211f72c99915a15855f7b99bf5d93f5631330f

# enter the yolov5 directory
%cd yolov5

# install dependencies as necessary
!pip install -qr requirements.txt  # install dependencies (ignore errors)
import torch

from IPython.display import Image, clear_output  # to display images
# from utils.google_utils import gdrive_download  # to download models/datasets

clear_output()

if torch.cuda.is_available():
  print('Setup complete. Using torch %s %s' % (torch.__version__, torch.cuda.get_device_properties(0)))
else:
  raise Exception("You need to enable GPU in this runtime environment")

# return to parent directory
%cd ..

Setup complete. Using torch 1.8.0+cu101 _CudaDeviceProperties(name='Tesla P100-PCIE-16GB', major=6, minor=0, total_memory=16280MB, multi_processor_count=56)
/content/drive/My Drive/Coding/ModelAssistedLabel


# core

> File-matching helpers and train/valid/test dataset generation for YOLOv5 training.

In [5]:
# Each kind of resource is recognised on disk by its file extension.
resource_map = {
    "images": ".jpg",
    "labels": ".txt",
}

# Keep the individual extensions available under their own names as well.
images = resource_map["images"]
labels = resource_map["labels"]

In [6]:
# export

import glob
from os.path import join

class FileUtilities:
  """
  Stateless helpers for locating resource files on disk and pairing them up.

  Which files count as resources is decided by the module-level :resource_map:,
  a dictionary from resource name (e.g. "images") to file extension (e.g. ".jpg").
  All methods are static: call them as `FileUtilities.<method>(...)`.
  """

  @staticmethod
  def collect_files(walk_dir, recursive):
    """
    By default, returns all the ".jpg" and ".txt" files in a directory. The filetypes
    are specified by the :resource_map:.

    Args:
      walk_dir: directory from which to pull resources
      recursive: if `True`, recursively searches the folder for the desired
        resource. The value is handed to `glob.iglob` unchanged, so with `False`
        the `**` in the search pattern behaves like a plain `*`.

    Returns:
      A dictionary keyed to the :resource_map: with each value being a list of
      dictionaries, one per file found:
        "pair_id": the file's basename with the extension removed
        "path": the path as returned by glob
        "basename": the file's basename
    """
    import os  # imported locally: the cell that defines this class does not import `os`

    res = {}
    for key, extension in resource_map.items():
      entries = []
      for path in glob.iglob(walk_dir + '/**/*' + extension, recursive=recursive):
        basename = os.path.basename(path)
        entries.append({
            # slice by length so an empty extension keeps the whole name
            # (`name[:-0]` would yield an empty string)
            "pair_id": basename[:len(basename) - len(extension)],
            "path": path,
            "basename": basename,
        })
      res[key] = entries
    return res

  @staticmethod
  def matched(file_collection):
    """
    Pairs up an image and label based on a shared resource name.

    Args:
      file_collection: the result of `collect_files`; must contain the keys
        "images" and "labels" as well as every key of the :resource_map:.

    Returns:
      A list with one dictionary per "pair_id" that occurs in both the images
      and the labels. Each dictionary is keyed to the :resource_map: and holds
      the first collected entry with that "pair_id" for every resource. Pairs
      are ordered by "pair_id".
    """
    ids_of = lambda entries: {entry["pair_id"] for entry in entries}
    shared_ids = ids_of(file_collection["labels"]) & ids_of(file_collection["images"])

    # Index every resource list once so pairing is a dictionary lookup instead
    # of a scan over the whole list for every pair.
    first_by_id = {}
    for kind in resource_map:
      lookup = {}
      for entry in file_collection[kind]:
        lookup.setdefault(entry["pair_id"], entry)  # first occurrence wins
      first_by_id[kind] = lookup

    # sorted() gives a reproducible order; iterating the set directly does not
    return [{kind: first_by_id[kind][pair_id] for kind in resource_map}
            for pair_id in sorted(shared_ids)]

  @staticmethod
  def match_files(walk_dir, recursive=True):
    """
    Collects the resources below `walk_dir` and returns the image/label pairs.

    Args:
      walk_dir: directory from which to pull resources
      recursive: forwarded to `collect_files`

    Returns:
      The list of pairs produced by `matched`.
    """
    return FileUtilities.matched(FileUtilities.collect_files(walk_dir, recursive=recursive))

  @staticmethod
  def mkdir(dir):
    """
    Creates the directory `dir`, including any missing parent directories.
    Does nothing when something already exists at that path.

    Args:
      dir: path of the directory to create
    """
    import os
    if not os.path.exists(dir):
      os.makedirs(f"{dir}", exist_ok=True)


In [12]:
# export


from datetime import datetime
import math, random

class Generation:
  """
    Container and organizer of photos for a given repository.

    Typical use: call `split_repo` to partition the matched image/label pairs,
    then `zip_splits_to_destination` to write the partitions to disk, zip them
    together with a `data.yaml` and move the archive to `out_dir`.
  """

  def default_split_ratio(self):
    """Returns the default fractions: 70% train, 20% valid, 10% test."""
    return {
              "train": .7,
              "valid": .2,
              "test": .1 
            }

  @staticmethod
  def _setting(value, name):
    """Returns `value`, or the module-level global called `name` when `value` is None."""
    if value is not None:
      return value
    try:
      return globals()[name]
    except KeyError:
      raise NameError(f"name '{name}' is not defined") from None

  def __init__(self, repo, out_dir=None, data_yaml=None):
    """
      Args:
        repo: <string> path to the parent directory of the repository.
        out_dir: <string> directory the finished zip archive is moved to.
          When omitted, the module-level variable `out_dir` is used.
        data_yaml: text written to `data.yaml` inside the archive. When
          omitted, the module-level variable `data_yaml` is used.

      Raises:
        NameError: if a value is omitted and no module-level variable of that
          name exists.
    """
    self.repo = repo
    self.split = None
    # Explicit arguments are preferred; the fallback to module-level names keeps
    # existing `Generation(repo)` calls working.
    self.data_yaml = self._setting(data_yaml, "data_yaml")
    self.out_dir = self._setting(out_dir, "out_dir")

  def split_repo(self, split_ratio = None, MAX_SIZE=None):
    """
    Sets the value of `self.split` to a dictionary with the keys "train",
    "valid" and "test", each holding a list of image/label pairs.

    Args:
      split_ratio: relative fractions of split between test train and validation
      sets. Only the "train" and "valid" fractions are read; both counts are
      rounded up and "test" receives whatever is left.
      MAX_SIZE: The total number of images to be used in the image set 
    """
    if split_ratio is None:
      split_ratio = self.default_split_ratio()

    # use this instance's repository, not whatever global `repo` happens to exist
    files = FileUtilities.match_files(self.repo)
    random.shuffle(files)
    if MAX_SIZE:
      files = files[:MAX_SIZE]

    train = math.ceil(len(files) * split_ratio["train"])
    valid = train + math.ceil(len(files) * split_ratio["valid"])

    split =  {"train": files[:train],
    "valid": files[train: valid],
    "test": files[valid:]}

    # the three slices must partition the file list exactly
    assert sum([len(split[x]) for x in split]) == len(files)
    self.split = split
  
  def write_images(self):
    """
    If the dataset has already been split, then write the files to disk accordingly.
    All resources are present two levels deep. The top folders are named according
    to "test"/"train"/"valid". The mid-level folders are named "images" or "labels".
    Resources can be found in the corresponding folder.

    Files that already exist at their destination are not copied again.

    Returns:
      A list of directories to the test/train/valid split
    """
    import os
    import shutil

    assert self.split is not None
    directories = []
    for dirname, pairs in self.split.items(): 
      split_dir = join("./", dirname) #test/valid/train
      FileUtilities.mkdir(split_dir)
      directories.append(split_dir)
      for pair in pairs:
        for resource, data in pair.items():
          subdir = join(split_dir, resource)
          FileUtilities.mkdir(subdir)

          destination = join(subdir, data["basename"])
          if not os.path.exists(destination): 
            # copy without a shell: paths containing spaces or parentheses
            # break an unquoted `cp` command line
            shutil.copy(data["path"], destination)
    return directories

  def zip_splits_to_destination(self, folder=None):
    """
    Writes the split to disk, zips it and moves the archive into `self.out_dir`
    (which is created if it does not exist yet).

    Args:
      folder: name of the archive without ".zip"; defaults to `unified_dirname()`.

    Returns:
      The path of the moved archive, "<out_dir>/<folder>.zip".
    """
    import os
    import shutil

    assert self.split is not None
    if folder is None:
      folder = self.unified_dirname()
    dirs = self.write_images()
    zipped = self.unify_dirs(folder, dirs)

    destination = f"{self.out_dir}/{folder}.zip"
    os.makedirs(self.out_dir, exist_ok=True)
    shutil.move(zipped, destination)
    return destination

  def unify_dirs(self, folder, dirs):
    """
    Gathers `data.yaml` and the split directories in `folder`, zips that folder
    to "<folder>.zip" in the current directory and removes the folder again.

    Args:
      folder: name of the directory (and archive) to create
      dirs: not used; the directories moved are "./<name>" for every key of
        `self.split`.

    Returns:
      The archive's file name, "<folder>.zip".
    """
    import os
    import shutil

    FileUtilities.mkdir(folder)
    self.write_data_yaml(folder)
    for subdir in self.split:
      source = join("./", subdir)
      if os.path.exists(source):  # a missing split directory is skipped, not fatal
        shutil.move(source, folder)

    # base_dir keeps `folder` as the top-level entry of the archive
    shutil.make_archive(folder, "zip", root_dir=".", base_dir=folder)
    shutil.rmtree(folder, ignore_errors=True)
    return f"{folder}.zip"
    
  def unified_dirname(self, prefix=""):
    """
    Builds a name of the form "<repo folder name><prefix> <yy-mm-dd HH-MM-SS>".

    Args:
      prefix: text inserted between the repository name and the timestamp.

    Returns:
      The name as a string, without a file extension.
    """
    now = datetime.now() # current date and time
    timestamp = now.strftime(" %y-%m-%d %H-%M-%S")
    # drop trailing slashes first, otherwise the last path component is empty
    zipname = self.repo.rstrip("/").split("/")[-1] + prefix + timestamp
    return zipname

  def write_data_yaml(self, folder="./"):
    """
    Writes `self.data_yaml` to "<folder>/data.yaml", replacing an existing file.

    Args:
      folder: directory in which to create `data.yaml`.
    """
    with open(join(folder, "data.yaml"),"w+") as f:
      if isinstance(self.data_yaml, str):
        f.write(self.data_yaml)
      else:
        # an iterable of lines is still accepted
        f.writelines(self.data_yaml)
  

In [14]:

# Dataset description handed to YOLOv5. Every key has to start in column 0:
# with the lines after `train:` indented, the text is not a valid YAML mapping.
data_yaml = """train: ../train/images
val: ../valid/images

nc: 10
names: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0']
"""

out_dir = "/content/drive/MyDrive/Coding/01_train"
repo = "/content/drive/MyDrive/Coding/Roboflow Export (841)"

# A repository path that does not exist yields three empty splits without any
# error, so point it out before splitting.
if not os.path.isdir(repo):
  print(f"WARNING: repository folder not found: {repo}")

g = Generation(repo)
g.split_repo(MAX_SIZE=3)
print([{x: len(g.split[x])} for x in g.split])


[{'train': 0}, {'valid': 0}, {'test': 0}]


In [16]:
# Show what is inside the repository directory that `g` was created with.
!ls "{g.repo}"

ls: cannot access '/content/drive/MyDrive/Coding/Roboflow Export (841)': No such file or directory


In [19]:
# List the Drive folder that is expected to contain the repository.
# NOTE(review): a bare `ls` (no `!` or `%`) presumably relies on IPython's automagic — confirm it is enabled.
ls /content/drive/MyDrive/Coding

'1st Period Resources.gsite'
'2.1.3 agenda - Pie chart 1.gsheet'
'2:30pm, 12 3 20.gdoc'
 23_1.m4v
'3.1.3 shared workspace template.gslides'
 4.1.1.gslides
'4.1.2 Resource Page.gdoc'
'4-1 Template.gdoc'
'6th (mgg-kydu-mwg - Oct 23, 2020).gjam'
'Answer Key: 1-3 through 1-7 - Column chart 1.gsheet'
 Attendance.gsheet
'Attendance & Participation.gsheet'
'Blakeney Parent Emails.gsheet'
'Brockman Parents 12 9.gsheet'
[0m[01;34m"Brockman's LIVE file uploader"[0m/
[01;34m'calling parents'[0m/
 [01;34mClassroom[0m/
'Copy of 2:30pm, 12 3 20.gdoc'
'Copy of Callahan EquatIO Mathspace Links - Texthelp Public Information (1).gdoc'
'Copy of Callahan EquatIO Mathspace Links - Texthelp Public Information.gdoc'
'Copy of Copy of Guide (DRAFT) for Teaching CPM’s PC3, Calculus, Statistics Remotely (1).gsheet'
'Copy of Copy of Guide (DRAFT) for Teaching CPM’s PC3, Calculus, Statistics Remotely.gsheet'
'Copy of Observation Conference-Brockman.gdoc'
'Copy of Return Rosters.xlsx'
'Copy of Test Correctio

In [None]:
# Build a timestamped archive name tagged " no augments", then write the split
# to disk, zip it and move the archive to the output directory.
# `d` receives the path of the moved archive.
folder_name = g.unified_dirname(prefix=" no augments")
d = g.zip_splits_to_destination(folder_name)

In [None]:
# Copy the archive at `d` into the current directory, then delete the file at `d`.
!cp "{d}" "./{folder_name}.zip"
!rm -f "{d}"

In [None]:
# Extract the archive into a directory named after it. The archive already has a
# top-level folder of that name, so the files end up under
# "<folder_name>/<folder_name>/..." (see the listing below).
!unzip "{folder_name}.zip" -d "{folder_name}"

Archive:  Roboflow Export (841) no augments 21-03-14 18-19-19.zip
   creating: Roboflow Export (841) no augments 21-03-14 18-19-19/Roboflow Export (841) no augments 21-03-14 18-19-19/
   creating: Roboflow Export (841) no augments 21-03-14 18-19-19/Roboflow Export (841) no augments 21-03-14 18-19-19/train/
   creating: Roboflow Export (841) no augments 21-03-14 18-19-19/Roboflow Export (841) no augments 21-03-14 18-19-19/train/labels/
  inflating: Roboflow Export (841) no augments 21-03-14 18-19-19/Roboflow Export (841) no augments 21-03-14 18-19-19/train/labels/digittake-23-jpg_jpg.rf.00548b600c00b9a0159307cccf3347f1.txt  
  inflating: Roboflow Export (841) no augments 21-03-14 18-19-19/Roboflow Export (841) no augments 21-03-14 18-19-19/train/labels/save_dirrsave_dirr36a73c74ef0b76639e12488651f587fb06a9baab-jpg-jpg_jpg.rf.cd4c51c05581425f5ff10b194f12f1e0.txt  
  inflating: Roboflow Export (841) no augments 21-03-14 18-19-19/Roboflow Export (841) no augments 21-03-14 18-19-19/train/la

In [None]:
# Check which training images were extracted (note the doubled folder name).
!ls "{folder_name}/{folder_name}/train/images"

digittake-23-jpg_jpg.rf.00548b600c00b9a0159307cccf3347f1.jpg
save_dirrsave_dirr36a73c74ef0b76639e12488651f587fb06a9baab-jpg-jpg_jpg.rf.cd4c51c05581425f5ff10b194f12f1e0.jpg
save_dirrtake-12_jpg_cropped-jpg_jpg.rf.52431525a8df960d1e777c49ac53bd81.jpg


##### architecture

In [None]:
# Read the class count ("nc") from the dataset YAML in the current directory.
# It is stored as a string.
import yaml
with open("data.yaml", 'r') as stream:
    dataset_config = yaml.safe_load(stream)
num_classes = str(dataset_config['nc'])

In [None]:
# Print the stock YOLOv5s model configuration that the custom configuration is based on.
%cat /content/yolov5/models/yolov5s.yaml

# parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, C3, [1024, False]],  # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C

In [None]:
#customize iPython writefile so we can write variables
from IPython.core.magic import register_line_cell_magic

@register_line_cell_magic
def writetemplate(line, cell=None):
    """
    Cell magic that writes the cell body to a file after filling in variables.

    Usage: put `%%writetemplate <path>` on the first line of a cell. The rest of
    the cell is passed through `str.format` with this module's global variables,
    so `{name}` is replaced by the value of the global `name`. Literal braces in
    the template have to be doubled (`{{` and `}}`).

    Args:
        line: the text after the magic's name, i.e. the path of the file to write.
        cell: the cell body; None when the magic is invoked as a line magic.

    Raises:
        ValueError: if invoked as a line magic, because there is no body to write.
        KeyError: if the template names a variable that is not a global.
    """
    if cell is None:
        # The decorator also registers the line form, which has nothing to write;
        # without this check that call fails with an opaque TypeError.
        raise ValueError("writetemplate needs a cell body: use %%writetemplate <path>")
    # strip stray whitespace around the path typed after the magic's name
    with open(line.strip(), 'w') as f:
        f.write(cell.format(**globals()))

In [None]:
%%writetemplate /content/yolov5/models/custom_yolov5s.yaml

# parameters
nc: {num_classes}  # number of classes
depth_multiple: 0.33  # model depth multiple
width_multiple: 0.50  # layer channel multiple

# anchors
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone
backbone:
  # [from, number, module, args]
  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, BottleneckCSP, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
   [-1, 9, BottleneckCSP, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, BottleneckCSP, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
   [-1, 1, SPP, [1024, [5, 9, 13]]],
   [-1, 3, BottleneckCSP, [1024, False]],  # 9
  ]

# YOLOv5 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, BottleneckCSP, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
   [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
   [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

In [None]:
# train yolov5s on custom data for 100 epochs
# time its performance
%%time
%cd /content/yolov5/
!python train.py --img 416 --batch 16 --epochs 10 --data '../data.yaml' --cfg ./models/custom_yolov5s.yaml --weights '' --name yolov5s_results  --cache

/content/yolov5
remote: Enumerating objects: 11, done.[K
remote: Counting objects: 100% (11/11), done.[K
remote: Compressing objects: 100% (11/11), done.[K
remote: Total 11 (delta 3), reused 0 (delta 0), pack-reused 0[K
Unpacking objects: 100% (11/11), done.
From https://github.com/ultralytics/yolov5
   747c265..6f718ce  master     -> origin/master
YOLOv5 v4.0-130-g747c265 torch 1.8.0+cu101 CUDA:0 (Tesla P100-PCIE-16GB, 16280.875MB)

Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='./models/custom_yolov5s.yaml', data='../data.yaml', device='', entity=None, epochs=10, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[416, 416], linear_lr=False, local_rank=-1, log_artifacts=False, log_imgs=16, multi_scale=False, name='yolov5s_results', noautoanchor=False, nosave=False, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/yolov5s_results2', single_cls=False, sync_bn