In [None]:
# default_exp train

In [None]:
#hide
# Colab-only setup: mount Google Drive at /content/drive so the project
# folder used by the following cells is reachable.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
#hide 
# %load_ext autoreload 
# %autoreload 2
# Work from the project folder on the mounted Drive; the cells below use
# relative paths such as "train" and "yolov5/data.yaml".
%cd "/content/drive/MyDrive/Coding/ModelAssistedLabel"

/content/drive/MyDrive/Coding/ModelAssistedLabel


# Training Models
> wrapping `yolov5/train.py`

We're building towards generating a model with a single call.

`Trainer` is a fairly bare-bones wrapper built around `train.py`.

`AutoWeights` is a more robust wrapper and allows for custom naming/placement of the results folder.

In [None]:
# export
from ModelAssistedLabel.config import Defaults
import os

class Trainer():
  """A thin wrapper around Ultralytics' YOLOv5 `train.py`.

  Writes the model architecture (the template provided by `Defaults`) to a yaml
  file inside the `yolov5` folder and then runs YOLOv5's training script on it."""

  def __init__(self, name, yaml_file = "models/custom_yolov5s.yaml"):
    """
    Sets the current directory to the project's root as defined in Defaults.

    Args:
      name: identifier for results (passed to `train.py` as `--name`)
      yaml_file: where the architecture file is written, relative to `yolov5/`
    """
    os.chdir(Defaults().root)
    self.yaml_file = yaml_file
    self.name = name
    self.template = Defaults().trainer_template

  def write_yaml(self):
    """
    Records the YOLOv5 architecture by writing `self.template` to
    `yolov5/<yaml_file>`, replacing any previous copy.
    """
    yaml_path = f"yolov5/{self.yaml_file}"
    if os.path.exists(yaml_path):
      os.remove(yaml_path)
    # the context manager closes the handle even if the write fails
    with open(yaml_path, "w") as f:
      f.writelines(self.template)

  def train(self, epochs):
    """
    Wrapper for train.py: writes the architecture file, installs YOLOv5's
    requirements and launches training from inside the `yolov5` folder.

    The working directory is restored afterwards, even if a step raises.

    Args:
      epochs: number of iterations
    """
    self.write_yaml()
    start_dir = os.getcwd()
    os.chdir("yolov5")
    try:
      os.system("pip install -r requirements.txt")
      # `--data '../data.yaml'` is resolved from inside yolov5/, i.e. the
      # data file is expected one level up from the yolov5 folder.
      os.system(f"python train.py --img 416 --batch 16 --epochs {epochs} --data '../data.yaml' --cfg '{self.yaml_file}' --weights '' --name '{self.name}'  --cache")
    finally:
      os.chdir(start_dir)

In [None]:
#export
class Resource:
  """A filesystem path tagged as being either a "file" or a "folder"."""
  def __init__(self, path, file_type):
    # only the two known kinds are accepted
    assert file_type in ("file", "folder")
    self.path = path
    self.file_type = file_type

In [None]:
# export
from ModelAssistedLabel.train import Trainer
from ModelAssistedLabel.fileManagement import Generation
from datetime import datetime

class AutoWeights():
  """Given a bag of images (.jpg) and labels (.txt) in YOLOv5 format in a repository,
  initialize the ROOT directory with a train-valid-test split and a file needed
  by the Ultralytics repository. Pairs are identified via having a common filename.

  Then call `generate_weights` to run `train.py`. The resulting folder is moved
  to `out_dir`; if a conflict exists, a new name is made.
  """
  def __init__(self, name="AutoWeight <name>", out_dir=".", MAX_SIZE=5, custom_split=None, data_yaml=None, verbose=True, train_path = "yolov5/runs/train"):
    """
    Args:
      name: helps identify this process's generated files
      out_dir: where the results of train.py are moved
      MAX_SIZE: parameter for `Generation`
      custom_split: parameter for `Generation`
      data_yaml: see `Defaults`'s `data_yaml` attribute for the default value
      verbose: Print summary information
      train_path: path to Ultralytics' default output folder
    """
    self.resource_paths = []
    self.name = name
    self.out_dir = out_dir
    self.train_path = train_path
    self.data_yaml = data_yaml
    self.verbose = verbose
    self.custom_split=custom_split
    self.MAX_SIZE = MAX_SIZE
    # populated by `initialize_images_from_bag` and `generate_weights`
    self.g = None
    self.last_results_path = None

  def traverse_resources(self, dirs=None):
    """
    Prints a summary (sub-folders and file counts) of each resource path.

    Args:
      dirs: paths to summarize; defaults to `self.resource_paths`
    """
    if dirs is None:
      dirs = self.resource_paths
    for d in dirs:
      self.__traverse_resources__(d, level=0)

  def __traverse_resources__(self, dir, level):
    """Iterate through the levels of one resource path.

    Sub-folders are printed (indented by `level`) and descended into; a folder
    without sub-folders prints how many entries it holds.

    Args:
      dir: path to inspect
      level: current depth, used for indentation

    Returns:
      `dir` itself when it is a file, otherwise None
    """
    if os.path.isfile(dir):
      return dir

    # a registered resource may already have been removed; report, don't crash
    if not os.path.isdir(dir):
      print("\t"*level + f"{dir} (not found)")
      return None

    entries = sorted(os.listdir(dir))
    subdirs = [os.path.join(dir, x) for x in entries
               if os.path.isdir(os.path.join(dir, x))]
    if subdirs:
      for path in subdirs:
        print("\t"*level+path)
        self.__traverse_resources__(path, level=level+1)
    #otherwise print the number of files on this leaf
    else:
      print("\t"*level, ">", len(entries),"files")

  def generate_weights(self, epochs, tidy_results=True):
    """
    Creates a `Trainer` object and trains for a given amount of time.

    Args:
      epochs: number of iterations (according to docs, over 3000 is not uncommon)
      tidy_results: if True, move the results folder to `self.out_dir`

    Returns:
      path to the output folder of train.py

    Raises:
      AssertionError: if training did not create exactly one new entry in
        `self.train_path`
    """
    # Trainer's constructor changes into the project root, so the relative
    # `train_path` is resolved from there.
    t = Trainer(self.name)

    def run_folders():
      # the runs folder does not exist before the very first training
      if not os.path.isdir(self.train_path):
        return set()
      return set(os.listdir(self.train_path))

    before = run_folders()
    t.train(epochs)
    after = run_folders()

    created = after - before
    assert len(created) == 1, {f"files in {self.train_path}": {"before": before, "after":after}} #only should have made one new file
    diff = created.pop()

    results_path = os.path.join(self.train_path, diff)

    if tidy_results:
      results_path = self.__tidy_results__(results_path = results_path)

    self.__cleanup__()
    self.last_results_path = results_path
    return results_path

  def initialize_images_from_zip(self, zipped):
    """
    Assume zip file is of the following structure:
      * data.yaml
      * train/
        - images/
        - labels/
      * valid/
        - images/
        - labels/
      * test/
        - images/
        - labels/

    Unzips into the current working directory (expected to be the project
    ROOT) and moves every item out of the extracted folder into it. The
    extracted folder is expected at the zip's path minus its extension.
    `data.yaml` therefore ends up next to the `yolov5` folder, which is where
    `Trainer.train` points `train.py` (`--data '../data.yaml'`). Each moved
    item is recorded in `self.resource_paths` so `__cleanup__` can remove it;
    the emptied extracted folder is deleted.

    Args:
      zipped: path to the zip file
    """
    assert os.path.exists(zipped)
    os.system(f'unzip "{zipped}"') #grab data
    folder = os.path.splitext(zipped)[0] #remove the extension from the filename

    #move the contents of the zip file into position within the ROOT directory
    for content in os.listdir(folder):
      os.system(f"mv '{os.path.join(folder, content)}' .")
      if content not in self.resource_paths:
        self.resource_paths.append(content)

    #remove the folder that was taken out of the zip
    os.system(f"rm -f -r '{folder}'")

  def initialize_images_from_bag(self, bag_of_images_and_labels):
    """Converts a folder that contains images and labels to a format acceptable
    by Ultralytics: a `Generation` split is written to disk as a zip, unpacked
    with `initialize_images_from_zip`, and the zip is then deleted.

    Args:
      bag_of_images_and_labels: path handed to `Generation` as its `repo`
    """

    g = Generation(repo=bag_of_images_and_labels,
                  out_dir=self.out_dir,
                  data_yaml=self.data_yaml,
                  verbose=self.verbose)
    g.set_split(split_ratio=self.custom_split, MAX_SIZE=self.MAX_SIZE)
    g.get_split()
    zipped = g.write_split_to_disk(descriptor=self.name)
    self.initialize_images_from_zip(zipped)
    os.system(f'rm -f -r "{zipped}"')
    self.g = g

  def __cleanup__(self):
    """
    Removes all resources in `self.resource_paths` from the filesystem.

    Paths that no longer exist are skipped; a path that cannot be removed is
    reported and the remaining paths are still processed.
    """
    import shutil  # local import: only needed here

    for r in self.resource_paths:
      if os.path.exists(r):
        print('Removing: ', r)
        try:
          # real directories are removed recursively; files and links unlinked
          if os.path.isdir(r) and not os.path.islink(r):
            shutil.rmtree(r)
          else:
            os.remove(r)
        except OSError as err:
          print('Could not remove: ', r, err)

  def __tidy_results__(self, results_path):
    """
    Moves the results to a desired directory while ensuring that no data is overwritten

    Args:
      results_path: path to the folder that has desired information

    Returns:
      Path to the newly-moved results
    """
    # `mv` cannot create missing parent folders
    if self.out_dir:
      os.makedirs(self.out_dir, exist_ok=True)

    default_name = os.path.join(self.out_dir, os.path.basename(results_path))
    out = Defaults._itername(f"{default_name} - ", "")

    os.system(f"mv '{results_path}' '{out}'")
    return out

In [None]:
# Run nbdev's library build over the project's notebooks.
!nbdev_build_lib

Converted 00_config.ipynb.
Converted 01_split.ipynb.
Converted 02_train.ipynb.
Converted 03_detect.ipynb.
Converted index.ipynb.


In [None]:
# Same nbdev build command as the preceding cell.
!nbdev_build_lib

Converted 00_config.ipynb.
Converted 01_split.ipynb.
Converted 02_train.ipynb.
Converted 03_detect.ipynb.
Converted index.ipynb.


In [None]:
# Training results are moved under `out_dir`; `MAX_SIZE` is forwarded to
# `Generation.set_split` when the split is built.
aw = AutoWeights(out_dir="ipynb_tests/02_train_datadump", MAX_SIZE=10)

In [None]:
# Register the split folders and the data file by hand, then print each
# folder's sub-folders and file counts.
aw.resource_paths = ["train", "valid", "test", "yolov5/data.yaml"]
aw.traverse_resources()

train/images
	 > 7 files
train/labels
	 > 7 files
valid/images
	 > 2 files
valid/labels
	 > 2 files
test/images
	 > 1 files
test/labels
	 > 1 files


FileNotFoundError: ignored

In [None]:
# Delete every registered resource path that still exists on disk.
aw.__cleanup__()

Removing:  train
Removing:  valid
Removing:  test
Removing:  yolov5/data.yaml


In [None]:
# Build a split from the labeled image folder via `Generation` and unpack it
# into the working directory for training.
aw.initialize_images_from_bag(bag_of_images_and_labels = "./Image Repo/labeled/Final Roboflow Export (841)")


dirs ['./train', './valid', './test']
yaml ipynb_tests/02_train_datadump/Final Roboflow Export (841)AutoWeight <name> 21-03-21 15-26-02/data.yaml
subdir train
	outdir ipynb_tests/02_train_datadump/Final Roboflow Export (841)AutoWeight <name> 21-03-21 15-26-02
subdir valid
	outdir ipynb_tests/02_train_datadump/Final Roboflow Export (841)AutoWeight <name> 21-03-21 15-26-02
subdir test
	outdir ipynb_tests/02_train_datadump/Final Roboflow Export (841)AutoWeight <name> 21-03-21 15-26-02
os.listdir ['train', 'valid', 'test', 'data.yaml']


In [None]:
%%time
# Short 3-epoch run; `current` receives the path to the results folder.
current = aw.generate_weights(3)

reading defaults from: ModelAssistedLabel config.json
reading defaults from: ModelAssistedLabel config.json
CPU times: user 1.83 ms, sys: 9.67 ms, total: 11.5 ms
Wall time: 6.41 s


In [None]:
  def traverse_resources(dir, level=0):
    """Iterate through the levels of a resource path, printing a summary.

    Sub-folders are printed (indented by `level`) and descended into; a folder
    without sub-folders prints how many entries it holds.

    Args:
      dir: path to inspect
      level: current depth, used for indentation

    Returns:
      `dir` itself when it is a file, otherwise None
    """
    if os.path.isfile(dir):
      return dir

    # a path that does not exist is reported instead of raising from listdir
    if not os.path.isdir(dir):
      print("\t"*level + f"{dir} (not found)")
      return None

    # list once, in a stable order
    entries = sorted(os.listdir(dir))
    subdirs = [os.path.join(dir, x) for x in entries
               if os.path.isdir(os.path.join(dir, x))]
    if subdirs:
      for path in subdirs:
        print("\t"*level+path)
        traverse_resources(path, level=level+1)
    else:
      print("\t"*level, ">", len(entries),"files")
  

In [None]:
# A path that is a plain file is returned as-is rather than listed.
traverse_resources("data.yaml")

'data.yaml'