In [None]:
# default_exp train

In [None]:
from ModelAssistedLabel.config import Defaults
# Start from the project root so the relative paths used in this notebook
# (yolov5/, data.yaml, ipynb_tests/...) resolve there. The cell output reports
# the directory that was entered.
Defaults().to_root()

moving to /content/drive/MyDrive/Coding/ModelAssistedLabel/


# Training Models
> wrapping `yolov5/train.py`

We're building towards generating a model with a single call.

`Trainer` is a fairly bare-bones wrapper built around `train.py`.

`AutoWeights` is a more robust wrapper and allows for custom naming/placement of the results folder.

In [None]:
# export
from ModelAssistedLabel.config import Defaults
import os

class Trainer():
  """A wrapper for Ultralytics' `train.py`.

  Writes the model architecture template to a yaml file inside the `yolov5`
  folder and then runs YOLOv5's training script against it."""

  def __init__(self, name):
    """
    Sets the current directory to the project's root as defined in Defaults.

    Args:
      name: identifier for results (forwarded to train.py as `--name`)
    """
    os.chdir(Defaults().root)
    # path of the architecture file, relative to the yolov5 folder
    self.yaml_file = "models/custom_yolov5s.yaml"
    self.name = name
    self.template = Defaults().trainer_template

  def write_yaml(self):
    """
    Records YOLOv5 architecture.

    Writes `self.template` to `yolov5/<self.yaml_file>` (relative to the
    current working directory), replacing any previous content.
    """
    yaml = f"yolov5/{self.yaml_file}"
    # Opening with "w" truncates an existing file, so no explicit removal is
    # needed; the context manager closes the handle even if the write fails.
    with open(yaml, "w") as f:
      f.writelines(self.template)

  def train(self, epochs):
    """
    wrapper for train.py.

    Writes the architecture yaml, installs yolov5's requirements and launches
    training from inside the `yolov5` folder. The working directory that was
    active on entry is always restored, even if a step raises.

    Args:
      epochs: number of iterations
    """
    import shlex  # local import so this block does not depend on other cells

    self.write_yaml()
    start_dir = os.getcwd()
    os.chdir("yolov5")
    try:
      os.system("pip install -r requirements.txt")
      # shlex.quote keeps names containing quotes or other shell
      # metacharacters from breaking (or altering) the command line.
      os.system(
        f"python train.py --img 416 --batch 16 --epochs {epochs} "
        f"--data '../data.yaml' --cfg '{self.yaml_file}' --weights '' "
        f"--name {shlex.quote(str(self.name))}  --cache"
      )
    finally:
      os.chdir(start_dir)

In [None]:
# export
from ModelAssistedLabel.train import Trainer
from ModelAssistedLabel.fileManagement import Generation
from datetime import datetime
import zipfile, shutil, os, glob

class AutoWeights():
  """Given a bag of images (.jpg) and labels (.txt) in YOLOv5 format in a repository,
  initialize the ROOT directory with a train-valid-test split and the data file needed
  by the Ultralytics repository. Pairs are identified via having a common filename.

  Then call `generate_weights` to run `train.py`; it returns the most recently
  modified run folder found under `train_path`.
  """
  def __init__(self, name="AutoWeight <name>", out_dir=".", MAX_SIZE=5, custom_split=None, data_yaml=None, verbose=True, train_path = "yolov5/runs/train"):
    """
    Args:
      name: helps identify this process's generated files
      out_dir: forwarded to `Generation` as its `out_dir`
      MAX_SIZE: parameter for `Generation`
      custom_split: parameter for `Generation`
      data_yaml: see `Defaults`'s `data_yaml` attribute for the default value
      verbose: Print summary information
      train_path: path to Ultralytics' default output folder
    """
    # names of the files/folders placed in the working directory by
    # `initialize_images_from_zip`; removed again by `__cleanup__`
    self.resource_paths = []
    self.name = name
    self.out_dir = out_dir
    self.train_path = train_path
    self.data_yaml = data_yaml
    self.verbose = verbose
    self.custom_split=custom_split
    self.MAX_SIZE = MAX_SIZE

  @staticmethod
  def _remove_path(path):
    "Best-effort removal of a file or directory tree without going through the shell."
    try:
      if os.path.isdir(path) and not os.path.islink(path):
        shutil.rmtree(path)
      elif os.path.lexists(path):
        # plain files and symlinks (including symlinks to directories)
        os.remove(path)
    except OSError as err:
      # mirror `rm -f`: report the problem but keep going
      print('Could not remove: ', path, err)

  def traverse_resources(self, dirs=None):
    """Print a summary of the given paths.

    Directories are walked recursively (see `__traverse_resources__`); for any
    other entry a line stating whether that entry exists is printed.

    Args:
      dirs: iterable of paths; defaults to `self.resource_paths`
    """
    if dirs is None:
      dirs = self.resource_paths
    for d in dirs:
      if os.path.isdir(d):
        self.__traverse_resources__(d, level=0)
      else:
        # check the entry itself rather than a hard-coded "data.yaml"
        print("File: ", os.path.exists(d))
  
  def __traverse_resources__(self, dir, level):
    """Iterate through the levels of each of the resource paths.

    Prints every entry of a directory that contains at least one
    sub-directory (recursing into the non-file entries), or the number of
    entries when the directory has no sub-directories.

    Args:
      dir: path to inspect
      level: recursion depth, used as the number of tabs to indent with

    Returns:
      `dir` when it is a file, otherwise None
    """
    if os.path.isfile(dir):
      return dir

    entries = os.listdir(dir)
    paths = [os.path.join(dir, x) for x in entries]

    #check to see if there is a directory in the dir
    if any(os.path.isdir(path) for path in paths):
      for path in paths:
        print("\t"*level+path)
        if not os.path.isfile(path):
          self.__traverse_resources__(path, level=level+1)
    #otherwise print the number of files on this leaf
    else:
      print("\t"*level, ">", len(entries),"files")
    

  def generate_weights(self, epochs, rm_local_files=False):
    """
    Creates a `Trainer` object and trains for a given amount of time.

    Args:
      epochs: number of iterations (according to docs, over 3000 is not uncommon)
      rm_local_files: if True, removes every path recorded in
      `self.resource_paths` (see `__cleanup__`) once training has finished.
    
    Returns:
      path to the most recently modified folder under `self.train_path`

    Raises:
      ValueError: if `self.train_path` contains no run folders
    """
    t = Trainer(self.name)
    t.train(epochs)

    # honour the configured output folder instead of a hard-coded path
    runs = glob.glob(os.path.join(self.train_path, '*/'))
    if not runs:
      raise ValueError(f"no training runs found in {self.train_path!r}")
    most_recent = max(runs, key=os.path.getmtime)

    self.last_results_path = most_recent

    if rm_local_files:
      self.__cleanup__()
    return most_recent

  def initialize_images_from_zip(self, zipped):
    """
    Assume zip file is of the following structure:
      * data.yaml
      * train/
        - images/
        - labels/
      * valid/
        - images/
        - labels/
      * test/
        - images/
        - labels/

    The archive is extracted to a temporary `unzipped` folder, and every entry
    found under `unzipped/<dirname of zipped>/<zip name without extension>/`
    is moved into the current working directory and recorded in
    `self.resource_paths`. The `unzipped` folder is removed afterwards, even
    when a step fails.

    Args:
      zipped: path to the zip file
    """
    import shlex  # local import so this block does not depend on other cells

    assert os.path.exists(zipped), f"zip file not found: {zipped}"

    try:
      #move the contents of the zip file into position within the ROOT directory
      with zipfile.ZipFile(zipped, 'r') as zip_ref:
        zip_ref.extractall("unzipped")

      # splitext copes with any extension length, unlike slicing off 4 characters
      archive_name = os.path.splitext(os.path.basename(zipped))[0]
      resources = f"unzipped/{os.path.dirname(zipped)}/{archive_name}/"

      for content in os.listdir(resources):
        # shlex.quote keeps paths containing quotes from breaking the command
        movement = f"mv {shlex.quote(os.path.join(resources, content))} ."
        print(movement)
        os.system(movement)
        self.resource_paths.append(content)
    finally:
      #remove the folder that was taken out of the zip
      if os.path.isdir("unzipped"):
        shutil.rmtree("unzipped")

  def initialize_images_from_bag(self, bag_of_images_and_labels):
    """Converts a folder that contains images and labels to a format acceptable
    by Ultralytics.

    A `Generation` object writes the split to disk; the path it returns is
    handed to `initialize_images_from_zip` and then deleted. The `Generation`
    object is kept as `self.g`.

    Args:
      bag_of_images_and_labels: passed to `Generation` as `repo`
    """

    g = Generation(repo=bag_of_images_and_labels, 
                  out_dir=self.out_dir,
                  data_yaml=self.data_yaml,
                  verbose=self.verbose)
    g.set_split(split_ratio=self.custom_split, MAX_SIZE=self.MAX_SIZE)  
    g.get_split()
    zipped = g.write_split_to_disk(descriptor=self.name)
    self.initialize_images_from_zip(zipped)
    # the archive is no longer needed once its contents are in place
    self._remove_path(zipped)
    self.g = g

  def __cleanup__(self):
    """
    Removes all resources in `self.resource_paths` from the filesystem.
    """
    for r in self.resource_paths:
      if os.path.exists(r):
        print('Removing: ', r)
        # no shell involved, so names with spaces or metacharacters are safe
        self._remove_path(r)

In [None]:
# Demo instance: `name` labels the generated files and `MAX_SIZE` is forwarded
# to `Generation` when a split is built from a bag of images.
aw = AutoWeights(name="<AutoWeight>", out_dir="ipynb_tests/02_train_datadump", MAX_SIZE=10)

In [None]:
# Two ways to stage the data: build a fresh split from a labeled image folder
# (commented out below) or unpack a previously written split archive.
# aw.initialize_images_from_bag(bag_of_images_and_labels = "./Image Repo/labeled/Final Roboflow Export (841)")
aw.initialize_images_from_zip("ipynb_tests/01_split_datadump/Final Roboflow Export (841)<01_split_all> 21-03-24 17-23-14.zip")

mv 'unzipped/ipynb_tests/01_split_datadump/Final Roboflow Export (841)<01_split_all> 21-03-24 17-23-14/train' .
mv 'unzipped/ipynb_tests/01_split_datadump/Final Roboflow Export (841)<01_split_all> 21-03-24 17-23-14/valid' .
mv 'unzipped/ipynb_tests/01_split_datadump/Final Roboflow Export (841)<01_split_all> 21-03-24 17-23-14/test' .
mv 'unzipped/ipynb_tests/01_split_datadump/Final Roboflow Export (841)<01_split_all> 21-03-24 17-23-14/data.yaml' .


In [None]:
# Entries that were moved into the working directory by the previous cell.
aw.resource_paths

['train', 'valid', 'test', 'data.yaml']

In [None]:
# Print the folder layout and per-folder file counts of the staged resources.
aw.traverse_resources()

train/images
	 > 231 files
train/labels
	 > 231 files
valid/images
	 > 169 files
valid/labels
	 > 169 files
test/images
	 > 83 files
test/labels
	 > 83 files
File:  True


In [None]:
%%time

# Train for a single epoch as a quick end-to-end check; rm_local_files=True
# removes the staged resources (train/valid/test/data.yaml) afterwards.
current = aw.generate_weights(1, rm_local_files=True)

Removing:  train
Removing:  valid
Removing:  test
Removing:  data.yaml
CPU times: user 143 ms, sys: 62.4 ms, total: 206 ms
Wall time: 42.2 s


In [None]:
# Print results.txt from the run folder returned by generate_weights above.
%cat "{current}/results.txt"

       0/0     1.83G    0.1097   0.06886   0.06383    0.2424        67       416 1.971e-05  0.003488  1.45e-06 1.508e-07   0.09795   0.05896   0.05925


In [None]:
# Delete the yolov5 folder from the working directory.
%rm -f -r yolov5