# **Install Ultralytics and Roboflow**


In [None]:
# Use the %pip magic rather than !pip so the package is installed into the
# environment of the kernel that is actually running this notebook.
%pip install ultralytics

In [None]:
import ultralytics
from IPython import display

# Clear whatever this cell has printed so far (e.g. import-time messages),
# then run the Ultralytics environment check so its output is all that remains.
display.clear_output()
ultralytics.checks()

# **Download Dataset**

In [None]:
# NOTE(review): the dataset access key is embedded in the URL below, so anyone
# who can read this notebook can download the dataset. Consider reading it from
# an environment variable or a Colab secret instead.
# The URL and the archive path are quoted because both contain `?`, which the
# shell would otherwise treat as a glob character.
!wget "https://app.roboflow.com/ds/UQgYXPhPF3?key=SzmvRo43bC"
!7z x "/content/UQgYXPhPF3?key=SzmvRo43bC"
# %pip installs into the environment of the running kernel.
%pip install roboflow


# **Preprocess Dataset**

In [None]:
import os
import glob
import numpy as np

def preprocessing(path, empty_percent = 5):
  """Delete surplus images whose label file is empty from a dataset split.

  The split is expected to contain an ``images`` and a ``labels`` directory.
  Images are visited in random order; the first ``num_max`` images with an
  empty label file are kept, and every further one is deleted together with
  its label file.

  Args:
    path: Directory of the split (the parent of ``images`` and ``labels``).
    empty_percent: Maximum share of empty-label images to keep, expressed as a
      percentage of the number of images found. Defaults to 5.

  Notes:
    An image whose label file cannot be found is left untouched, so that a
    misplaced ``labels`` directory can never cause images to be deleted.
  """
  mask = os.path.join(path, r'images/*.[jp][pn]g')
  files = glob.glob(mask)
  num_of_files = len(files)
  shuffle = np.random.permutation(num_of_files)
  num_max = int(num_of_files * empty_percent / 100)
  labels_dir = os.path.join(path, 'labels')
  count = 0
  for idx in shuffle:
    image = files[idx]
    # Build the label path from the file stem instead of str.replace(), which
    # would also rewrite '/images', '.jpg' or '.png' appearing elsewhere in the path.
    stem = os.path.splitext(os.path.basename(image))[0]
    label_file = os.path.join(labels_dir, stem + '.txt')
    if not os.path.isfile(label_file):
      continue
    with open(label_file, 'r') as f:
      is_empty = len(f.readlines()) == 0
    if not is_empty:
      continue
    count += 1
    # The label file is closed at this point, so removing it is safe on every platform.
    if count > num_max:
      os.remove(image)
      os.remove(label_file)

def get_stats(train = True, root = '/content'):
  """Print image and annotation counts for the training or validation split.

  Looks at ``<root>/train`` when ``train`` is truthy and ``<root>/valid``
  otherwise. Both ``.jpg`` and ``.png`` images are counted, using the same
  file mask as ``preprocessing``. Every line of a label file is counted as
  one car.

  Args:
    train: Selects the ``train`` split when truthy, ``valid`` otherwise.
    root: Directory that contains the split directories. Defaults to
      ``/content``.

  Notes:
    An image without a label file is reported as an image without annotations.
  """
  split_dir = os.path.join(root, 'train' if train else 'valid')
  mask = os.path.join(split_dir, r'images/*.[jp][pn]g')
  labels_dir = os.path.join(split_dir, 'labels')
  count_classless = 0
  count_cars = 0
  num_images = 0
  for image in glob.glob(mask):
    num_images += 1
    # Build the label path from the file stem instead of str.replace(), which
    # would also rewrite '/images', '.jpg' or '.png' appearing elsewhere in the path.
    stem = os.path.splitext(os.path.basename(image))[0]
    label_file = os.path.join(labels_dir, stem + '.txt')
    if os.path.isfile(label_file):
      with open(label_file, 'r') as f:
        labels = f.readlines()
    else:
      labels = []
    if len(labels) == 0:
      count_classless += 1
    else:
      count_cars += len(labels)
  print(f'Number of images: {num_images}')
  print(f'Images without annotations: {count_classless}')
  print(f'Cars: {count_cars}')

In [None]:
# Prune the training split, printing its statistics before and after the
# pruning so the effect of preprocessing() is visible.
path = '/content/train'
get_stats()
preprocessing(path)
get_stats()

# **Building and Training YOLOv8**

In [None]:
from ultralytics import YOLO
from PIL import Image

def set_res_dir(train = True):
  """Return a ``results_<n>`` run name based on the existing result directories.

  ``n`` is derived from the number of entries matching
  ``/content/runs/detect/results_*``: that number plus one when ``train`` is
  truthy (a name for a new run), or that number itself otherwise.
  """
  existing_runs = glob.glob('/content/runs/detect/results_*')
  run_index = len(existing_runs)
  if train:
    run_index += 1
  return f'results_{run_index}'

def train(model = 'yolov8n.pt', data = 'coco128.yaml', epochs = 25):
  """Train a YOLO model and return the paths of its results plot and best weights.

  Args:
    model: Checkpoint name or path handed to ``YOLO``.
    data: Dataset YAML handed to ``YOLO.train``.
    epochs: Number of training epochs.

  Returns:
    A ``(results_png_path, best_weights_path)`` tuple of strings. Both paths
    are built from the run name chosen by ``set_res_dir`` under
    ``/content/runs/detect``; they are not checked for existence.
  """
  # The run name has to be picked before training creates the new directory.
  run_name = set_res_dir()
  detector = YOLO(model = model)
  detector.train(data = data, epochs = epochs, name = run_name)
  results_plot = f'/content/runs/detect/{run_name}/results.png'
  best_weights = f'/content/runs/detect/{run_name}/weights/best.pt'
  return results_plot, best_weights

def inference(model_name, data):
  """Run a trained model on one image and return the expected output paths.

  Args:
    model_name: Path of the weights file. The run number is taken from the
      third-from-last path component (for example ``results_3`` in
      ``.../results_3/weights/best.pt`` gives ``3``).
    data: Path of the image to run prediction on.

  Returns:
    A ``(image_path, label_path)`` tuple of strings under
    ``/content/runs/detect/inference_<n>/<image name>/``. The paths are
    constructed, not checked for existence.
  """
  value = model_name.split('/')[-3].split('_')[-1]
  image = data.split('/')[-1]
  inference_dir = f'inference_{value}/{image}'
  model = YOLO(model_name)
  model.predict(data, name = inference_dir, save = True, save_txt = True)
  # save_txt writes the detections to labels/<image stem>.txt, so the label
  # path must carry the .txt extension rather than the image's own extension.
  label = os.path.splitext(image)[0] + '.txt'
  return f'/content/runs/detect/{inference_dir}/{image}', f'/content/runs/detect/{inference_dir}/labels/{label}'

def show_result(paths):
  """Print the number of detections recorded in each label file.

  Args:
    paths: Iterable of ``(image_path, label_path)`` pairs; only the label path
      is used.

  For every pair one line ``<name>: <count>`` is printed, where ``<name>`` is
  the third-from-last component of the label path and ``<count>`` is the
  number of lines in the label file.

  Notes:
    A label file that does not exist is reported as 0, since no label file is
    written for an image without detections. A label path that still carries
    an image extension is read as the matching ``.txt`` file.
  """
  for _, label in paths:
    label_txt = os.path.splitext(label)[0] + '.txt'
    if os.path.isfile(label_txt):
      with open(label_txt, 'r') as f:
        cars = len(f.readlines())
    else:
      cars = 0
    run_name = label.split('/')[-3]
    print(f'{run_name}: {cars}')

In [None]:
# YOLOv8 checkpoints to train and compare.
models = [
    'yolov8n.pt',
    'yolov8s.pt',
    'yolov8m.pt',
    'yolov8l.pt',
    'yolov8x.pt'
]
data = '/content/data.yaml'
sources = r'/content/test/images/*.[jp][pn]g'
results = []      # one results.png path per trained model
inferences = []   # one (image paths, label paths) tuple per trained model
best_models = []  # one best.pt path per trained model
for model in models:
  result, best_model = train(model, data, 30)
  results.append(result)
  best_models.append(best_model)
  image_inference = []
  label_inference = []
  # Run the freshly trained weights on every test image.
  for source in glob.glob(sources):
    image, label = inference(best_model, source)
    image_inference.append(image)
    # Store the label path here; appending `image` again would leave
    # label_inference without any label paths.
    label_inference.append(label)
  inferences.append((image_inference, label_inference))