In [5]:
import yaml
import os
from os import listdir
from os.path import isfile, join
from pathlib import Path
import cv2
import glob
import shutil
import time

# Load the pipeline settings once; later cells read them from `config`.
# yaml.load with FullLoader turns the YAML document into plain Python objects.
# NOTE(review): yaml.safe_load is probably sufficient if the file only holds
# plain mappings/lists/scalars - confirm before switching.
with open(r'utilities/configs.yml') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)


    
class Helper:
    '''
    Small utilities shared by the frame-extraction pipeline.
    '''

    def get_frames_margin(self, videos, total):
        '''
        Calculate the margin (step) between the frames we save, from the total number
        of frames available in the videos and the total number of frames we want to save.

        Parameters:
            videos: iterable of video paths that cv2.VideoCapture can open.
            total: number of frames we want to end up with; must be positive.

        Returns:
            The step between saved frames, never less than 1 (a step of 0 would make
            the caller's ``index % step`` test divide by zero).

        Raises:
            ValueError: if ``total`` is not positive.
        '''
        if total <= 0:
            raise ValueError(f"total must be a positive number of frames, got {total!r}")

        total_frames = 0
        for v in videos:
            cap = cv2.VideoCapture(v)
            try:
                total_frames += int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            finally:
                # Release the capture right away instead of leaking one handle per video.
                cap.release()

        # Fewer frames than requested (or no videos at all) -> keep every frame.
        return max(1, total_frames // total)

    
class RetainingModel:
    '''
    Take a list of videos, convert them into frames, and use a previous model to generate
    annotations so we can adjust them using labelimg and retrain to increase the accuracy.

    Configuration keys read by this class: ``project_path`` (optional),
    ``videos_folder_name``, ``videos_extensions``, ``saving_path``, ``frames_number``,
    ``model_path``, ``resolution``, ``conf`` and ``classes_file_path``.
    '''
    def __init__(self, config = config):
        self.config = config
        # NOTE: the attribute name is misspelled; it is kept as-is so code that
        # already reads `herlper` keeps working.
        self.herlper = Helper()
        self.videos = []
        # Keep one frame out of every `frames_margin`; recomputed by preparing_data().
        self.frames_margin = 5

    def preparing_data(self):
        '''
        1. change the working directory to the project path, if one is configured
        2. collect the video paths
        3. create the saving folder and compute the frames margin
        '''

        if self.config.get('project_path'):
            os.chdir(self.config['project_path'])

        videos_dir = self.config['videos_folder_name']
        extensions = self.config['videos_extensions']
        # Sorted so that frame numbering does not depend on the arbitrary listdir order.
        # The extension test looks at the last three characters of the file name.
        self.videos = [
            f"{videos_dir}/{name}"
            for name in sorted(listdir(videos_dir))
            if isfile(join(videos_dir, name)) and name[-3:] in extensions
        ]

        # create saving folder if not exist
        Path(self.config['saving_path']).mkdir(parents=True, exist_ok=True)

        # get the frames margin; never below 1, otherwise `i % margin` in
        # preprocessing() would divide by zero when the videos are short
        margin = self.herlper.get_frames_margin(self.videos, self.config['frames_number'])
        self.frames_margin = max(1, margin)

    def preprocessing(self):
        '''
        Read the videos and save every ``frames_margin``-th frame as
        ``frame<N>.jpg`` in the saving folder, stopping once ``frames_number``
        frames have been written.
        '''
        target = self.config['frames_number']
        margin = max(1, int(self.frames_margin))
        count = 0  # frames written so far (also the index in the file name)
        i = 0      # frames read so far, across all videos

        for video in self.videos:
            if count >= target:
                break

            vid_capture = cv2.VideoCapture(video)
            try:
                while vid_capture.isOpened() and count < target:
                    ret, frame = vid_capture.read()
                    if not ret:
                        break
                    if i % margin == 0:
                        cv2.imwrite(os.path.join(self.config['saving_path'], f'frame{count}.jpg'), frame)
                        count += 1
                        if count % 10 == 0:
                            print(count)
                    i += 1
            finally:
                # Release every capture, not only the last one.
                vid_capture.release()

        # No cv2.waitKey / cv2.destroyAllWindows here: no window is ever opened,
        # so there is no key press to wait for and nothing to destroy.

    def processing(self):
        '''
        Generate annotations using the previous model by running yolov5/detect.py
        on the saved frames.

        The command is passed as an argument list (no shell), so paths containing
        spaces work, and the child's output is echoed line by line.

        Raises:
            RuntimeError: if detect.py exits with a non-zero status.
        '''
        import subprocess
        import sys

        command = [
            sys.executable, 'yolov5/detect.py',
            '--weights', str(self.config['model_path']),
            '--img', str(int(self.config['resolution'])),
            '--conf', str(float(self.config['conf'])),
            '--source', str(self.config['saving_path']),
            '--save-txt',
            '--project', str(self.config['saving_path']),
        ]
        with subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                              text=True, errors='replace') as process:
            for line in process.stdout:
                print(line, end='')

        if process.returncode != 0:
            raise RuntimeError(f"yolov5/detect.py failed with exit code {process.returncode}")

    def post_processing(self):
        '''
        Organize files so we can use labeling directly without any manual work:
        label files are moved next to the frames, classes.txt is copied in, and
        every sub-folder of the saving folder is removed.
        '''
        saving_path = Path(self.config['saving_path'])

        # move text files to frames folder; they live in <saving_path>/<run>/labels/
        label_pattern = os.path.join(saving_path, '*', 'labels', '*.txt')
        for label_file in glob.glob(label_pattern):
            try:
                shutil.move(label_file, saving_path)
            except OSError as e:
                # e.g. a label with the same name already exists in the frames folder
                print(e)

        # copy classes.txt file to frames folder
        shutil.copy(self.config['classes_file_path'], self.config['saving_path'])

        # remove any folder (and only folders: rmtree fails on plain files)
        for entry in os.listdir(saving_path):
            entry_path = os.path.join(saving_path, entry)
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)

    def run(self):
        '''
        Run the whole pipeline: prepare, extract frames, annotate, organize files.
        The organize step is retried once after a longer pause if it fails.
        '''
        print("------------------------------- preparing required data/ configrations -------------------------------")
        self.preparing_data()

        print("------------------------------- read videos and convert them into frames -------------------------------")
        self.preprocessing()

        print("------------------------------- generate annotaions using previous model -------------------------------")
        self.processing()

        try:
            # presumably gives the detection run's files time to be released - TODO confirm
            time.sleep(2)
            print("------------------------------- organize files -------------------------------")
            self.post_processing()
        except Exception as error:
            # Exception, not a bare except, so Ctrl-C still interrupts the run.
            print(error)
            time.sleep(5)
            print("------------------------------- re-organize files -------------------------------")
            self.post_processing()


In [6]:
# Build the pipeline object around the configuration loaded earlier in the notebook.
retrianing = RetainingModel(config=config)

In [7]:
# Run only the annotation step and the file clean-up step of the pipeline
# (frame extraction is not repeated here).
retrianing.processing()
time.sleep(5)  # pause before moving/removing the files the detection step produced
retrianing.post_processing()


[34m[1mdetect: [0mweights=['models/best.pt'], source=C:\Users\mahmo\freelance\mohamed\training_pipeline\frames, data=yolov5\data\coco128.yaml, imgsz=[640, 640], conf_thres=0.1, iou_thres=0.45, max_det=1000, device=, view_img=False, save_txt=True, save_conf=False, save_crop=False, nosave=False, classes=None, agnostic_nms=False, augment=False, visualize=False, update=False, project=C:\Users\mahmo\freelance\mohamed\training_pipeline\frames, name=exp, exist_ok=False, line_thickness=3, hide_labels=False, hide_conf=False, half=False, dnn=False, vid_stride=1
YOLOv5  v6.2-105-g4e8504a Python-3.9.12 torch-1.12.1 CUDA:0 (NVIDIA GeForce GTX 1050, 4096MiB)

Fusing layers... 
Model summary: 213 layers, 1761871 parameters, 0 gradients
image 1/482 C:\Users\mahmo\freelance\mohamed\training_pipeline\frames\frame101.jpg: 512x640 (no detections), 12.0ms
image 2/482 C:\Users\mahmo\freelance\mohamed\training_pipeline\frames\frame102.jpg: 512x640 (no detections), 10.0ms
image 3/482 C:\Users\mahmo\freelan

In [4]:
# Run the full pipeline: prepare data, extract frames, annotate, organize files.
retrianing.run()

## After adjusting the annotations, we will use this enhanced dataset to train a more powerful model

In [1]:
import torch
from IPython.display import Image, clear_output  # to display images
from pathlib import Path
from tqdm import tqdm
import numpy as np
import json
import urllib
import PIL.Image as Image
import cv2
import torch
import torchvision
from IPython.display import display
from sklearn.model_selection import train_test_split
import os
import glob
import shutil
from IPython.display import Image, clear_output  # to display images
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import yaml

%matplotlib inline
%config InlineBackend.figure_format='retina'
sns.set(style='whitegrid', palette='muted', font_scale=1.2)
rcParams['figure.figsize'] = 16, 10

np.random.seed(42)
clear_output()
print(f"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})")

Setup complete. Using torch 1.12.1 (NVIDIA GeForce GTX 1050)


In [34]:
# Read files' path in folder and save them in all data
data_path = os.path.join(config['saving_path'] ,'*g')
all_data = glob.glob(data_path)
print(len(all_data))

100


In [30]:
# Hold out 10% of the frames for validation; random_state fixes the shuffle.
train, val = train_test_split(
    all_data,
    test_size=0.10,
    random_state=42,
)

In [36]:
# Create the labels/images x train/val folder layout (no error if it already exists).
for dataset_dir in (
    'dataset/labels/train',
    'dataset/labels/val',
    'dataset/images/train',
    'dataset/images/val',
):
    Path(dataset_dir).mkdir(parents=True, exist_ok=True)

In [37]:
print("start preparing Training")

def copy_labeled_frames(image_paths, split):
    '''
    Copy each image and its label file into dataset/images/<split> and
    dataset/labels/<split>.

    The label path is the image path with its extension replaced by ``.txt``
    (only the extension is touched, never a "jpg" that appears elsewhere in the path).
    The label is copied first, so an image without a label file is reported and skipped.

    Parameters:
        image_paths: iterable of image file paths.
        split: sub-folder name, e.g. 'train' or 'val'.
    '''
    labels_dir = f'dataset/labels/{split}'
    images_dir = f'dataset/images/{split}'
    for image_path in image_paths:
        label_path = Path(image_path).with_suffix('.txt')
        try:
            # move text file first so we ignore images without text file
            shutil.copy(label_path, labels_dir)
            shutil.copy(image_path, images_dir)
        except OSError as e:
            print(e)


copy_labeled_frames(train, 'train')

print("start preparing validation")
copy_labeled_frames(val, 'val')

start preparing Training
[Errno 2] No such file or directory: 'C:\\Users\\mahmo\\freelance\\mohamed\\training_pipeline\\frames\\frame26.txt'
start preparing validation
[Errno 2] No such file or directory: 'C:\\Users\\mahmo\\freelance\\mohamed\\training_pipeline\\frames\\frame0.txt'


In [38]:
os.chdir('yolov5')
# !wget https://github.com/ultralytics/yolov5/releases/download/v6.0/yolov5n6.pt
!wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5m6.pt
# !wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5n.pt
# !wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5n-7.pt
# !wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5n-7-k5.pt

--2022-09-16 14:56:59--  https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5m6.pt
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/264818686/2328d51e-5f81-49d6-b225-d704addbf92d?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20220916%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220916T125658Z&X-Amz-Expires=300&X-Amz-Signature=483e9511d6d1aa7ed4ca9a8d1bbd0a5bfc5c54c223020a2970808e2d6d358375&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=264818686&response-content-disposition=attachment%3B%20filename%3Dyolov5m6.pt&response-content-type=application%2Foctet-stream [following]
--2022-09-16 14:57:00--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/264818686/2328d51e-5f81-49d6-b225-d704addbf92d?X-Amz-Algorithm=

 56900K .......... .......... .......... .......... .......... 80% 3.29M 4s
 56950K .......... .......... .......... .......... .......... 80% 3.50M 4s
 57000K .......... .......... .......... .......... .......... 80% 2.63M 4s
 57050K .......... .......... .......... .......... .......... 80% 3.34M 4s
 57100K .......... .......... .......... .......... .......... 80% 3.36M 4s
 57150K .......... .......... .......... .......... .......... 81% 3.35M 4s
 57200K .......... .......... .......... .......... .......... 81% 3.47M 4s
 57250K .......... .......... .......... .......... .......... 81% 3.38M 4s
 57300K .......... .......... .......... .......... .......... 81% 3.63M 4s
 57350K .......... .......... .......... .......... .......... 81% 3.43M 4s
 57400K .......... .......... .......... .......... .......... 81% 2.49M 4s
 57450K .......... .......... .......... .......... .......... 81% 3.41M 4s
 57500K .......... .......... .......... .......... .......... 81% 3.56M 4s
 57550K ....

In [40]:
# Read the class names, one per line, from the classes.txt kept in the frames folder.
classes_file = os.path.join(config['saving_path'], 'classes.txt')
with open(classes_file) as handle:
    classes = list(map(str.rstrip, handle))

num_classes = len(classes)

num_classes

2

In [84]:
# Dataset description: image folders for each split, number of classes, class names.
dict_file = {
    'train': '../dataset/images/train/',
    'val': "../dataset/images/val/",
    'nc': num_classes,
    'names': classes,
}

# Write it as block-style YAML under ./data.
with open('./data/voc.yml', 'w') as yaml_out:
    yaml.dump(dict_file, yaml_out, default_flow_style=False)


In [81]:
# conda install -y -c conda-forge tensorboard

# Load the TensorBoard notebook extension and open it on the runs/train log directory
# (the path is relative to the current working directory).
%load_ext tensorboard
%tensorboard --logdir runs/train

In [None]:
!python yolov5/train.py --img 640 --batch 8 --epochs 5 --data yolov5/data/voc.yaml --weights yolov5/yolov5m6.pt --cache