diff --git a/utils/datasets.py b/utils/datasets.py index 4e0c38e..85923e9 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -936,10 +936,11 @@ def verify_image_label(args): return [None, None, None, None, nm, nf, ne, nc, msg] -def dataset_stats(path='coco128.yaml', verbose=False, profile=False, hub=False): +def dataset_stats(path='coco128.yaml', autodownload=False, verbose=False, profile=False, hub=False): """ Return dataset statistics dictionary with images and instances counts per split per class To run in parent directory: export PYTHONPATH="$PWD/yolov5" - Usage: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') + Usage1: from utils.datasets import *; dataset_stats('coco128.yaml', autodownload=True) + Usage2: from utils.datasets import *; dataset_stats('../datasets/coco128_with_yaml.zip') Arguments path: Path to data.yaml or data.zip (with data.yaml inside data.zip) autodownload: Attempt to download dataset if not found locally @@ -983,7 +984,7 @@ def hub_ops(f, max_dim=1920): data = yaml.safe_load(f) # data dict if zipped: data['path'] = data_dir # TODO: should this be dir.resolve()? - check_dataset(data, autodownload=False) + check_dataset(data, autodownload) # download dataset if missing hub_dir = Path(data['path'] + ('-hub' if hub else '')) stats = {'nc': data['nc'], 'names': data['names']} # statistics dictionary for split in 'train', 'val', 'test':