From 9fc1494471cc84f25c3d2d9c062d47cfe06bb9ac Mon Sep 17 00:00:00 2001 From: Bryon Lews Date: Tue, 15 Dec 2020 15:41:10 -0500 Subject: [PATCH 1/8] client side stuff --- client/platform/web-girder/api/viame.service.ts | 4 ++-- client/viame-web-common/apispec.ts | 2 +- client/viame-web-common/components/RunTrainingMenu.vue | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/client/platform/web-girder/api/viame.service.ts b/client/platform/web-girder/api/viame.service.ts index b51356b95..25ffac00f 100644 --- a/client/platform/web-girder/api/viame.service.ts +++ b/client/platform/web-girder/api/viame.service.ts @@ -87,8 +87,8 @@ async function getTrainingConfigurations(): Promise { return data; } -function runTraining(folderId: string, pipelineName: string, config: string) { - return girderRest.post('/viame/train', null, { params: { folderId, pipelineName, config } }); +function runTraining(folderIds: string[], pipelineName: string, config: string) { + return girderRest.post('/viame/train', null, { params: { folderIds, pipelineName, config } }); } function saveMetadata(folderId: string, metadata: object) { diff --git a/client/viame-web-common/apispec.ts b/client/viame-web-common/apispec.ts index ab50dfe31..0723e4944 100644 --- a/client/viame-web-common/apispec.ts +++ b/client/viame-web-common/apispec.ts @@ -65,7 +65,7 @@ interface Api { runPipeline(itemId: string, pipeline: string): Promise; getTrainingConfigurations(): Promise; - runTraining(folderId: string, pipelineName: string, config: string): Promise; + runTraining(folderIds: string[], pipelineName: string, config: string): Promise; loadDetections(datasetId: string): Promise<{ [key: string]: TrackData }>; saveDetections(datasetId: string, args: SaveDetectionsArgs): Promise; diff --git a/client/viame-web-common/components/RunTrainingMenu.vue b/client/viame-web-common/components/RunTrainingMenu.vue index eb3954d92..33a8e4a04 100644 --- a/client/viame-web-common/components/RunTrainingMenu.vue +++ b/client/viame-web-common/components/RunTrainingMenu.vue @@ -31,7 +31,7 @@ export default defineComponent({ selectedTrainingConfig.value = resp.default; }); - const trainingDisabled = computed(() => props.selectedDatasetIds.length !== 1); + const trainingDisabled = computed(() => props.selectedDatasetIds.length >= 1); const trainingOutputName = ref(null); const menuOpen = ref(false); @@ -46,7 +46,7 @@ export default defineComponent({ try { await runTraining( - props.selectedDatasetIds[0], + props.selectedDatasetIds, trainingOutputName.value, selectedTrainingConfig.value, ); From 4b670cd75ab2564505e91f875cc3c356220a3728 Mon Sep 17 00:00:00 2001 From: Bryon Lews Date: Wed, 16 Dec 2020 10:59:03 -0500 Subject: [PATCH 2/8] working multi training --- .../platform/web-girder/api/viame.service.ts | 2 +- .../components/RunTrainingMenu.vue | 2 +- server/viame_server/viame.py | 22 +++++++++----- server/viame_tasks/tasks.py | 29 +++++++++---------- 4 files changed, 30 insertions(+), 25 deletions(-) diff --git a/client/platform/web-girder/api/viame.service.ts b/client/platform/web-girder/api/viame.service.ts index 29dab8115..6237f7aab 100644 --- a/client/platform/web-girder/api/viame.service.ts +++ b/client/platform/web-girder/api/viame.service.ts @@ -93,7 +93,7 @@ async function getTrainingConfigurations(): Promise { } function runTraining(folderIds: string[], pipelineName: string, config: string) { - return girderRest.post('/viame/train', null, { params: { folderIds, pipelineName, config } }); + return girderRest.post('/viame/train', folderIds, { params: { pipelineName, config } }); } function saveMetadata(folderId: string, metadata: object) { diff --git a/client/viame-web-common/components/RunTrainingMenu.vue b/client/viame-web-common/components/RunTrainingMenu.vue index 33a8e4a04..c862644f1 100644 --- a/client/viame-web-common/components/RunTrainingMenu.vue +++ b/client/viame-web-common/components/RunTrainingMenu.vue @@ -31,7 +31,7 @@ export default defineComponent({ selectedTrainingConfig.value = resp.default; }); - const trainingDisabled = computed(() => props.selectedDatasetIds.length >= 1); + const trainingDisabled = computed(() => props.selectedDatasetIds.length === 0); const trainingOutputName = ref(null); const menuOpen = ref(false); diff --git a/server/viame_server/viame.py b/server/viame_server/viame.py index 6d84e06a4..ac2c36b99 100644 --- a/server/viame_server/viame.py +++ b/server/viame_server/viame.py @@ -108,8 +108,7 @@ def run_pipeline_task(self, folder, pipeline: PipelineDescription): Description("Run training on a folder") .jsonParam( "folderIds", - description="Array container folderIds", - requireObject=True, + description="Array of folderIds to run training on", paramType="body" ) .param( @@ -125,16 +124,23 @@ def run_pipeline_task(self, folder, pipeline: PipelineDescription): required=True, ) ) - def run_training(self, folders, pipelineName, config): + def run_training(self, folderIds, pipelineName, config): user = self.getCurrentUser() token = Token().createToken(user=user, days=14) - delection_list = [] + detection_list = [] folder_list = [] - for folderId in folders: - folder = Folder().findOne({"_id": folderId}) + folder_names = [] + if folderIds is None or len(folderIds) == 0: + raise Exception("No folderIds in param") + + for folderId in folderIds: + folder = Folder().load(folderId, level=AccessType.READ, user=user) + if folder is None: + raise Exception(f"Cannot access folder {folderId}") + folder_names.append(folder['name']) detections = list( - Item().find({"meta.detection": str(folder["_id"])}).sort([("created", -1)]) + Item().find({"meta.detection": str(folderId)}).sort([("created", -1)]) ) detection = detections[0] if detections else None @@ -158,7 +164,7 @@ def run_training(self, folders, pipelineName, config): pipeline_name=pipelineName, config=config, girder_client_token=str(token["_id"]), - girder_job_title=(f"Running training on folder: {str(folder['name'])}"), + girder_job_title=(f"Running training on folder: {', '.join(folder_names)}"), girder_job_type="training", ), ) diff --git a/server/viame_tasks/tasks.py b/server/viame_tasks/tasks.py index dec55d5d0..a771ebd42 100644 --- a/server/viame_tasks/tasks.py +++ b/server/viame_tasks/tasks.py @@ -202,7 +202,6 @@ def train_pipeline( gc: GirderClient = self.girder_client manager: JobManager = self.job_manager - viame_install_path = Path(conf.viame_install_path) pipeline_base_path = Path(conf.pipeline_base_path) training_executable = viame_install_path / "bin" / "viame_train_detector" @@ -213,6 +212,7 @@ def train_pipeline( if len(source_folder_list) != len(groundtruth_list): raise Exception("Ground truth doesn't exist for all folders") + trained_on_list = [] # root_data_dir is the directory passed to `viame_train_detector` with tempfile.TemporaryDirectory() as _temp_dir_string: manager.updateStatus(JobStatus.FETCHING_INPUT) @@ -222,21 +222,20 @@ def train_pipeline( for index in range(len(source_folder_list)): source_folder = source_folder_list[index] groundtruth = groundtruth_list[index] - with tempfile.TemporaryDirectory() as _sub_dir_string: - manager.updateStatus(JobStatus.FETCHING_INPUT) - sub_data_dir = Path(_sub_dir_string) - download_path = Path(tempfile.mkdtemp(dir=sub_data_dir)) + download_path = Path(tempfile.mkdtemp(dir=root_data_dir)) + print(download_path) - training_data = gc.listItem(source_folder["_id"]) + training_data = gc.listItem(source_folder["_id"]) + trained_on_list.append(str(source_folder["_id"])) - # Download data onto server - gc.downloadItem(str(groundtruth["_id"]), download_path) - for item in training_data: - gc.downloadItem(str(item["_id"]), download_path) + # Download data onto server + gc.downloadItem(str(groundtruth["_id"]), download_path) + for item in training_data: + gc.downloadItem(str(item["_id"]), download_path) - # Organize data - groundtruth_path = download_path / groundtruth["name"] - organize_folder_for_training(root_data_dir, download_path, groundtruth_path) + # Organize data + groundtruth_path = download_path / groundtruth["name"] + organize_folder_for_training(root_data_dir, download_path, groundtruth_path) # Completely separate directory from `root_data_dir` with tempfile.TemporaryDirectory() as _training_output_path: @@ -252,7 +251,7 @@ def train_pipeline( process_log_file = tempfile.TemporaryFile() process_err_file = tempfile.TemporaryFile() - + print(" ".join(command)) manager.updateStatus(JobStatus.RUNNING) # Call viame_train_detector process = Popen( @@ -290,7 +289,7 @@ def train_pipeline( pipeline_name, metadata={ "trained_pipeline": True, - "trained_on": str(source_folder["_id"]), + "trained_on": trained_on_list, }, ) From 7e1f6a3bd1d0fb0a43d9bf052d3a8a250c6b125f Mon Sep 17 00:00:00 2001 From: BryonLewis Date: Wed, 16 Dec 2020 13:51:18 -0500 Subject: [PATCH 3/8] mend --- server/viame_server/viame.py | 6 ++++-- server/viame_tasks/tasks.py | 2 -- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/viame_server/viame.py b/server/viame_server/viame.py index ac2c36b99..e7859bef3 100644 --- a/server/viame_server/viame.py +++ b/server/viame_server/viame.py @@ -109,7 +109,7 @@ def run_pipeline_task(self, folder, pipeline: PipelineDescription): .jsonParam( "folderIds", description="Array of folderIds to run training on", - paramType="body" + paramType="body", ) .param( "pipelineName", @@ -164,7 +164,9 @@ def run_training(self, folderIds, pipelineName, config): pipeline_name=pipelineName, config=config, girder_client_token=str(token["_id"]), - girder_job_title=(f"Running training on folder: {', '.join(folder_names)}"), + girder_job_title=( + f"Running training on folder: {', '.join(folder_names)}" + ), girder_job_type="training", ), ) diff --git a/server/viame_tasks/tasks.py b/server/viame_tasks/tasks.py index a771ebd42..4bb76fd4c 100644 --- a/server/viame_tasks/tasks.py +++ b/server/viame_tasks/tasks.py @@ -223,8 +223,6 @@ def train_pipeline( source_folder = source_folder_list[index] groundtruth = groundtruth_list[index] download_path = Path(tempfile.mkdtemp(dir=root_data_dir)) - print(download_path) - training_data = gc.listItem(source_folder["_id"]) trained_on_list.append(str(source_folder["_id"])) From 5b17e1aff622ad46b6261dcb08df517da3d998f6 Mon Sep 17 00:00:00 2001 From: BryonLewis Date: Mon, 21 Dec 2020 12:28:01 -0500 Subject: [PATCH 4/8] Adding in the new folder/groundtruth list --- server/viame_tasks/tasks.py | 23 +++++++++++++++++++---- server/viame_tasks/utils.py | 16 +--------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/server/viame_tasks/tasks.py b/server/viame_tasks/tasks.py index 4bb76fd4c..1f7280056 100644 --- a/server/viame_tasks/tasks.py +++ b/server/viame_tasks/tasks.py @@ -213,6 +213,7 @@ def train_pipeline( raise Exception("Ground truth doesn't exist for all folders") trained_on_list = [] + input_groundtruth_list = [] # root_data_dir is the directory passed to `viame_train_detector` with tempfile.TemporaryDirectory() as _temp_dir_string: manager.updateStatus(JobStatus.FETCHING_INPUT) @@ -233,7 +234,19 @@ def train_pipeline( # Organize data groundtruth_path = download_path / groundtruth["name"] - organize_folder_for_training(root_data_dir, download_path, groundtruth_path) + groundtruth_file = organize_folder_for_training( + root_data_dir, download_path, groundtruth_path + ) + input_groundtruth_list.append([download_path, groundtruth_file]) + + input_folder_file_list = root_data_dir / "input_folder_list.txt" + ground_truth_file_list = root_data_dir / "input_truth_list.txt" + with open(input_folder_file_list, "w+") as data_list: + folder_paths = [f"{item[0]}\n" for item in input_groundtruth_list] + data_list.writelines(folder_paths) + with open(ground_truth_file_list, "w+") as truth_list: + truth_paths = [f"{item[1]}\n" for item in input_groundtruth_list] + truth_list.writelines(truth_paths) # Completely separate directory from `root_data_dir` with tempfile.TemporaryDirectory() as _training_output_path: @@ -241,15 +254,17 @@ def train_pipeline( command = [ f". {conf.viame_install_path}/setup_viame.sh &&", str(training_executable), - "-i", - str(root_data_dir), + "-il", + str(input_folder_file_list), + "-it", + str(ground_truth_file_list), "-c", str(config_file), + "--no-query", ] process_log_file = tempfile.TemporaryFile() process_err_file = tempfile.TemporaryFile() - print(" ".join(command)) manager.updateStatus(JobStatus.RUNNING) # Call viame_train_detector process = Popen( diff --git a/server/viame_tasks/utils.py b/server/viame_tasks/utils.py index 08d2423cd..c9721e738 100644 --- a/server/viame_tasks/utils.py +++ b/server/viame_tasks/utils.py @@ -66,18 +66,4 @@ def organize_folder_for_training( groundtruth = data_dir / "groundtruth.csv" shutil.move(str(downloaded_groundtruth), groundtruth) - # Generate labels.txt - labels = set() - with open(groundtruth, 'r') as groundtruth_infile: - for line in groundtruth_infile.readlines(): - if not line.strip().startswith('#'): - row = [c.strip() for c in line.split(",")] - - # Confidence pairs start at the 9th index - # 9th index is label, 10th is confidence, 11th is another label, etc. - for label in row[9::2]: - labels.add(label) - - with open(root_training_dir / "labels.txt", "a+") as labels_file: - label_lines = [f"{label}\n" for label in labels] - labels_file.writelines(label_lines) + return groundtruth From c94b7d98e26998e2f75dcc3b676e976b36a3aa74 Mon Sep 17 00:00:00 2001 From: BryonLewis <61746913+BryonLewis@users.noreply.github.com> Date: Mon, 21 Dec 2020 19:05:24 -0500 Subject: [PATCH 5/8] Update server/viame_tasks/tasks.py Co-authored-by: Jacob Nesbitt --- server/viame_tasks/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/viame_tasks/tasks.py b/server/viame_tasks/tasks.py index 1f7280056..cfab5071a 100644 --- a/server/viame_tasks/tasks.py +++ b/server/viame_tasks/tasks.py @@ -192,7 +192,7 @@ def train_pipeline( """ Train a pipeline by making a call to viame_train_detector - :param source_folder_list: The Girder Folder to pull training data from + :param source_folder_list: The Girder Folders to pull training data from :param results_folder: The Girder Folder to place the results of training into :param groundtruth_list: The relative path to either the file containing detections, or the folder containing that file. From ce92ba4a1e98773d626ba937a35266da291dcc8d Mon Sep 17 00:00:00 2001 From: BryonLewis <61746913+BryonLewis@users.noreply.github.com> Date: Mon, 21 Dec 2020 19:05:36 -0500 Subject: [PATCH 6/8] Update server/viame_tasks/tasks.py Co-authored-by: Jacob Nesbitt --- server/viame_tasks/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/viame_tasks/tasks.py b/server/viame_tasks/tasks.py index cfab5071a..d6cea2043 100644 --- a/server/viame_tasks/tasks.py +++ b/server/viame_tasks/tasks.py @@ -194,8 +194,8 @@ def train_pipeline( :param source_folder_list: The Girder Folders to pull training data from :param results_folder: The Girder Folder to place the results of training into - :param groundtruth_list: The relative path to either the file containing detections, - or the folder containing that file. + :param groundtruth_list: A list of relative paths to either a file containing detections, + or a folder containing that file. :param pipeline_name: The base name of the resulting pipeline. """ conf = Config() From bcada290243904c5ed4506de887b66e77c46c45e Mon Sep 17 00:00:00 2001 From: BryonLewis <61746913+BryonLewis@users.noreply.github.com> Date: Mon, 21 Dec 2020 19:06:01 -0500 Subject: [PATCH 7/8] Update server/viame_tasks/tasks.py Co-authored-by: Jacob Nesbitt --- server/viame_tasks/tasks.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/viame_tasks/tasks.py b/server/viame_tasks/tasks.py index d6cea2043..e61ca3d4d 100644 --- a/server/viame_tasks/tasks.py +++ b/server/viame_tasks/tasks.py @@ -219,14 +219,15 @@ def train_pipeline( manager.updateStatus(JobStatus.FETCHING_INPUT) root_data_dir = Path(_temp_dir_string) - # Generator of items for index in range(len(source_folder_list)): source_folder = source_folder_list[index] groundtruth = groundtruth_list[index] - download_path = Path(tempfile.mkdtemp(dir=root_data_dir)) - training_data = gc.listItem(source_folder["_id"]) + download_path = Path(tempfile.mkdtemp(dir=root_data_dir) trained_on_list.append(str(source_folder["_id"])) + # Generator of items + training_data = gc.listItem(source_folder["_id"]) + # Download data onto server gc.downloadItem(str(groundtruth["_id"]), download_path) for item in training_data: From cbd9e993d3039055a0c7e393d19ee564f8b01fa5 Mon Sep 17 00:00:00 2001 From: BryonLewis Date: Mon, 21 Dec 2020 19:56:47 -0500 Subject: [PATCH 8/8] Fixing comments and paranethesis --- server/viame_tasks/tasks.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/server/viame_tasks/tasks.py b/server/viame_tasks/tasks.py index e61ca3d4d..95c4a4d3c 100644 --- a/server/viame_tasks/tasks.py +++ b/server/viame_tasks/tasks.py @@ -212,8 +212,10 @@ def train_pipeline( if len(source_folder_list) != len(groundtruth_list): raise Exception("Ground truth doesn't exist for all folders") - trained_on_list = [] - input_groundtruth_list = [] + # List of folderIds used for training + trained_on_list: List[str] = [] + # List of[input folder / ground truth file] pairs for creating input lists + input_groundtruth_list: List[[Path, Path]] = [] # root_data_dir is the directory passed to `viame_train_detector` with tempfile.TemporaryDirectory() as _temp_dir_string: manager.updateStatus(JobStatus.FETCHING_INPUT) @@ -222,7 +224,7 @@ def train_pipeline( for index in range(len(source_folder_list)): source_folder = source_folder_list[index] groundtruth = groundtruth_list[index] - download_path = Path(tempfile.mkdtemp(dir=root_data_dir) + download_path = Path(tempfile.mkdtemp(dir=root_data_dir)) trained_on_list.append(str(source_folder["_id"])) # Generator of items