From 95b6d721820defc0f63af8ed3a116840c8f0fbdb Mon Sep 17 00:00:00 2001 From: Yongrae Jo Date: Sun, 3 Nov 2019 22:54:31 +0900 Subject: [PATCH 1/7] #452 Fix ValueError --- pytorch_lightning/root_module/memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/root_module/memory.py b/pytorch_lightning/root_module/memory.py index d16d2659405cf..513477406328a 100644 --- a/pytorch_lightning/root_module/memory.py +++ b/pytorch_lightning/root_module/memory.py @@ -202,7 +202,7 @@ def get_memory_profile(mode): min_k = None max_mem = 0 max_k = None - for k, v in memory_map: + for k, v in memory_map.items(): if v > max_mem: max_mem = v max_k = k From 9d7b421b8f53d7505715f466db59c02d65033f1d Mon Sep 17 00:00:00 2001 From: Yongrae Jo Date: Sun, 3 Nov 2019 23:10:32 +0900 Subject: [PATCH 2/7] #452 Use subprocess.run --- pytorch_lightning/root_module/memory.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pytorch_lightning/root_module/memory.py b/pytorch_lightning/root_module/memory.py index 513477406328a..943ba13012dbe 100644 --- a/pytorch_lightning/root_module/memory.py +++ b/pytorch_lightning/root_module/memory.py @@ -224,13 +224,17 @@ def get_gpu_memory_map(): Keys are device ids as integers. Values are memory usage as integers in MB. """ - result = subprocess.check_output( + result = subprocess.run( [ - 'nvidia-smi', '--query-gpu=memory.used', - '--format=csv,nounits,noheader' - ], encoding='utf-8') + 'nvidia-smi', + '--query-gpu=memory.used', + '--format=csv,nounits,noheader', + ], + encoding='utf-8', + capture_output=True, + check=True) # Convert lines into a dictionary - gpu_memory = [int(x) for x in result.strip().split('\n')] + gpu_memory = [int(x) for x in result.stdout.strip().split('\n')] gpu_memory_map = {} for k, v in zip(range(len(gpu_memory)), gpu_memory): k = f'gpu_{k}' From 79c29962b090f175cba571d17ef068c9fa26b755 Mon Sep 17 00:00:00 2001 From: Yongrae Jo Date: Sun, 3 Nov 2019 23:14:20 +0900 Subject: [PATCH 3/7] #452 Simplify code for gpu_memory_map --- pytorch_lightning/root_module/memory.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pytorch_lightning/root_module/memory.py b/pytorch_lightning/root_module/memory.py index 943ba13012dbe..daf250b11ad3e 100644 --- a/pytorch_lightning/root_module/memory.py +++ b/pytorch_lightning/root_module/memory.py @@ -235,10 +235,7 @@ def get_gpu_memory_map(): check=True) # Convert lines into a dictionary gpu_memory = [int(x) for x in result.stdout.strip().split('\n')] - gpu_memory_map = {} - for k, v in zip(range(len(gpu_memory)), gpu_memory): - k = f'gpu_{k}' - gpu_memory_map[k] = v + gpu_memory_map = {f'gpu_{index}': utilization for index, utilization in enumerate(gpu_memory)} return gpu_memory_map From 7ded8fa9744db5ddab96605c4b9379064aa5f185 Mon Sep 17 00:00:00 2001 From: Yongrae Jo Date: Sun, 3 Nov 2019 23:18:26 +0900 Subject: [PATCH 4/7] #452 Simplify code for min max memory --- pytorch_lightning/root_module/memory.py | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/pytorch_lightning/root_module/memory.py b/pytorch_lightning/root_module/memory.py index daf250b11ad3e..3b968bb3bfdeb 100644 --- a/pytorch_lightning/root_module/memory.py +++ b/pytorch_lightning/root_module/memory.py @@ -198,19 +198,10 @@ def get_memory_profile(mode): memory_map = get_gpu_memory_map() if mode == 'min_max': - min_mem = 1000000 - min_k = None - max_mem = 0 - max_k = None - for k, v in memory_map.items(): - if v > max_mem: - max_mem = v - max_k = k - if v < min_mem: - min_mem = v - min_k = k - - memory_map = {min_k: min_mem, max_k: max_mem} + min_index, min_memory = min(memory_map.items(), key=lambda item: item[1]) + max_index, max_memory = max(memory_map.items(), key=lambda item: item[1]) + + memory_map = {min_index: min_memory, max_index: max_memory} return memory_map @@ -235,7 +226,7 @@ def get_gpu_memory_map(): check=True) # Convert lines into a dictionary gpu_memory = [int(x) for x in result.stdout.strip().split('\n')] - gpu_memory_map = {f'gpu_{index}': utilization for index, utilization in enumerate(gpu_memory)} + gpu_memory_map = {f'gpu_{index}': memory for index, memory in enumerate(gpu_memory)} return gpu_memory_map From eab2fe7794de3a8d1fce98df954ba79d9d0d1852 Mon Sep 17 00:00:00 2001 From: Yongrae Jo Date: Sun, 3 Nov 2019 23:25:13 +0900 Subject: [PATCH 5/7] #452 Add test for get_memory_profile --- tests/test_gpu_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_gpu_models.py b/tests/test_gpu_models.py index 118a3f2a521d3..17f8a8fd41b5d 100644 --- a/tests/test_gpu_models.py +++ b/tests/test_gpu_models.py @@ -224,7 +224,7 @@ def test_multi_gpu_model_dp(): testing_utils.run_gpu_model_test(trainer_options, model, hparams) # test memory helper functions - memory.get_gpu_memory_map() + memory.get_memory_profile('min_max') def test_ddp_sampler_error(): From 7c377eb37cab527f690828df01e01296262bc23d Mon Sep 17 00:00:00 2001 From: Yongrae Jo Date: Tue, 5 Nov 2019 00:02:50 +0900 Subject: [PATCH 6/7] #452 Use os.sep --- pytorch_lightning/root_module/memory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pytorch_lightning/root_module/memory.py b/pytorch_lightning/root_module/memory.py index 3b968bb3bfdeb..0d7cdf087799c 100644 --- a/pytorch_lightning/root_module/memory.py +++ b/pytorch_lightning/root_module/memory.py @@ -3,6 +3,7 @@ ''' import gc +import os import subprocess import numpy as np @@ -225,7 +226,7 @@ def get_gpu_memory_map(): capture_output=True, check=True) # Convert lines into a dictionary - gpu_memory = [int(x) for x in result.stdout.strip().split('\n')] + gpu_memory = [int(x) for x in result.stdout.strip().split(os.sep)] gpu_memory_map = {f'gpu_{index}': memory for index, memory in enumerate(gpu_memory)} return gpu_memory_map From dacc4de727c6329a6773eaf0d3e14a209eaab954 Mon Sep 17 00:00:00 2001 From: Yongrae Jo Date: Tue, 5 Nov 2019 00:15:01 +0900 Subject: [PATCH 7/7] #452 Use os.linesep --- pytorch_lightning/root_module/memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch_lightning/root_module/memory.py b/pytorch_lightning/root_module/memory.py index 0d7cdf087799c..53203e7ae6a01 100644 --- a/pytorch_lightning/root_module/memory.py +++ b/pytorch_lightning/root_module/memory.py @@ -226,7 +226,7 @@ def get_gpu_memory_map(): capture_output=True, check=True) # Convert lines into a dictionary - gpu_memory = [int(x) for x in result.stdout.strip().split(os.sep)] + gpu_memory = [int(x) for x in result.stdout.strip().split(os.linesep)] gpu_memory_map = {f'gpu_{index}': memory for index, memory in enumerate(gpu_memory)} return gpu_memory_map