From f78ce4e60aa4db678149705737cb104e7d83e387 Mon Sep 17 00:00:00 2001
From: John Calderon <81483067+johncalesp@users.noreply.github.com>
Date: Tue, 9 May 2023 11:29:35 -0400
Subject: [PATCH] added error messages and memory reset (#53)

* added error messages and memory reset

* renamed utils
---
 deepview_profile/analysis/runner.py        |  12 +-
 deepview_profile/analysis/session.py       | 269 +++++++++++----------
 deepview_profile/profiler/iteration.py     |   4 +-
 deepview_profile/protocol_gen/innpv_pb2.py | 110 +++++----
 deepview_profile/utils.py                  |  10 +
 protocol/innpv.proto                       |   2 +
 6 files changed, 229 insertions(+), 178 deletions(-)
 create mode 100644 deepview_profile/utils.py

diff --git a/deepview_profile/analysis/runner.py b/deepview_profile/analysis/runner.py
index d73101c..bd5aac8 100644
--- a/deepview_profile/analysis/runner.py
+++ b/deepview_profile/analysis/runner.py
@@ -5,23 +5,23 @@
 import torch
 from deepview_profile.analysis.session import AnalysisSession
 from deepview_profile.nvml import NVML
-
+from deepview_profile.utils import release_memory
 
 def analyze_project(project_root, entry_point, nvml):
-    torch.cuda.empty_cache()
+    release_memory()
     session = AnalysisSession.new_from(project_root, entry_point)
     yield session.measure_breakdown(nvml)
-    torch.cuda.empty_cache()
+    release_memory()
     yield session.measure_throughput()
-    torch.cuda.empty_cache()
+    release_memory()
 
     print("analyze_project: running deepview_predict()")
     yield session.habitat_predict()
-    torch.cuda.empty_cache()
+    release_memory()
 
     print("analyze_project: running energy_compute()")
     yield session.energy_compute()
-    torch.cuda.empty_cache()
+    release_memory()
 
 
 def main():
diff --git a/deepview_profile/analysis/session.py b/deepview_profile/analysis/session.py
index cc12088..3e92150 100644
--- a/deepview_profile/analysis/session.py
+++ b/deepview_profile/analysis/session.py
@@ -150,47 +150,53 @@ def energy_compute(self) -> pm.EnergyResponse:
             for _ in range(iterations):
                 iteration(*inputs)
             energy_measurer.end_measurement()
-        except PermissionError as err:
-            # Remind user to set their CPU permissions
-            print(err)
+            resp.total_consumption = energy_measurer.total_energy()/float(iterations)
+            resp.batch_size = self._batch_size
+
+            components = []
+            components_joules = []
+
+            if energy_measurer.cpu_energy() is not None:
+                cpu_component = pm.EnergyConsumptionComponent()
+                cpu_component.component_type = pm.ENERGY_CPU_DRAM
+                cpu_component.consumption_joules = energy_measurer.cpu_energy()/float(iterations)
+                components.append(cpu_component)
+                components_joules.append(cpu_component.consumption_joules)
+            else:
+                cpu_component = pm.EnergyConsumptionComponent()
+                cpu_component.component_type = pm.ENERGY_CPU_DRAM
+                cpu_component.consumption_joules = 0.0
+                components.append(cpu_component)
+                components_joules.append(cpu_component.consumption_joules)
             
-        resp.total_consumption = energy_measurer.total_energy()/float(iterations)
-        resp.batch_size = self._batch_size
-
-        components = []
-        components_joules = []
-
-        if energy_measurer.cpu_energy() is not None:
-            cpu_component = pm.EnergyConsumptionComponent()
-            cpu_component.component_type = pm.ENERGY_CPU_DRAM
-            cpu_component.consumption_joules = energy_measurer.cpu_energy()/float(iterations)
-            components.append(cpu_component)
-            components_joules.append(cpu_component.consumption_joules)
-        else:
-            cpu_component = pm.EnergyConsumptionComponent()
-            cpu_component.component_type = pm.ENERGY_CPU_DRAM
-            cpu_component.consumption_joules = 0.0
-            components.append(cpu_component)
-            components_joules.append(cpu_component.consumption_joules)
+            gpu_component = pm.EnergyConsumptionComponent()
+            gpu_component.component_type = pm.ENERGY_NVIDIA
+            gpu_component.consumption_joules = energy_measurer.gpu_energy()/float(iterations)
+            components.append(gpu_component)
+            components_joules.append(gpu_component.consumption_joules)
+            
+            resp.components.extend(components)
         
-        gpu_component = pm.EnergyConsumptionComponent()
-        gpu_component.component_type = pm.ENERGY_NVIDIA
-        gpu_component.consumption_joules = energy_measurer.gpu_energy()/float(iterations)
-        components.append(gpu_component)
-        components_joules.append(gpu_component.consumption_joules)
+            # get last 10 runs if they exist
+            path_to_entry_point = os.path.join(self._project_root, self._entry_point)
+            past_runs = self._energy_table_interface.get_latest_n_entries_of_entry_point(10, path_to_entry_point)
+            resp.past_measurements.extend(_convert_to_energy_responses(past_runs))
+
+            # add current run to database
+            current_entry = [path_to_entry_point] + components_joules
+            current_entry.append(self._batch_size)
+            self._energy_table_interface.add_entry(current_entry)
+        except AnalysisError as ex:
+            message = str(ex)
+            logger.error(message)
+            resp.analysis_error.error_message = message
+        except Exception:
+            logger.exception("There was an error obtaining energy measurements")
+            resp.analysis_error.error_message = "There was an error obtaining energy measurements"
+        # Return after the try (not from `finally`) so KeyboardInterrupt/SystemExit still propagate.
+        return resp
+            
         
-        resp.components.extend(components)
-    
-        # get last 10 runs if they exist
-        path_to_entry_point = os.path.join(self._project_root, self._entry_point)
-        past_runs = self._energy_table_interface.get_latest_n_entries_of_entry_point(10, path_to_entry_point)
-        resp.past_measurements.extend(_convert_to_energy_responses(past_runs))
-
-        # add current run to database
-        current_entry = [path_to_entry_point] + components_joules
-        current_entry.append(self._batch_size)
-        self._energy_table_interface.add_entry(current_entry)
-        return resp
 
     def habitat_compute_threshold(self, runnable, context):
         tracker = habitat.OperationTracker(context.origin_device)
@@ -210,102 +216,115 @@ def habitat_compute_threshold(self, runnable, context):
 
 
     def habitat_predict(self):
-        resp = pm.HabitatResponse()
+        resp = pm.HabitatResponse() 
         if not habitat_found: 
             logger.debug("Skipping deepview predictions, returning empty response.")
             return resp
 
-        print("deepview_predict: begin")
-        DEVICES = [
-            habitat.Device.P100,
-            habitat.Device.P4000,
-            habitat.Device.RTX2070,
-            habitat.Device.RTX2080Ti,
-            habitat.Device.T4,
-            habitat.Device.V100,
-            habitat.Device.A100,
-            habitat.Device.RTX3090,
-            habitat.Device.A40,
-            habitat.Device.A4000,
-            habitat.Device.RTX4000
-        ]
-
-        # Detect source GPU
-        pynvml.nvmlInit()
-        if pynvml.nvmlDeviceGetCount() == 0:
-            raise Exception("NVML failed to find a GPU. PLease ensure that you have a NVIDIA GPU installed and that the drivers are functioning correctly.")
-
-        # TODO: Consider profiling on not only the first detected GPU
-        nvml_handle = pynvml.nvmlDeviceGetHandleByIndex(0)
-        source_device_name = pynvml.nvmlDeviceGetName(nvml_handle).decode("utf-8")
-        split_source_device_name = re.split(r"-|\s|_|\\|/", source_device_name)
-        source_device = None if logging.root.level > logging.DEBUG else habitat.Device.T4
-        for device in DEVICES:
-            if device.name in split_source_device_name:
-                source_device = device
-        pynvml.nvmlShutdown()
-        if not source_device:
-            logger.debug("Skipping DeepView predictions, source not in list of supported GPUs.")
-            src = pm.HabitatDevicePrediction()
-            src.device_name = 'unavailable'
-            src.runtime_ms = -1
-            resp.predictions.append(src)
-            return resp
-
-        print("deepview_predict: detected source device", source_device.name)
+        try:
+            print("deepview_predict: begin")
+            DEVICES = [
+                habitat.Device.P100,
+                habitat.Device.P4000,
+                habitat.Device.RTX2070,
+                habitat.Device.RTX2080Ti,
+                habitat.Device.T4,
+                habitat.Device.V100,
+                habitat.Device.A100,
+                habitat.Device.RTX3090,
+                habitat.Device.A40,
+                habitat.Device.A4000,
+                habitat.Device.RTX4000
+            ]
+
+            # Detect source GPU
+            pynvml.nvmlInit()
+            if pynvml.nvmlDeviceGetCount() == 0:
+                raise Exception("NVML failed to find a GPU. PLease ensure that you have a NVIDIA GPU installed and that the drivers are functioning correctly.")
+
+            # TODO: Consider profiling on not only the first detected GPU
+            nvml_handle = pynvml.nvmlDeviceGetHandleByIndex(0)
+            source_device_name = pynvml.nvmlDeviceGetName(nvml_handle).decode("utf-8")
+            split_source_device_name = re.split(r"-|\s|_|\\|/", source_device_name)
+            source_device = None if logging.root.level > logging.DEBUG else habitat.Device.T4
+            for device in DEVICES:
+                if device.name in split_source_device_name:
+                    source_device = device
+            pynvml.nvmlShutdown()
+            if not source_device:
+                logger.debug("Skipping DeepView predictions, source not in list of supported GPUs.")
+                src = pm.HabitatDevicePrediction()
+                src.device_name = 'unavailable'
+                src.runtime_ms = -1
+                resp.predictions.append(src)
+                return resp
+
+            print("deepview_predict: detected source device", source_device.name)
+
+            # get model
+            model = self._model_provider()
+            inputs = self._input_provider()
+            iteration = self._iteration_provider(model)
 
-        # get model
-        model = self._model_provider()
-        inputs = self._input_provider()
-        iteration = self._iteration_provider(model)
+            def runnable():
+                iteration(*inputs)
 
-        def runnable():
-            iteration(*inputs)
+            profiler = RunTimeProfiler()
 
-        profiler = RunTimeProfiler()
+            context = Context(
+                origin_device=source_device,
+                profiler=profiler,
+                percentile=99.5
+            )
 
-        context = Context(
-            origin_device=source_device,
-            profiler=profiler,
-            percentile=99.5
-        )
+            threshold = self.habitat_compute_threshold(runnable, context)
+            
+            tracker = habitat.OperationTracker(
+                device=context.origin_device,
+                metrics=[
+                    habitat.Metric.SinglePrecisionFLOPEfficiency,
+                    habitat.Metric.DRAMReadBytes,
+                    habitat.Metric.DRAMWriteBytes,
+                ],
+                metrics_threshold_ms=threshold,
+            )
 
-        threshold = self.habitat_compute_threshold(runnable, context)
         
-        tracker = habitat.OperationTracker(
-            device=context.origin_device,
-            metrics=[
-                habitat.Metric.SinglePrecisionFLOPEfficiency,
-                habitat.Metric.DRAMReadBytes,
-                habitat.Metric.DRAMWriteBytes,
-            ],
-            metrics_threshold_ms=threshold,
-        )
-
-        with tracker.track():
-            iteration(*inputs)
-
-        print("deepview_predict: tracing on origin device")
-        trace = tracker.get_tracked_trace()
-
-        src = pm.HabitatDevicePrediction()
-        src.device_name = 'source'
-        src.runtime_ms = trace.run_time_ms
-        resp.predictions.append(src)
-
-        for device in DEVICES:
-            print("deepview_predict: predicting for", device)
-            predicted_trace = trace.to_device(device)
-
-            pred = pm.HabitatDevicePrediction()
-            pred.device_name = device.name
-            pred.runtime_ms = predicted_trace.run_time_ms
-            resp.predictions.append(pred)
+            with tracker.track():
+                iteration(*inputs)
+            
+            print("deepview_predict: tracing on origin device")
+            trace = tracker.get_tracked_trace()
 
-        print(f"returning {len(resp.predictions)} predictions.")
+            src = pm.HabitatDevicePrediction()
+            src.device_name = 'source'
+            src.runtime_ms = trace.run_time_ms
+            resp.predictions.append(src)
 
-        return resp 
+            for device in DEVICES:
+                print("deepview_predict: predicting for", device)
+                predicted_trace = trace.to_device(device)
+
+                pred = pm.HabitatDevicePrediction()
+                pred.device_name = device.name
+                pred.runtime_ms = predicted_trace.run_time_ms
+                resp.predictions.append(pred)
+
+            print(f"returning {len(resp.predictions)} predictions.")
+        except AnalysisError as ex:
+            message = str(ex)
+            logger.error(message)
+            resp.analysis_error.error_message = message
+        except Exception:
+            logger.exception("There was an error running DeepView Predict")
+            resp.analysis_error.error_message = "There was an error running DeepView Predict"
+        # Return after the try (not from `finally`) so KeyboardInterrupt/SystemExit still propagate.
+        return resp
+                
 
+        
+       
+        
 
     def measure_breakdown(self, nvml):
         # 1. Measure the breakdown entries
@@ -361,6 +380,7 @@ def measure_throughput(self):
             )
 
         # 2. Begin filling in the throughput response
+        logger.debug("sampling results: %s", samples)
         measured_throughput = (
             samples[0].batch_size / samples[0].run_time_ms * 1000
         )
@@ -405,18 +425,19 @@ def measure_throughput(self):
         throughput.peak_usage_bytes.bias = peak_usage_model[1]
 
         predicted_max_throughput = 1000.0 / run_time_model[0]
-
+        
         # Our prediction can be inaccurate due to sampling error or incorrect
         # assumptions. In these cases, we ignore our prediction. At the very
         # minimum, a good linear model has a positive slope and bias.
-        if (run_time_model[0] < 1e-3 or run_time_model[1] < 1e-3 or
+        # NOTE: only the slope is validated below; the former bias check (run_time_model[1] < 1e-3) is no longer applied.
+        if (run_time_model[0] < 1e-3 or
                 measured_throughput > predicted_max_throughput):
             return throughput
 
         throughput.predicted_max_samples_per_second = predicted_max_throughput
         throughput.run_time_ms.slope = run_time_model[0]
         throughput.run_time_ms.bias = run_time_model[1]
-
+        
         return throughput
 
     def measure_peak_usage_bytes(self):
diff --git a/deepview_profile/profiler/iteration.py b/deepview_profile/profiler/iteration.py
index 824b453..831cc11 100644
--- a/deepview_profile/profiler/iteration.py
+++ b/deepview_profile/profiler/iteration.py
@@ -1,10 +1,10 @@
 import collections
 import logging
-
 import torch
 
 from deepview_profile.exceptions import AnalysisError
 from deepview_profile.user_code_utils import user_code_environment
+from deepview_profile.utils import release_memory
 
 logger = logging.getLogger(__name__)
 
@@ -49,6 +49,7 @@ def measure_run_time_ms(self, batch_size, initial_repetitions=None):
         NOTE: This method will raise a RuntimeError if there is not enough GPU
               memory to run the iteration.
         """
+
         with user_code_environment(
                 self._path_to_entry_point_dir, self._project_root):
             inputs = self._input_provider(batch_size=batch_size)
@@ -111,6 +112,7 @@ def measure_run_time_ms_catch_oom(
             self, batch_size, initial_repetitions=None):
         # This function is useful when we want to explicitly handle OOM errors
         # without aborting the profiling.
+        release_memory()
         try:
             return (
                 None,
diff --git a/deepview_profile/protocol_gen/innpv_pb2.py b/deepview_profile/protocol_gen/innpv_pb2.py
index 967c333..e2b266b 100644
--- a/deepview_profile/protocol_gen/innpv_pb2.py
+++ b/deepview_profile/protocol_gen/innpv_pb2.py
@@ -20,7 +20,7 @@
   syntax='proto3',
   serialized_options=None,
   create_key=_descriptor._internal_create_key,
-  serialized_pb=b'\n\x0binnpv.proto\x12\x0einnpv.protocol\"\xcf\x01\n\nFromClient\x12\x17\n\x0fsequence_number\x18\x01 \x01(\r\x12\x37\n\ninitialize\x18\x02 \x01(\x0b\x32!.innpv.protocol.InitializeRequestH\x00\x12\x33\n\x08\x61nalysis\x18\x03 \x01(\x0b\x32\x1f.innpv.protocol.AnalysisRequestH\x00\x12/\n\x07generic\x18\x04 \x01(\x0b\x32\x1c.innpv.protocol.GenericEventH\x00\x42\t\n\x07payload\">\n\x0cGenericEvent\x12\x12\n\nevent_type\x18\x01 \x01(\t\x12\x1a\n\x12optional_arguments\x18\x02 \x01(\t\"X\n\x11InitializeRequest\x12\x18\n\x10protocol_version\x18\x01 \x01(\r\x12\x14\n\x0cproject_root\x18\x02 \x01(\t\x12\x13\n\x0b\x65ntry_point\x18\x03 \x01(\t\"(\n\x0f\x41nalysisRequest\x12\x15\n\rmock_response\x18\x01 \x01(\x08\"\xcf\x03\n\nFromServer\x12\x17\n\x0fsequence_number\x18\x01 \x01(\r\x12.\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x1d.innpv.protocol.ProtocolErrorH\x00\x12\x38\n\ninitialize\x18\x03 \x01(\x0b\x32\".innpv.protocol.InitializeResponseH\x00\x12\x37\n\x0e\x61nalysis_error\x18\x05 \x01(\x0b\x32\x1d.innpv.protocol.AnalysisErrorH\x00\x12\x38\n\nthroughput\x18\x06 \x01(\x0b\x32\".innpv.protocol.ThroughputResponseH\x00\x12\x36\n\tbreakdown\x18\x08 \x01(\x0b\x32!.innpv.protocol.BreakdownResponseH\x00\x12\x32\n\x07habitat\x18\t \x01(\x0b\x32\x1f.innpv.protocol.HabitatResponseH\x00\x12\x30\n\x06\x65nergy\x18\n \x01(\x0b\x32\x1e.innpv.protocol.EnergyResponseH\x00\x42\t\n\x07payloadJ\x04\x08\x04\x10\x05J\x04\x08\x07\x10\x08R\x0cmemory_usageR\x08run_time\"B\n\x17HabitatDevicePrediction\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x12\n\nruntime_ms\x18\x02 \x01(\x02\"O\n\x0fHabitatResponse\x12<\n\x0bpredictions\x18\x01 \x03(\x0b\x32\'.innpv.protocol.HabitatDevicePrediction\"\xba\x01\n\x0e\x45nergyResponse\x12\x19\n\x11total_consumption\x18\x01 \x01(\x02\x12>\n\ncomponents\x18\x02 \x03(\x0b\x32*.innpv.protocol.EnergyConsumptionComponent\x12\x12\n\nbatch_size\x18\x03 \x01(\x05\x12\x39\n\x11past_measurements\x18\x04 
\x03(\x0b\x32\x1e.innpv.protocol.EnergyResponse\"\x80\x01\n\x1a\x45nergyConsumptionComponent\x12\x46\n\x0e\x63omponent_type\x18\x01 \x01(\x0e\x32..innpv.protocol.EnergyConsumptionComponentType\x12\x1a\n\x12\x63onsumption_joules\x18\x02 \x01(\x02\"\x8c\x01\n\x12InitializeResponse\x12\x1b\n\x13server_project_root\x18\x01 \x01(\t\x12)\n\x0b\x65ntry_point\x18\x02 \x01(\x0b\x32\x14.innpv.protocol.Path\x12.\n\x08hardware\x18\x03 \x01(\x0b\x32\x1c.innpv.protocol.HardwareInfo\"[\n\rAnalysisError\x12\x15\n\rerror_message\x18\x01 \x01(\t\x12\x33\n\x0c\x66ile_context\x18\x02 \x01(\x0b\x32\x1d.innpv.protocol.FileReference\"\xa1\x02\n\x12ThroughputResponse\x12\x1a\n\x12samples_per_second\x18\x01 \x01(\x02\x12(\n predicted_max_samples_per_second\x18\x02 \x01(\x02\x12\x30\n\x0brun_time_ms\x18\x03 \x01(\x0b\x32\x1b.innpv.protocol.LinearModel\x12\x35\n\x10peak_usage_bytes\x18\x04 \x01(\x0b\x32\x1b.innpv.protocol.LinearModel\x12\x39\n\x12\x62\x61tch_size_context\x18\x05 \x01(\x0b\x32\x1d.innpv.protocol.FileReference\x12!\n\x19\x63\x61n_manipulate_batch_size\x18\x06 \x01(\x08\"\xea\x01\n\x11\x42reakdownResponse\x12\x18\n\x10peak_usage_bytes\x18\x01 \x01(\x04\x12\x1d\n\x15memory_capacity_bytes\x18\x02 \x01(\x04\x12\x1d\n\x15iteration_run_time_ms\x18\x03 \x01(\x02\x12\x12\n\nbatch_size\x18\x06 \x01(\r\x12\x35\n\x0eoperation_tree\x18\x04 \x03(\x0b\x32\x1d.innpv.protocol.BreakdownNode\x12\x32\n\x0bweight_tree\x18\x05 \x03(\x0b\x32\x1d.innpv.protocol.BreakdownNode\"\xca\x01\n\rProtocolError\x12;\n\nerror_code\x18\x01 \x01(\x0e\x32\'.innpv.protocol.ProtocolError.ErrorCode\"|\n\tErrorCode\x12\x0b\n\x07UNKNOWN\x10\x00\x12 \n\x1cUNSUPPORTED_PROTOCOL_VERSION\x10\x01\x12\x1c\n\x18UNINITIALIZED_CONNECTION\x10\x02\x12\"\n\x1e\x41LREADY_INITIALIZED_CONNECTION\x10\x03\"\x1a\n\x04Path\x12\x12\n\ncomponents\x18\x01 \x03(\t\"M\n\rFileReference\x12\'\n\tfile_path\x18\x01 \x01(\x0b\x32\x14.innpv.protocol.Path\x12\x13\n\x0bline_number\x18\x02 \x01(\r\"\xce\x01\n\rBreakdownNode\x12\x0c\n\x04name\x18\x01 
\x01(\t\x12\x14\n\x0cnum_children\x18\x02 \x01(\r\x12/\n\x08\x63ontexts\x18\x03 \x03(\x0b\x32\x1d.innpv.protocol.FileReference\x12\x32\n\toperation\x18\x04 \x01(\x0b\x32\x1d.innpv.protocol.OperationDataH\x00\x12,\n\x06weight\x18\x05 \x01(\x0b\x32\x1a.innpv.protocol.WeightDataH\x00\x42\x06\n\x04\x64\x61ta\"{\n\x0b\x43ontextInfo\x12.\n\x07\x63ontext\x18\x01 \x01(\x0b\x32\x1d.innpv.protocol.FileReference\x12\x13\n\x0brun_time_ms\x18\x02 \x01(\x02\x12\x12\n\nsize_bytes\x18\x03 \x01(\x04\x12\x13\n\x0binvocations\x18\x04 \x01(\r\"\x83\x01\n\rOperationData\x12\x12\n\nforward_ms\x18\x01 \x01(\x02\x12\x13\n\x0b\x62\x61\x63kward_ms\x18\x02 \x01(\x02\x12\x12\n\nsize_bytes\x18\x03 \x01(\x04\x12\x35\n\x10\x63ontext_info_map\x18\x04 \x03(\x0b\x32\x1b.innpv.protocol.ContextInfo\"9\n\nWeightData\x12\x12\n\nsize_bytes\x18\x01 \x01(\x04\x12\x17\n\x0fgrad_size_bytes\x18\x02 \x01(\x04\"*\n\x0bLinearModel\x12\r\n\x05slope\x18\x01 \x01(\x01\x12\x0c\n\x04\x62ias\x18\x02 \x01(\x01\":\n\x0cHardwareInfo\x12\x10\n\x08hostname\x18\x01 \x01(\t\x12\n\n\x02os\x18\x02 \x01(\t\x12\x0c\n\x04gpus\x18\x03 \x03(\t\"\x1b\n\x13MemoryUsageResponseJ\x04\x08\x01\x10\x65\"\x17\n\x0fRunTimeResponseJ\x04\x08\x01\x10\x65\"\x17\n\x0f\x41\x63tivationEntryJ\x04\x08\x01\x10\x65\"\x13\n\x0bWeightEntryJ\x04\x08\x01\x10\x65\"\x14\n\x0cRunTimeEntryJ\x04\x08\x01\x10\x65*`\n\x1e\x45nergyConsumptionComponentType\x12\x16\n\x12\x45NERGY_UNSPECIFIED\x10\x00\x12\x13\n\x0f\x45NERGY_CPU_DRAM\x10\x01\x12\x11\n\rENERGY_NVIDIA\x10\x02\x62\x06proto3'
+  serialized_pb=b'\n\x0binnpv.proto\x12\x0einnpv.protocol\"\xcf\x01\n\nFromClient\x12\x17\n\x0fsequence_number\x18\x01 \x01(\r\x12\x37\n\ninitialize\x18\x02 \x01(\x0b\x32!.innpv.protocol.InitializeRequestH\x00\x12\x33\n\x08\x61nalysis\x18\x03 \x01(\x0b\x32\x1f.innpv.protocol.AnalysisRequestH\x00\x12/\n\x07generic\x18\x04 \x01(\x0b\x32\x1c.innpv.protocol.GenericEventH\x00\x42\t\n\x07payload\">\n\x0cGenericEvent\x12\x12\n\nevent_type\x18\x01 \x01(\t\x12\x1a\n\x12optional_arguments\x18\x02 \x01(\t\"X\n\x11InitializeRequest\x12\x18\n\x10protocol_version\x18\x01 \x01(\r\x12\x14\n\x0cproject_root\x18\x02 \x01(\t\x12\x13\n\x0b\x65ntry_point\x18\x03 \x01(\t\"(\n\x0f\x41nalysisRequest\x12\x15\n\rmock_response\x18\x01 \x01(\x08\"\xcf\x03\n\nFromServer\x12\x17\n\x0fsequence_number\x18\x01 \x01(\r\x12.\n\x05\x65rror\x18\x02 \x01(\x0b\x32\x1d.innpv.protocol.ProtocolErrorH\x00\x12\x38\n\ninitialize\x18\x03 \x01(\x0b\x32\".innpv.protocol.InitializeResponseH\x00\x12\x37\n\x0e\x61nalysis_error\x18\x05 \x01(\x0b\x32\x1d.innpv.protocol.AnalysisErrorH\x00\x12\x38\n\nthroughput\x18\x06 \x01(\x0b\x32\".innpv.protocol.ThroughputResponseH\x00\x12\x36\n\tbreakdown\x18\x08 \x01(\x0b\x32!.innpv.protocol.BreakdownResponseH\x00\x12\x32\n\x07habitat\x18\t \x01(\x0b\x32\x1f.innpv.protocol.HabitatResponseH\x00\x12\x30\n\x06\x65nergy\x18\n \x01(\x0b\x32\x1e.innpv.protocol.EnergyResponseH\x00\x42\t\n\x07payloadJ\x04\x08\x04\x10\x05J\x04\x08\x07\x10\x08R\x0cmemory_usageR\x08run_time\"B\n\x17HabitatDevicePrediction\x12\x13\n\x0b\x64\x65vice_name\x18\x01 \x01(\t\x12\x12\n\nruntime_ms\x18\x02 \x01(\x02\"\x86\x01\n\x0fHabitatResponse\x12<\n\x0bpredictions\x18\x01 \x03(\x0b\x32\'.innpv.protocol.HabitatDevicePrediction\x12\x35\n\x0e\x61nalysis_error\x18\x02 \x01(\x0b\x32\x1d.innpv.protocol.AnalysisError\"\xf1\x01\n\x0e\x45nergyResponse\x12\x19\n\x11total_consumption\x18\x01 \x01(\x02\x12>\n\ncomponents\x18\x02 \x03(\x0b\x32*.innpv.protocol.EnergyConsumptionComponent\x12\x12\n\nbatch_size\x18\x03 
\x01(\x05\x12\x39\n\x11past_measurements\x18\x04 \x03(\x0b\x32\x1e.innpv.protocol.EnergyResponse\x12\x35\n\x0e\x61nalysis_error\x18\x05 \x01(\x0b\x32\x1d.innpv.protocol.AnalysisError\"\x80\x01\n\x1a\x45nergyConsumptionComponent\x12\x46\n\x0e\x63omponent_type\x18\x01 \x01(\x0e\x32..innpv.protocol.EnergyConsumptionComponentType\x12\x1a\n\x12\x63onsumption_joules\x18\x02 \x01(\x02\"\x8c\x01\n\x12InitializeResponse\x12\x1b\n\x13server_project_root\x18\x01 \x01(\t\x12)\n\x0b\x65ntry_point\x18\x02 \x01(\x0b\x32\x14.innpv.protocol.Path\x12.\n\x08hardware\x18\x03 \x01(\x0b\x32\x1c.innpv.protocol.HardwareInfo\"[\n\rAnalysisError\x12\x15\n\rerror_message\x18\x01 \x01(\t\x12\x33\n\x0c\x66ile_context\x18\x02 \x01(\x0b\x32\x1d.innpv.protocol.FileReference\"\xa1\x02\n\x12ThroughputResponse\x12\x1a\n\x12samples_per_second\x18\x01 \x01(\x02\x12(\n predicted_max_samples_per_second\x18\x02 \x01(\x02\x12\x30\n\x0brun_time_ms\x18\x03 \x01(\x0b\x32\x1b.innpv.protocol.LinearModel\x12\x35\n\x10peak_usage_bytes\x18\x04 \x01(\x0b\x32\x1b.innpv.protocol.LinearModel\x12\x39\n\x12\x62\x61tch_size_context\x18\x05 \x01(\x0b\x32\x1d.innpv.protocol.FileReference\x12!\n\x19\x63\x61n_manipulate_batch_size\x18\x06 \x01(\x08\"\xea\x01\n\x11\x42reakdownResponse\x12\x18\n\x10peak_usage_bytes\x18\x01 \x01(\x04\x12\x1d\n\x15memory_capacity_bytes\x18\x02 \x01(\x04\x12\x1d\n\x15iteration_run_time_ms\x18\x03 \x01(\x02\x12\x12\n\nbatch_size\x18\x06 \x01(\r\x12\x35\n\x0eoperation_tree\x18\x04 \x03(\x0b\x32\x1d.innpv.protocol.BreakdownNode\x12\x32\n\x0bweight_tree\x18\x05 \x03(\x0b\x32\x1d.innpv.protocol.BreakdownNode\"\xca\x01\n\rProtocolError\x12;\n\nerror_code\x18\x01 \x01(\x0e\x32\'.innpv.protocol.ProtocolError.ErrorCode\"|\n\tErrorCode\x12\x0b\n\x07UNKNOWN\x10\x00\x12 \n\x1cUNSUPPORTED_PROTOCOL_VERSION\x10\x01\x12\x1c\n\x18UNINITIALIZED_CONNECTION\x10\x02\x12\"\n\x1e\x41LREADY_INITIALIZED_CONNECTION\x10\x03\"\x1a\n\x04Path\x12\x12\n\ncomponents\x18\x01 
\x03(\t\"M\n\rFileReference\x12\'\n\tfile_path\x18\x01 \x01(\x0b\x32\x14.innpv.protocol.Path\x12\x13\n\x0bline_number\x18\x02 \x01(\r\"\xce\x01\n\rBreakdownNode\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x14\n\x0cnum_children\x18\x02 \x01(\r\x12/\n\x08\x63ontexts\x18\x03 \x03(\x0b\x32\x1d.innpv.protocol.FileReference\x12\x32\n\toperation\x18\x04 \x01(\x0b\x32\x1d.innpv.protocol.OperationDataH\x00\x12,\n\x06weight\x18\x05 \x01(\x0b\x32\x1a.innpv.protocol.WeightDataH\x00\x42\x06\n\x04\x64\x61ta\"{\n\x0b\x43ontextInfo\x12.\n\x07\x63ontext\x18\x01 \x01(\x0b\x32\x1d.innpv.protocol.FileReference\x12\x13\n\x0brun_time_ms\x18\x02 \x01(\x02\x12\x12\n\nsize_bytes\x18\x03 \x01(\x04\x12\x13\n\x0binvocations\x18\x04 \x01(\r\"\x83\x01\n\rOperationData\x12\x12\n\nforward_ms\x18\x01 \x01(\x02\x12\x13\n\x0b\x62\x61\x63kward_ms\x18\x02 \x01(\x02\x12\x12\n\nsize_bytes\x18\x03 \x01(\x04\x12\x35\n\x10\x63ontext_info_map\x18\x04 \x03(\x0b\x32\x1b.innpv.protocol.ContextInfo\"9\n\nWeightData\x12\x12\n\nsize_bytes\x18\x01 \x01(\x04\x12\x17\n\x0fgrad_size_bytes\x18\x02 \x01(\x04\"*\n\x0bLinearModel\x12\r\n\x05slope\x18\x01 \x01(\x01\x12\x0c\n\x04\x62ias\x18\x02 \x01(\x01\":\n\x0cHardwareInfo\x12\x10\n\x08hostname\x18\x01 \x01(\t\x12\n\n\x02os\x18\x02 \x01(\t\x12\x0c\n\x04gpus\x18\x03 \x03(\t\"\x1b\n\x13MemoryUsageResponseJ\x04\x08\x01\x10\x65\"\x17\n\x0fRunTimeResponseJ\x04\x08\x01\x10\x65\"\x17\n\x0f\x41\x63tivationEntryJ\x04\x08\x01\x10\x65\"\x13\n\x0bWeightEntryJ\x04\x08\x01\x10\x65\"\x14\n\x0cRunTimeEntryJ\x04\x08\x01\x10\x65*`\n\x1e\x45nergyConsumptionComponentType\x12\x16\n\x12\x45NERGY_UNSPECIFIED\x10\x00\x12\x13\n\x0f\x45NERGY_CPU_DRAM\x10\x01\x12\x11\n\rENERGY_NVIDIA\x10\x02\x62\x06proto3'
 )
 
 _ENERGYCONSUMPTIONCOMPONENTTYPE = _descriptor.EnumDescriptor(
@@ -48,8 +48,8 @@
   ],
   containing_type=None,
   serialized_options=None,
-  serialized_start=3202,
-  serialized_end=3298,
+  serialized_start=3313,
+  serialized_end=3409,
 )
 _sym_db.RegisterEnumDescriptor(_ENERGYCONSUMPTIONCOMPONENTTYPE)
 
@@ -89,8 +89,8 @@
   ],
   containing_type=None,
   serialized_options=None,
-  serialized_start=2216,
-  serialized_end=2340,
+  serialized_start=2327,
+  serialized_end=2451,
 )
 _sym_db.RegisterEnumDescriptor(_PROTOCOLERROR_ERRORCODE)
 
@@ -410,6 +410,13 @@
       message_type=None, enum_type=None, containing_type=None,
       is_extension=False, extension_scope=None,
       serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='analysis_error', full_name='innpv.protocol.HabitatResponse.analysis_error', index=1,
+      number=2, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
   ],
   extensions=[
   ],
@@ -422,8 +429,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=971,
-  serialized_end=1050,
+  serialized_start=972,
+  serialized_end=1106,
 )
 
 
@@ -463,6 +470,13 @@
       message_type=None, enum_type=None, containing_type=None,
       is_extension=False, extension_scope=None,
       serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
+    _descriptor.FieldDescriptor(
+      name='analysis_error', full_name='innpv.protocol.EnergyResponse.analysis_error', index=4,
+      number=5, type=11, cpp_type=10, label=1,
+      has_default_value=False, default_value=None,
+      message_type=None, enum_type=None, containing_type=None,
+      is_extension=False, extension_scope=None,
+      serialized_options=None, file=DESCRIPTOR,  create_key=_descriptor._internal_create_key),
   ],
   extensions=[
   ],
@@ -475,8 +489,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=1053,
-  serialized_end=1239,
+  serialized_start=1109,
+  serialized_end=1350,
 )
 
 
@@ -514,8 +528,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=1242,
-  serialized_end=1370,
+  serialized_start=1353,
+  serialized_end=1481,
 )
 
 
@@ -560,8 +574,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=1373,
-  serialized_end=1513,
+  serialized_start=1484,
+  serialized_end=1624,
 )
 
 
@@ -599,8 +613,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=1515,
-  serialized_end=1606,
+  serialized_start=1626,
+  serialized_end=1717,
 )
 
 
@@ -666,8 +680,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=1609,
-  serialized_end=1898,
+  serialized_start=1720,
+  serialized_end=2009,
 )
 
 
@@ -733,8 +747,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=1901,
-  serialized_end=2135,
+  serialized_start=2012,
+  serialized_end=2246,
 )
 
 
@@ -766,8 +780,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2138,
-  serialized_end=2340,
+  serialized_start=2249,
+  serialized_end=2451,
 )
 
 
@@ -798,8 +812,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2342,
-  serialized_end=2368,
+  serialized_start=2453,
+  serialized_end=2479,
 )
 
 
@@ -837,8 +851,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2370,
-  serialized_end=2447,
+  serialized_start=2481,
+  serialized_end=2558,
 )
 
 
@@ -902,8 +916,8 @@
       create_key=_descriptor._internal_create_key,
     fields=[]),
   ],
-  serialized_start=2450,
-  serialized_end=2656,
+  serialized_start=2561,
+  serialized_end=2767,
 )
 
 
@@ -955,8 +969,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2658,
-  serialized_end=2781,
+  serialized_start=2769,
+  serialized_end=2892,
 )
 
 
@@ -1008,8 +1022,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2784,
-  serialized_end=2915,
+  serialized_start=2895,
+  serialized_end=3026,
 )
 
 
@@ -1047,8 +1061,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2917,
-  serialized_end=2974,
+  serialized_start=3028,
+  serialized_end=3085,
 )
 
 
@@ -1086,8 +1100,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=2976,
-  serialized_end=3018,
+  serialized_start=3087,
+  serialized_end=3129,
 )
 
 
@@ -1132,8 +1146,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=3020,
-  serialized_end=3078,
+  serialized_start=3131,
+  serialized_end=3189,
 )
 
 
@@ -1157,8 +1171,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=3080,
-  serialized_end=3107,
+  serialized_start=3191,
+  serialized_end=3218,
 )
 
 
@@ -1182,8 +1196,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=3109,
-  serialized_end=3132,
+  serialized_start=3220,
+  serialized_end=3243,
 )
 
 
@@ -1207,8 +1221,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=3134,
-  serialized_end=3157,
+  serialized_start=3245,
+  serialized_end=3268,
 )
 
 
@@ -1232,8 +1246,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=3159,
-  serialized_end=3178,
+  serialized_start=3270,
+  serialized_end=3289,
 )
 
 
@@ -1257,8 +1271,8 @@
   extension_ranges=[],
   oneofs=[
   ],
-  serialized_start=3180,
-  serialized_end=3200,
+  serialized_start=3291,
+  serialized_end=3311,
 )
 
 _FROMCLIENT.fields_by_name['initialize'].message_type = _INITIALIZEREQUEST
@@ -1302,8 +1316,10 @@
   _FROMSERVER.fields_by_name['energy'])
 _FROMSERVER.fields_by_name['energy'].containing_oneof = _FROMSERVER.oneofs_by_name['payload']
 _HABITATRESPONSE.fields_by_name['predictions'].message_type = _HABITATDEVICEPREDICTION
+_HABITATRESPONSE.fields_by_name['analysis_error'].message_type = _ANALYSISERROR
 _ENERGYRESPONSE.fields_by_name['components'].message_type = _ENERGYCONSUMPTIONCOMPONENT
 _ENERGYRESPONSE.fields_by_name['past_measurements'].message_type = _ENERGYRESPONSE
+_ENERGYRESPONSE.fields_by_name['analysis_error'].message_type = _ANALYSISERROR
 _ENERGYCONSUMPTIONCOMPONENT.fields_by_name['component_type'].enum_type = _ENERGYCONSUMPTIONCOMPONENTTYPE
 _INITIALIZERESPONSE.fields_by_name['entry_point'].message_type = _PATH
 _INITIALIZERESPONSE.fields_by_name['hardware'].message_type = _HARDWAREINFO
diff --git a/deepview_profile/utils.py b/deepview_profile/utils.py
new file mode 100644
index 0000000..4f43bc6
--- /dev/null
+++ b/deepview_profile/utils.py
@@ -0,0 +1,10 @@
+import torch 
+import logging
+import gc
+
+logger = logging.getLogger(__name__)
+
+def release_memory():
+    logger.debug("Emptying cache")
+    gc.collect()  # free unreachable objects first, so tensors only they reference can be released
+    torch.cuda.empty_cache()  # then release PyTorch's unused cached CUDA memory
diff --git a/protocol/innpv.proto b/protocol/innpv.proto
index ed5b73c..c74767b 100644
--- a/protocol/innpv.proto
+++ b/protocol/innpv.proto
@@ -96,6 +96,7 @@ message HabitatDevicePrediction {
 
 message HabitatResponse {
   repeated HabitatDevicePrediction predictions = 1;
+  AnalysisError analysis_error = 2;
 }
 
 // Energy messages
@@ -108,6 +109,7 @@ message EnergyResponse {
 
   // A list of past energy measurements
   repeated EnergyResponse past_measurements = 4;
+  AnalysisError analysis_error = 5;
 }
 
 // Reports the energy consumption of one system component (e.g. CPU+DRAM or GPU)