Fix/small updates jun28 #50

Merged 2 commits on Jun 28, 2021
2 changes: 1 addition & 1 deletion coconnect/cdm/model.py
@@ -28,7 +28,7 @@ def __init__(self,**kwargs):
        name = self.__class__.__name__
        if 'name' in kwargs:
            name = kwargs['name']

        self.logger = Logger(self.__class__.__name__)
        self.logger.info("CommonDataModel created")

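A minimal sketch of the name-fallback pattern used in the __init__ above (illustrative only; the Example class and its print call are stand-ins, not coconnect code):

class Example:
    def __init__(self, **kwargs):
        # default to the class name, allow an optional 'name' keyword to override it
        name = self.__class__.__name__
        if 'name' in kwargs:
            name = kwargs['name']
        print(f"created with name={name}")

Example()              # -> created with name=Example
Example(name="demo")   # -> created with name=demo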
5 changes: 3 additions & 2 deletions coconnect/cli/subcommands/display.py
@@ -11,8 +11,9 @@ def display():
@click.argument('fname')
@click.option('--drop-na',is_flag=True)
@click.option('--markdown',is_flag=True)
def dataframe(fname,drop_na,markdown):
    df = pandas.read_csv(fname)
@click.option('--head',type=int,default=None)
def dataframe(fname,drop_na,markdown,head):
    df = pandas.read_csv(fname,nrows=head)
    if drop_na:
        df = df.dropna(axis=1,how='all')
    if markdown:
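The new --head option is passed straight through to pandas.read_csv as nrows, which caps how many data rows are parsed. A small self-contained sketch of that behaviour (the in-memory CSV is made up for illustration):

import io
import pandas

# ten-row CSV built in memory so the example runs without an input file
csv_text = "id,value\n" + "\n".join(f"{i},{i*i}" for i in range(10))

df = pandas.read_csv(io.StringIO(csv_text), nrows=5)   # parse at most 5 data rows
print(len(df))                                          # -> 5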
18 changes: 18 additions & 0 deletions coconnect/cli/subcommands/map.py
@@ -55,7 +55,24 @@ def diff(file1,file2):
def make_class(name,rules):
    data = tools.load_json(rules)
    tools.extract.make_class(data,name)


@click.command(help="flatten a rules json file")
@click.argument("rules")
def flatten(rules):
    data = tools.load_json(rules)
    objects = data['cdm']
    for destination_table,rule_set in objects.items():
        #print (rule_set)
        df = pd.DataFrame.from_records(rule_set).T
        #print (df.iloc[1])
        #print (df.iloc[1][1])
        print (df.iloc[1])
        print (df.iloc[1].apply(pd.Series))
        print (df.iloc[1].apply(pd.Series)['term_mapping'].apply(pd.Series))

        exit(0)


@click.command(help="List all the python classes that are available to run")
def list_classes():
@@ -189,3 +206,4 @@ def run(ctx,
map.add_command(remove_class,"remove")
map.add_command(run,"run")
map.add_command(diff,"diff")
map.add_command(flatten,"flatten")
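For orientation, the new flatten command turns each nested rule set into a dataframe and expands the per-field dictionaries with apply(pd.Series). A rough sketch of the same idea, using a made-up rule-set shape and the plain DataFrame constructor rather than from_records(...).T:

import pandas as pd

# hypothetical rule set shaped like one entry of data['cdm'] in a rules file
rule_set = {
    "person_0": {
        "gender_concept_id": {"source_table": "demo.csv",
                              "source_field": "sex",
                              "term_mapping": {"M": 8507, "F": 8532}},
        "year_of_birth":     {"source_table": "demo.csv",
                              "source_field": "yob",
                              "term_mapping": None},
    },
}

df = pd.DataFrame(rule_set)            # columns = rule names, rows = destination fields
# each cell is still a nested dict; apply(pd.Series) expands it into flat columns
print(df["person_0"].apply(pd.Series))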
34 changes: 25 additions & 9 deletions coconnect/tools/profiling.py
@@ -8,57 +8,73 @@

class Profiler:
    def __init__(self,name=None,interval=0.1):

        if name == None:
            name = self.__class__.__name__
        else:
            name = f"{self.__class__.__name__}_{name}"

        self.logger = Logger(self.__class__.__name__)

        #retrieve the process id for the current run
        self.pid = os.getpid()
        #create a psutil instance to monitor this process
        self.py = psutil.Process(self.pid)

        #set the interval (seconds) of how often to check the cpu and memory
        self.interval = interval
        self.logger.info(f"tracking {self.pid} every {self.interval} seconds")
        #count the number of cpus the computer running this process has
        self.cpu_count = psutil.cpu_count()
        self.logger.info(f"{self.cpu_count} cpus being used")

        self.logger.info(f"{self.cpu_count} cpus available")
        #initiate a threaded function
        #that will run in a separate thread and can monitor CPU/memory in the background
        self.th = threading.Thread(target=self.track)

        #init some global variables
        self.tracking = []
        self.init_time = time.time()
        self._stop = False
        self._df = None

    def start(self):
        #start the thread
        self.logger.info("starting profiling")
        self.th.start()

    def stop(self):
        #stop the thread
        self._stop = True
        self.th.join()
        self.logger.info("finished profiling")

    def get_df(self):
        #build a little dataframe of cpu/memory vs. time,
        #if it has not been built already
        if self._df is None:
            self._df = pd.DataFrame(self.tracking)
        return self._df

    def summary(self):
        #print the dataframe created for cpu/memory vs. time
        self.logger.info(self.get_df())

    def track(self):
        """
        Main function to profile CPU and memory usage
        """
        #while the program has been told to profile the usage
        while self._stop == False:

            #print (self.py,self.py.status(),self.py.is_running())
            #print (dir(self.py))

            #from the current process, calculate the current memory usage (in GB)
            memory = self.py.memory_info()[0]/2.**30
            #also calculate the CPU % in use at this epoch in time
            cpu = self.py.cpu_percent() / self.cpu_count
            #calculate the current time - time since the start of the process
            current_time = time.time() - self.init_time
            #log the data
            info = {'time[s]':current_time,'memory[GB]':memory,'cpu[%]':cpu}
            self.tracking.append(info)
            #sleep the number of seconds requested
            time.sleep(self.interval)

        #once finished, call the summary function
        self.summary()
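A hedged usage sketch for the Profiler above: start() kicks off the background sampling thread, stop() joins it, and get_df() returns the recorded samples (the import path is inferred from the file path and the workload is a stand-in):

from coconnect.tools.profiling import Profiler

profiler = Profiler(name="example", interval=0.1)
profiler.start()

total = sum(i * i for i in range(10_000_000))   # stand-in workload to be profiled

profiler.stop()                                 # ends sampling and logs a summary table
samples = profiler.get_df()                     # columns: time[s], memory[GB], cpu[%]
print(samples.tail())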