Fix/small updates jun28 #50

Merged 2 commits on Jun 28, 2021
2 changes: 1 addition & 1 deletion coconnect/cdm/model.py
@@ -28,7 +28,7 @@ def __init__(self,**kwargs):
        name = self.__class__.__name__
        if 'name' in kwargs:
            name = kwargs['name']

        self.logger = Logger(self.__class__.__name__)
        self.logger.info("CommonDataModel created")

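A minimal sketch of the name-fallback pattern used in the __init__ above (illustrative only; the Example class and its print call are stand-ins, not coconnect code):

class Example:
    def __init__(self, **kwargs):
        # default to the class name, allow an optional 'name' keyword to override it
        name = self.__class__.__name__
        if 'name' in kwargs:
            name = kwargs['name']
        print(f"created with name={name}")

Example()              # -> created with name=Example
Example(name="demo")   # -> created with name=demo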
5 changes: 3 additions & 2 deletions coconnect/cli/subcommands/display.py
@@ -11,8 +11,9 @@ def display():
@click.argument('fname')
@click.option('--drop-na',is_flag=True)
@click.option('--markdown',is_flag=True)
def dataframe(fname,drop_na,markdown):
    df = pandas.read_csv(fname)
@click.option('--head',type=int,default=None)
def dataframe(fname,drop_na,markdown,head):
    df = pandas.read_csv(fname,nrows=head)
    if drop_na:
        df = df.dropna(axis=1,how='all')
    if markdown:
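The new --head option is passed straight through to pandas.read_csv as nrows, which caps how many data rows are parsed. A small self-contained sketch of that behaviour (the in-memory CSV is made up for illustration):

import io
import pandas

# ten-row CSV built in memory so the example runs without an input file
csv_text = "id,value\n" + "\n".join(f"{i},{i*i}" for i in range(10))

df = pandas.read_csv(io.StringIO(csv_text), nrows=5)   # parse at most 5 data rows
print(len(df))                                          # -> 5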
18 changes: 18 additions & 0 deletions coconnect/cli/subcommands/map.py
@@ -55,7 +55,24 @@ def diff(file1,file2):
def make_class(name,rules):
    data = tools.load_json(rules)
    tools.extract.make_class(data,name)


@click.command(help="flatten a rules json file")
@click.argument("rules")
def flatten(rules):
    data = tools.load_json(rules)
    objects = data['cdm']
    for destination_table,rule_set in objects.items():
        #print (rule_set)
        df = pd.DataFrame.from_records(rule_set).T
        #print (df.iloc[1])
        #print (df.iloc[1][1])
        print (df.iloc[1])
        print (df.iloc[1].apply(pd.Series))
        print (df.iloc[1].apply(pd.Series)['term_mapping'].apply(pd.Series))

        exit(0)


@click.command(help="List all the python classes that are available to run")
def list_classes():
@@ -189,3 +206,4 @@ def run(ctx,
map.add_command(remove_class,"remove")
map.add_command(run,"run")
map.add_command(diff,"diff")
map.add_command(flatten,"flatten")
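For orientation, the new flatten command turns each nested rule set into a dataframe and expands the per-field dictionaries with apply(pd.Series). A rough sketch of the same idea, using a made-up rule-set shape and the plain DataFrame constructor rather than from_records(...).T:

import pandas as pd

# hypothetical rule set shaped like one entry of data['cdm'] in a rules file
rule_set = {
    "person_0": {
        "gender_concept_id": {"source_table": "demo.csv",
                              "source_field": "sex",
                              "term_mapping": {"M": 8507, "F": 8532}},
        "year_of_birth":     {"source_table": "demo.csv",
                              "source_field": "yob",
                              "term_mapping": None},
    },
}

df = pd.DataFrame(rule_set)            # columns = rule names, rows = destination fields
# each cell is still a nested dict; apply(pd.Series) expands it into flat columns
print(df["person_0"].apply(pd.Series))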
34 changes: 25 additions & 9 deletions coconnect/tools/profiling.py
@@ -8,57 +8,73 @@

class Profiler:
    def __init__(self,name=None,interval=0.1):

        if name == None:
            name = self.__class__.__name__
        else:
            name = f"{self.__class__.__name__}_{name}"

        self.logger = Logger(self.__class__.__name__)

        #retrieve the process id for the current run
        self.pid = os.getpid()
        #create a psutil instance to monitor this process
        self.py = psutil.Process(self.pid)

        #set the interval (seconds) of how often to check the cpu and memory
        self.interval = interval
        self.logger.info(f"tracking {self.pid} every {self.interval} seconds")
        #count the number of cpus the computer running this process has
        self.cpu_count = psutil.cpu_count()
        self.logger.info(f"{self.cpu_count} cpus being used")

        self.logger.info(f"{self.cpu_count} cpus available")
        #initiate a threaded function
        #that will run in a separate thread and can monitor CPU/memory in the background
        self.th = threading.Thread(target=self.track)

        #init some global variables
        self.tracking = []
        self.init_time = time.time()
        self._stop = False
        self._df = None

    def start(self):
        #start the thread
        self.logger.info("starting profiling")
        self.th.start()

    def stop(self):
        #stop the thread
        self._stop = True
        self.th.join()
        self.logger.info("finished profiling")

    def get_df(self):
        #build a little dataframe of cpu/memory vs. time,
        #if it has not been built already
        if self._df is None:
            self._df = pd.DataFrame(self.tracking)
        return self._df

    def summary(self):
        #print the dataframe created for cpu/memory vs. time
        self.logger.info(self.get_df())

    def track(self):
        """
        Main function to profile CPU and memory usage
        """
        #while the program has been told to profile the usage
        while self._stop == False:

            #print (self.py,self.py.status(),self.py.is_running())
            #print (dir(self.py))

            #from the current process, calculate the current memory usage (in GB)
            memory = self.py.memory_info()[0]/2.**30
            #also calculate the CPU % in use at this epoch in time
            cpu = self.py.cpu_percent() / self.cpu_count
            #calculate the current time - time since the start of the process
            current_time = time.time() - self.init_time
            #log the data
            info = {'time[s]':current_time,'memory[GB]':memory,'cpu[%]':cpu}
            self.tracking.append(info)
            #sleep the number of seconds requested
            time.sleep(self.interval)

        #once finished, call the summary function
        self.summary()
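A hedged usage sketch for the Profiler above: start() kicks off the background sampling thread, stop() joins it, and get_df() returns the recorded samples (the import path is inferred from the file path and the workload is a stand-in):

from coconnect.tools.profiling import Profiler

profiler = Profiler(name="example", interval=0.1)
profiler.start()

total = sum(i * i for i in range(10_000_000))   # stand-in workload to be profiled

profiler.stop()                                 # ends sampling and logs a summary table
samples = profiler.get_df()                     # columns: time[s], memory[GB], cpu[%]
print(samples.tail())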