Skip to content
This repository has been archived by the owner on Mar 15, 2021. It is now read-only.

Commit

Permalink
better sample_set and dataframe support
Browse files Browse the repository at this point in the history
  • Loading branch information
brifordwylie committed Aug 20, 2014
1 parent 10cdcd1 commit 8f04c37
Showing 1 changed file with 16 additions and 2 deletions.
18 changes: 16 additions & 2 deletions workbench_apps/workbench_cli/workbench_shell.py
Expand Up @@ -106,6 +106,9 @@ def load_sample(self, file_path, tags=None):
print '\n%s %s%s %sLocked and Loaded...%s\n' % \
(self.beer, F.MAGENTA, md5[:6], F.YELLOW, F.RESET)

# Call meta on the sample (might want to think about this)
self.workbench.work_request('meta', md5)

# Store information about the sample into the session
basename = os.path.basename(path)
self.session.filename = basename
Expand All @@ -128,6 +131,12 @@ def pull_df(self, md5):
except zerorpc.exceptions.RemoteError as e:
return repr_to_str_decorator.r_to_s(self._data_not_found)(e)

def flatten_tags(self, my_df):
    """Flatten (vectorize) the 'tags' column of a dataframe.

    Every distinct tag found in the 'tags' column becomes its own 0/1
    indicator column, and the 'tags' column itself is rewritten from a
    list of tags into a single ', '-separated string.

    Args:
        my_df: a pandas DataFrame with a 'tags' column whose cells are
            lists of tag strings.
    Returns:
        A new DataFrame: my_df joined with one indicator column per tag,
        the indicator columns ordered by tag name.
    Raises:
        ValueError: from DataFrame.join when a tag name is identical to a
            column that already exists in my_df.
    Note:
        my_df['tags'] is overwritten in place with the joined strings, so
        the caller's dataframe is modified as well.
    """
    # Function-scope import keeps this method self-contained.
    import pandas as pd

    # Build the indicator columns straight from the tag lists. Joining the
    # tags with '-' and splitting them again would tear apart any tag that
    # itself contains a hyphen (e.g. 'pe-file' -> 'pe' + 'file').
    tag_sets = [set(tag_list) for tag_list in my_df['tags']]
    tag_names = sorted(tag for tag in set().union(*tag_sets) if tag)
    tags_df = pd.DataFrame(
        {name: [int(name in tags) for tags in tag_sets] for name in tag_names},
        index=my_df.index, columns=tag_names)

    # Human-readable form of the tag list for display purposes
    my_df['tags'] = [', '.join(tag_list) for tag_list in my_df['tags']]
    return my_df.join(tags_df)

def search(self, tags='all'):
"""Wrapper for the Workbench search method
Args:
Expand All @@ -138,10 +147,10 @@ def search(self, tags='all'):

# Fixme: This needs to be improved to handle arbitrary predicates (MongoDB predicates)
if tags == 'all':
return [item['md5'] for item in self.workbench.list_samples()]
return self.workbench.generate_sample_set()
elif isinstance(tags, str):
tags = [tags]
return [item['md5'] for item in self.workbench.list_samples({'tags': {'$in': tags}})]
return self.workbench.generate_sample_set({'tags': {'$in': tags}})

def versions(self):
"""Announce Versions of CLI and Server
Expand Down Expand Up @@ -230,6 +239,10 @@ def _work_request(self, worker, md5=None):
elif not md5:
md5 = self.session.md5

# Is the md5 a sample_set?
if self.workbench.is_sample_set(md5):
return self.workbench.set_work_request(worker, md5)

# Make the work_request with worker and md5 args
try:
return self.workbench.work_request(worker, md5)
Expand Down Expand Up @@ -262,6 +275,7 @@ def _generate_command_dict(self):
'help': self._help,
'load_sample': self.load_sample,
'pull_df': self.pull_df,
'flatten_tags': self.flatten_tags,
'search': self.search,
'reconnect': lambda info=self.server_info: self._connect(info),
'version': self.versions,
Expand Down

0 comments on commit 8f04c37

Please sign in to comment.