Skip to content
This repository has been archived by the owner on Mar 15, 2021. It is now read-only.

Commit

Permalink
Merge pull request #380 from SuperCowPowers/cli_work
Browse files Browse the repository at this point in the history
Cli work
  • Loading branch information
brifordwylie committed Aug 22, 2014
2 parents 450fddd + a04b69e commit 3295ece
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 25 deletions.
3 changes: 2 additions & 1 deletion workbench/clients/short_md5s.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def run():
results = workbench.work_request('meta', md5[:6])
pprint.pprint(results)


import pytest
# xfail: a failure of this test is reported as an expected failure rather
# than an error.
# NOTE(review): presumably marked xfail because run() talks to a workbench
# server that may not be available when the tests run -- confirm.
@pytest.mark.xfail
def test():
"""Executes short md5 test by calling run()."""
run()
Expand Down
33 changes: 22 additions & 11 deletions workbench/server/data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def clean_for_storage(self, data):

def get_full_md5(self, partial_md5, collection):
    """Support partial/short md5s, return the full md5 with this method.

    Args:
        partial_md5: the leading characters of the md5 being looked up.
        collection: name of the database collection to search.
    Returns:
        The 'md5' field of the first document whose md5 starts with
        partial_md5, or None when no document matches.
    """
    # Function-scope import so this method does not rely on the module
    # already importing 're'.
    import re

    print('Notice: Performing slow md5 search...')

    # Anchor the pattern: an unanchored $regex matches the fragment anywhere
    # inside the md5, not just at the start. Escape the input so characters
    # such as '.' or '*' are matched literally instead of as regex syntax.
    starts_with = '^%s' % re.escape(partial_md5)
    sample_info = self.database[collection].find_one(
        {'md5': {'$regex': starts_with}}, {'md5': 1})
    return sample_info['md5'] if sample_info else None
Expand Down Expand Up @@ -307,6 +307,21 @@ def tag_match(self, tags=None):
cursor = self.database['tags'].find({'tags': {'$in': tags}}, {'_id':0, 'md5':1})
return [item['md5'] for item in cursor]

def tags_all(self):
    """Fetch the md5 and tags recorded for every sample.

    Args:
        None
    Returns:
        A list with one dict per document in the 'tags' collection,
        projected to the 'md5' and 'tags' fields ('_id' is excluded).
        None when the 'tags' collection does not exist.
    """
    if 'tags' in self.database.collection_names():
        # Empty filter selects every document; the projection trims each
        # one down to the two fields callers need.
        tag_cursor = self.database['tags'].find({}, {'_id': 0, 'md5': 1, 'tags': 1})
        return list(tag_cursor)

    print('Warning: Searching on non-existance tags collection')
    return None

def store_work_results(self, results, collection, md5):
"""Store the output results of the worker.
Expand All @@ -331,8 +346,9 @@ def store_work_results(self, results, collection, md5):
try:
self.database[collection].update({'md5':md5}, self.clean_for_storage(results), True)
except pymongo.errors.OperationFailure:
self.database[collection].insert({'md5':md5}, self.clean_for_storage(results), True)
print 'Could not update exising object in capped collection, doing an insert...'
#self.database[collection].insert({'md5':md5}, self.clean_for_storage(results), True)
print 'Could not update exising object in capped collection, punting...'
print 'collection: %s md5:%s' % (collection, md5)

def get_work_results(self, collection, md5):
"""Get the results of the worker.
Expand All @@ -344,11 +360,6 @@ def get_work_results(self, collection, md5):
Returns:
Dictionary of the worker result.
"""

# Support 'short' md5s but don't waste performance if the full md5 is provided
if len(md5) < 32:
md5 = self.get_full_md5(md5, collection)

return self.database[collection].find_one({'md5':md5})

def all_sample_md5s(self, type_tag=None):
Expand Down Expand Up @@ -417,6 +428,7 @@ def periodic_ops(self):
all_c.remove('fs.chunks')
all_c.remove('fs.files')
all_c.remove('info')
all_c.remove('tags')
all_c.remove(self.sample_collection)
except ValueError:
print 'Catching a benign exception thats expected...'
Expand All @@ -438,9 +450,8 @@ def periodic_ops(self):
# Add required indexes for samples collection
self.database[self.sample_collection].create_index('import_time')

# If the tags collection exists create an index on tags
if 'tags' in all_c:
self.database['tags'].create_index('tags')
# Create an index on tags
self.database['tags'].create_index('tags')

# Helper functions
def to_unicode(self, s):
Expand Down
13 changes: 8 additions & 5 deletions workbench/server/workbench_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,10 @@ def get_tags(self, md5):
tag_data = self.data_store.get_work_results('tags', md5)
return tag_data['tags'] if tag_data else None

def get_all_tags(self):
"""Get the tags for all samples (not just one sample).

Returns:
The result of data_store.tags_all(), passed through unchanged.
"""
return self.data_store.tags_all()


#######################
# Index Methods
Expand Down Expand Up @@ -540,8 +544,7 @@ def store_sample_set(self, md5_list):

for md5 in md5_list:
if not self.has_sample(md5):
raise RuntimeError('Sample not found all items in sample_set\
must be in the datastore: %s (not found)' % (md5))
raise RuntimeError('%s: Not found! All items in sample_set must be in the datastore' % (md5))
set_md5 = hashlib.md5(str(md5_list)).hexdigest()
self._store_work_results({'md5_list':md5_list}, 'sample_set', set_md5)
return set_md5
Expand Down Expand Up @@ -608,7 +611,7 @@ def help(self, topic=None):
# so we'll catch the exception and push back an object that
# indicates we didn't find what they were asking for
try:
return self.work_request('help_cli', topic)['help_cli']['help']
return self.work_request('help_formatter', topic)['help_formatter']['help']
except WorkBench.DataNotFound as e:

# Okay this is a bit tricky we want to give the user a nice error
Expand Down Expand Up @@ -642,7 +645,7 @@ def _help_commands(self):
""" Help on all the available commands """
help = 'Workbench Commands:'
for command in self.list_all_commands():
full_help = self.work_request('help_cli', command)['help_cli']['help']
full_help = self.work_request('help_formatter', command)['help_formatter']['help']
compact_help = full_help.split('\n')[:2]
help += '\n\n%s' % '\n'.join(compact_help)
return help
Expand All @@ -651,7 +654,7 @@ def _help_workers(self):
""" Help on all the available workers """
help = 'Workbench Workers:'
for worker in self.list_all_workers():
full_help = self.work_request('help_cli', worker)['help_cli']['help']
full_help = self.work_request('help_formatter', worker)['help_formatter']['help']
compact_help = full_help.split('\n')[:4]
help += '\n\n%s' % '\n'.join(compact_help)
return help
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

''' HelpCLI worker '''
''' HelpFormatter worker '''

from colorama import Fore, Style

class HelpCLI(object):
class HelpFormatter(object):
''' This worker does CLI formatting and coloring for any help object '''
dependencies = ['help_base']

Expand All @@ -30,15 +30,15 @@ def execute(self, input_data):

# WTF: Alert on unknown type_tag and return a string of the input_data
else:
print 'Alert: help_cli worker received malformed object: %s' % str(input_data)
print 'Alert: help_formatter worker received malformed object: %s' % str(input_data)
output = '\n%s%s%s' % (Fore.RED, str(input_data), Fore.RESET)

# Return the formatted and colored help
return {'help': output}

# Unit test: Create the class, the proper input and run the execute() method for a test
def test():
''' help_cli.py: Unit test'''
''' help_formatter.py: Unit test'''

# This worker test requires a local server running
import zerorpc
Expand All @@ -51,7 +51,7 @@ def test():
input_data3 = workbench.work_request('help_base', 'store_sample')

# Execute the worker (unit test)
worker = HelpCLI()
worker = HelpFormatter()
output = worker.execute(input_data1)
print '\n<<< Unit Test >>>'
print output['help']
Expand All @@ -63,9 +63,9 @@ def test():
print output['help']

# Execute the worker (server test)
output = workbench.work_request('help_cli', 'meta')
output = workbench.work_request('help_formatter', 'meta')
print '\n<<< Server Test >>>'
print output['help_cli']['help']
print output['help_formatter']['help']

if __name__ == "__main__":
test()
20 changes: 19 additions & 1 deletion workbench_apps/workbench_cli/workbench_shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
import lz4
import inspect
import funcsigs
import operator
import matplotlib.pyplot as plt
plt.ion()
from colorama import Fore as F
import pprint

try:
import pandas as pd
Expand Down Expand Up @@ -122,6 +124,20 @@ def load_sample(self, file_path, tags=None):
self.ipshell.push({'md5': self.session.md5})
self.ipshell.push({'short_md5': self.session.short_md5})

# Dump out tag information
self.tag_info()

def tag_info(self):
    """Print a per-tag count of the samples in the database.

    Pulls the md5/tags records for all samples from the Workbench server,
    flattens them with flatten_tags(), sums each tag column and prints the
    totals sorted from most to least frequent. When the server has no tag
    data yet only the header is printed.
    """
    all_tags = self.workbench.get_all_tags()

    print('\n%sSamples in Database%s' % (F.MAGENTA, F.RESET))

    # No tag data yet (the server returns nothing when there is no tags
    # collection): the column deletes below would fail on an empty frame,
    # and this method runs at shell start-up, so return instead of crashing.
    if not all_tags:
        return

    tag_df = self.flatten_tags(pd.DataFrame(all_tags))

    # Drop the non-count columns so that sum() only covers per-tag columns.
    del tag_df['md5']
    del tag_df['tags']

    tag_freq = sorted(tag_df.sum().to_dict().items(),
                      key=operator.itemgetter(1), reverse=True)
    for tag, count in tag_freq:
        print('    %s%s: %s%s%s' % (F.GREEN, tag, F.BLUE, count, F.RESET))

def pull_df(self, md5):
"""Wrapper for the Workbench get_dataframe method
Args:
Expand Down Expand Up @@ -168,8 +184,9 @@ def run(self):
# Announce versions
self.versions()

# Help
# Help and Sample/Tag info
print '\n%s' % self.workbench.help('cli')
self.tag_info()

# Now that we have the Workbench connection spun up, we register some stuff
# with the embedded IPython interpreter and then spin it up
Expand Down Expand Up @@ -277,6 +294,7 @@ def _generate_command_dict(self):
'load_sample': self.load_sample,
'pull_df': self.pull_df,
'flatten_tags': self.flatten_tags,
'tag_info': self.tag_info,
'search': self.search,
'reconnect': lambda info=self.server_info: self._connect(info),
'version': self.versions,
Expand Down

0 comments on commit 3295ece

Please sign in to comment.