Skip to content

Commit

Permalink
hive parameters use parameter file in Python client (#399)
Browse files Browse the repository at this point in the history
  • Loading branch information
irontablee committed Sep 28, 2016
1 parent 3355532 commit f9e9bf0
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 39 deletions.
30 changes: 23 additions & 7 deletions genie-client/src/main/python/pygenie/jobs/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,14 @@ def cmd_args(self):
elif self._script is not None:
self._add_dependency({'name': filename, 'data': self._script})

params_str = ' '.join([
"-d '{name}={value}'" \
.format(name=k,
value=unicode(v).replace("'", "''")) \
for k, v in self._parameters.items()
])
# put parameters into a parameter file and specify parameter file on command line
# this is to get around weird quoting issues in parameter values, etc
param_str = self._parameter_file
if param_str:
self._add_dependency({
'name': '_hive_parameters.txt',
'data': param_str
})

props_str = ' '.join([
'--hiveconf {name}={value}'.format(name=k, value=v) \
Expand All @@ -83,9 +85,23 @@ def cmd_args(self):
.format(prop_file=prop_file_str,
props=props_str,
filename=filename,
params=params_str) \
params='-i _hive_parameters.txt' if param_str else '') \
.strip()

@property
def _parameter_file(self):
    """Render the job's parameters as the text of a Hive parameter file.

    Every entry of ``self._parameters`` becomes a single
    ``SET hivevar:<name>=<value>;`` statement on its own line, in the
    iteration order of ``self._parameters``. The value is converted with
    ``unicode()`` and inserted as-is (no quoting or escaping is applied).

    Returns:
        The newline-separated statements with surrounding whitespace
        stripped, or an empty string when there are no parameters.
    """

    statements = [
        'SET hivevar:{name}={value};'.format(name=key, value=unicode(val))
        for key, val in self._parameters.items()
    ]

    # Strip the joined text as a whole so leading/trailing whitespace is
    # removed exactly once, at the edges of the file contents.
    return '\n'.join(statements).strip()

def headers(self):
"""
Sets hive.cli.print.header so that if the hive query is outputing
Expand Down
4 changes: 2 additions & 2 deletions genie-client/src/main/python/pygenie/jobs/pig.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def cmd_args(self):
for p in self._parameter_files
])

# put parameters into a parameter file and specify paramter file on command line
# put parameters into a parameter file and specify parameter file on command line
# this is to get around weird quoting issues in parameter values, etc
param_str = self._parameter_file
if param_str:
Expand Down Expand Up @@ -108,7 +108,7 @@ def _parameter_file(self):
name=name,
value=unicode(value).replace('"', '\\"'))

return param_file
return param_file.strip()

@unicodify
@arg_list
Expand Down
2 changes: 1 addition & 1 deletion genie-client/src/main/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

setup(
name='nflx-genie-client',
version='3.0.44',
version='3.0.45',
author='Netflix Inc.',
author_email='genieoss@googlegroups.com',
keywords='genie hadoop cloud netflix client bigdata presto',
Expand Down
60 changes: 34 additions & 26 deletions genie-client/src/main/python/tests/job_tests/test_hivejob.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ def test_cmd_args_constructed_script_code(self):
.property('prop2', 'p2')

assert_equals(
job.cmd_args,
u" ".join([
u"--hiveconf hconf1=h1 --hiveconf prop1=p1 --hiveconf prop2=p2",
u"-d 'foo=fizz' -d 'bar=buzz'",
u"-i _hive_parameters.txt",
u"-f script.hive"
])
]),
job.cmd_args
)

@patch('pygenie.jobs.hive.is_file')
Expand All @@ -84,32 +84,35 @@ def test_cmd_args_constructed_script_file(self, is_file):
job.cmd_args,
u" ".join([
u"--hiveconf p2=v2 --hiveconf p1=v1",
u"-d 'hello=hi' -d 'goodbye=bye'",
u"-i _hive_parameters.txt",
u"-f test.hql"
])
)

def test_cmd_args_constructed_quotes(self):
"""Test HiveJob constructed cmd args with quotes."""

@patch.dict('os.environ', {'GENIE_BYPASS_HOME_CONFIG': '1'})
class TestingHiveJobParameters(unittest.TestCase):
"""Test HiveJob parameters."""

def test_parameter_file(self):
"""Test HiveJob parameters into file."""

job = pygenie.jobs.HiveJob() \
.script('foo') \
.parameter("spaces", "this has spaces") \
.parameter("single_quotes", "test' test'") \
.parameter("escaped_single_quotes", "Barney\\'s Adventure") \
.parameter("escaped_single_quotes", "Barney\\\'s Adventure") \
.parameter("unicode", "\xf3\xf3\xf3") \
.parameter("number", 8)

assert_equals(
job.cmd_args,
u" ".join([
u"-d 'escaped_single_quotes=Barney\\''s Adventure'",
u"-d 'spaces=this has spaces'",
u"-d 'single_quotes=test'' test'''",
u"-d 'unicode=\xf3\xf3\xf3'",
u"-d 'number=8'",
u"-f script.hive"
])
'\n'.join([
"SET hivevar:escaped_single_quotes=Barney\\\'s Adventure;",
"SET hivevar:spaces=this has spaces;",
"SET hivevar:single_quotes=test' test';",
"SET hivevar:unicode=\xf3\xf3\xf3;",
"SET hivevar:number=8;"
]),
job._parameter_file
)


Expand Down Expand Up @@ -238,13 +241,14 @@ def test_genie2_payload_adhoc_script(self, os_isfile, to_att):
u'attachments': [
{u'name': u'hive.file1', u'data': u'file contents'},
{u'name': u'hive.file2', u'data': u'file contents'},
{u'name': u'script.hive', u'data': u'SELECT * FROM DUAL'}
{u'name': u'script.hive', u'data': u'SELECT * FROM DUAL'},
{u'name': u'_hive_parameters.txt', u'data': u'SET hivevar:a=b;'}
],
u'clusterCriterias': [
{u'tags': [u'type:hive.cluster1']},
{u'tags': [u'type:hive']}
],
u'commandArgs': u'-d \'a=b\' -f script.hive',
u'commandArgs': u'-i _hive_parameters.txt -f script.hive',
u'commandCriteria': [u'type:hive.cmd'],
u'description': u'this job is to test hivejob adapter',
u'disableLogArchival': True,
Expand Down Expand Up @@ -296,13 +300,15 @@ def test_genie2_payload_file_script(self, presto_is_file, os_isfile, to_att):
u'attachments': [
{u'name': u'hive.file1', u'data': u'file contents'},
{u'name': u'hive.file2', u'data': u'file contents'},
{u'name': u'script.hql', u'data': u'file contents'}
{u'name': u'script.hql', u'data': u'file contents'},
{u'name': u'_hive_parameters.txt',
u'data': u'SET hivevar:a=1;\nSET hivevar:b=2;'}
],
u'clusterCriterias': [
{u'tags': [u'type:hive.cluster2']},
{u'tags': [u'type:hive']}
],
u'commandArgs': u'-d \'a=1\' -d \'b=2\' -f script.hql',
u'commandArgs': u'-i _hive_parameters.txt -f script.hql',
u'commandCriteria': [u'type:hive.cmd.2'],
u'description': u'this job is to test hivejob adapter',
u'disableLogArchival': True,
Expand Down Expand Up @@ -356,13 +362,14 @@ def test_genie3_payload_adhoc_script(self, os_isfile, file_open):
u'attachments': [
(u'hive.file1', u"open file '/hive.file1'"),
(u'hive.file2', u"open file '/hive.file2'"),
(u'script.hive', u'SELECT * FROM DUAL')
(u'script.hive', u'SELECT * FROM DUAL'),
(u'_hive_parameters.txt', u'SET hivevar:a=a;\nSET hivevar:b=b;')
],
u'clusterCriterias': [
{u'tags': [u'type:hive.cluster-1', u'type:hive.cluster-2']},
{u'tags': [u'type:hive']}
],
u'commandArgs': u'-i properties.conf -d \'a=a\' -d \'b=b\' -f script.hive',
u'commandArgs': u'-i properties.conf -i _hive_parameters.txt -f script.hive',
u'commandCriteria': [u'type:hive.cmd.1', u'type:hive.cmd.2'],
u'dependencies': [u'x://properties.conf'],
u'description': u'this job is to test hivejob adapter',
Expand All @@ -383,7 +390,7 @@ def test_genie3_payload_adhoc_script(self, os_isfile, file_open):
@patch('os.path.isfile')
@patch('pygenie.jobs.hive.is_file')
def test_genie3_payload_file_script(self, presto_is_file, os_isfile, file_open):
"""Test PrestoJob payload for Genie 3 (file script)."""
"""Test HiveJob payload for Genie 3 (file script)."""

os_isfile.return_value = True
presto_is_file.return_value = True
Expand Down Expand Up @@ -420,13 +427,14 @@ def test_genie3_payload_file_script(self, presto_is_file, os_isfile, file_open):
(u'hive.file1', u"open file '/hive.file1'"),
(u'hive.file2', u"open file '/hive.file2'"),
(u'properties.conf', u"open file '/properties.conf'"),
(u'script.hql', u"open file '/script.hql'")
(u'script.hql', u"open file '/script.hql'"),
(u'_hive_parameters.txt', u'SET hivevar:a=a;\nSET hivevar:b=b;')
],
u'clusterCriterias': [
{u'tags': [u'type:hive.cluster-1', u'type:hive.cluster-2']},
{u'tags': [u'type:hive']}
],
u'commandArgs': u'-i properties.conf -d \'a=a\' -d \'b=b\' -f script.hql',
u'commandArgs': u'-i properties.conf -i _hive_parameters.txt -f script.hql',
u'commandCriteria': [u'type:hive.cmd.1', u'type:hive.cmd.2'],
u'dependencies': [],
u'description': u'this job is to test hivejob adapter',
Expand Down
6 changes: 3 additions & 3 deletions genie-client/src/main/python/tests/job_tests/test_pigjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def test_basic(self):
spaces = "this has spaces"
double_quotes = "Something: \\"Episode 189\\""
unicode = "\u0147\u0147\u0147"
escaped_single_quotes = "Barney\\'s Adventure"
escaped_single_quotes = "Barney\\'s Adventure"\
"""

assert_equals(
Expand Down Expand Up @@ -263,7 +263,7 @@ def test_genie2_payload_adhoc_script(self, os_isfile, to_att):
{u'data': u'file contents', u'name': u'pig_param1.params'},
{u'data': u'file contents', u'name': u'pig_param2.params'},
{u'data': u'A = LOAD;', u'name': u'script.pig'},
{u'data': u'param2 = "2"\nparam1 = "1"\n', u'name': u'_pig_parameters.txt'}
{u'data': u'param2 = "2"\nparam1 = "1"', u'name': u'_pig_parameters.txt'}
],
u'clusterCriterias': [
{u'tags': [u'type:pig_cluster_1']},
Expand Down Expand Up @@ -391,7 +391,7 @@ def test_genie3_payload_adhoc_script(self, os_isfile, file_open):
(u'pig_param1.params', u"open file '/pig_param1.params'"),
(u'pig_param2.params', u"open file '/pig_param2.params'"),
(u'script.pig', u'A = LOAD;'),
(u'_pig_parameters.txt', u'param2 = "2"\nparam1 = "1"\n')
(u'_pig_parameters.txt', u'param2 = "2"\nparam1 = "1"')
],
u'clusterCriterias': [
{u'tags': [u'type:pig_cluster_1']},
Expand Down

0 comments on commit f9e9bf0

Please sign in to comment.