Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

InvalidArgumentError: device CUDA:0 not supported by XLA service while setting up XLA_GPU_JIT device number 0 #423

Closed
emtropyml opened this issue Nov 7, 2019 · 5 comments

Comments

@emtropyml
Copy link

Inference: 0%| | 0/2 [00:04<?, ?it/s]


InvalidArgumentError Traceback (most recent call last)
in
----> 1 calculate_absa(pos_review, neg_review)

in calculate_absa(pos_review, neg_review)
16
17 def calculate_absa(pos_review, neg_review):
---> 18 comp = model.predict([pos_review, neg_review], threshold=0.5)
19 pos_comp = comp[0]
20 neg_comp = comp[1]

~/.local/lib/python3.5/site-packages/finetune/target_models/multi_label_classifier.py in predict(self, X, threshold)
49 self.config._threshold = threshold or self.config.multi_label_threshold
50 all_labels = []
---> 51 for _, start_of_doc, end_of_doc, _, proba in self.process_long_sequence(X):
52 if start_of_doc:
53 # if this is the first chunk in a document, start accumulating from scratch

~/.local/lib/python3.5/site-packages/finetune/base.py in process_long_sequence(self, X)
810
811 labels, batch_probas = [], []
--> 812 for pred in self._inference(X, predict_keys=[PredictMode.PROBAS, PredictMode.NORMAL], n_examples=len(flat_array_encoded)):
813 normal_pred = pred[PredictMode.NORMAL]
814 if not hasattr(self, 'multi_label'):

~/.local/lib/python3.5/site-packages/finetune/base.py in _inference(self, Xs, predict_keys, n_examples)
425 if self._cached_predict:
426 return self._cached_inference(
--> 427 Xs=Xs, predict_keys=predict_keys, n_examples=n_examples
428 )
429 else:

~/.local/lib/python3.5/site-packages/finetune/base.py in _cached_inference(self, Xs, predict_keys, n_examples)
409 predictions = [None] * n
410 for i in tqdm.tqdm(range(n), total=n, desc="Inference"):
--> 411 y = next(self._predictions)
412 try:
413 y = y[predict_keys[0]] if len(predict_keys) == 1 else y

~/.local/lib/python3.5/site-packages/tensorflow_estimator/python/estimator/estimator.py in predict(self, input_fn, predict_keys, hooks, checkpoint_path, yield_single_examples)
633 scaffold=estimator_spec.scaffold,
634 config=self._session_config),
--> 635 hooks=all_hooks) as mon_sess:
636 while not mon_sess.should_stop():
637 preds_evaluated = mon_sess.run(predictions)

~/.local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in init(self, session_creator, hooks, stop_grace_period_secs)
1005 hooks,
1006 should_recover=True,
-> 1007 stop_grace_period_secs=stop_grace_period_secs)
1008
1009

~/.local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in init(self, session_creator, hooks, should_recover, stop_grace_period_secs)
723 stop_grace_period_secs=stop_grace_period_secs)
724 if should_recover:
--> 725 self._sess = _RecoverableSession(self._coordinated_creator)
726 else:
727 self._sess = self._coordinated_creator.create_session()

~/.local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in init(self, sess_creator)
1198 """
1199 self._sess_creator = sess_creator
-> 1200 _WrappedSession.init(self, self._create_session())
1201
1202 def _create_session(self):

~/.local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in _create_session(self)
1203 while True:
1204 try:
-> 1205 return self._sess_creator.create_session()
1206 except _PREEMPTION_ERRORS as e:
1207 logging.info(

~/.local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in create_session(self)
869 """Creates a coordinated session."""
870 # Keep the tf_sess for unit testing.
--> 871 self.tf_sess = self._session_creator.create_session()
872 # We don't want coordinator to suppress any exception.
873 self.coord = coordinator.Coordinator(clean_stop_exception_types=[])

~/.local/lib/python3.5/site-packages/tensorflow/python/training/monitored_session.py in create_session(self)
645 init_op=self._scaffold.init_op,
646 init_feed_dict=self._scaffold.init_feed_dict,
--> 647 init_fn=self._scaffold.init_fn)
648
649

~/.local/lib/python3.5/site-packages/tensorflow/python/training/session_manager.py in prepare_session(self, master, init_op, saver, checkpoint_dir, checkpoint_filename_with_path, wait_for_checkpoint, max_wait_secs, config, init_feed_dict, init_fn)
288 wait_for_checkpoint=wait_for_checkpoint,
289 max_wait_secs=max_wait_secs,
--> 290 config=config)
291 if not is_loaded_from_checkpoint:
292 if init_op is None and not init_fn and self._local_init_op is None:

~/.local/lib/python3.5/site-packages/tensorflow/python/training/session_manager.py in _restore_checkpoint(self, master, saver, checkpoint_dir, checkpoint_filename_with_path, wait_for_checkpoint, max_wait_secs, config)
192 strategy.extended._experimental_initialize_system() # pylint: disable=protected-access
193
--> 194 sess = session.Session(self._target, graph=self._graph, config=config)
195 if checkpoint_dir and checkpoint_filename_with_path:
196 raise ValueError("Can not provide both checkpoint_dir and "

~/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py in init(self, target, graph, config)
1568
1569 """
-> 1570 super(Session, self).init(target, graph, config=config)
1571 # NOTE(mrry): Create these on first __enter__ to avoid a reference cycle.
1572 self._default_graph_context_manager = None

~/.local/lib/python3.5/site-packages/tensorflow/python/client/session.py in init(self, target, graph, config)
691 try:
692 # pylint: disable=protected-access
--> 693 self._session = tf_session.TF_NewSessionRef(self._graph._c_graph, opts)
694 # pylint: enable=protected-access
695 finally:

InvalidArgumentError: device CUDA:0 not supported by XLA service
while setting up XLA_GPU_JIT device number 0

@benleetownsend
Copy link
Contributor

benleetownsend commented Nov 7, 2019

Can you send over the output of pip freeze | grep "tensorflow\|finetune" and provide a minimal reproducible example if possible? We suspect this is a GPU memory error in disguise, but if you are able to reproduce this then there is probably more to it.

@emtropyml
Copy link
Author

emtropyml commented Nov 7, 2019

finetune==0.8.4
mesh-tensorflow==0.0.5
tensorflow==1.14.0
tensorflow-datasets==1.0.2
tensorflow-estimator==1.14.0
tensorflow-gpu==1.14.0
tensorflow-hub==0.5.0
tensorflow-metadata==0.13.0
tensorflow-probability==0.7.0rc0
tensorflow-serving-api-gpu==1.14.0

The code just loads a 'DistilBERT.model' file and then executes the model.predict() function. (It was working fine with the 'GPT2.model' file and Finetune 0.8.3 previously.)

@madisonmay
Copy link
Contributor

Does this happen consistently? And can you confirm your GPU isn't running something else when you kick off the script (by inspecting the output of nvidia-smi)?

@emtropyml
Copy link
Author

It was indeed a GPU memory error. Resolved now. Thanks!

@ShangWeiKuo
Copy link

Excuse me @emtropyml,
What did you do to resolve the InvalidArgumentError problem?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants