Merge pull request #265 from JohnSnowLabs/release/532
Release/532
C-K-Loan committed May 21, 2024
2 parents 76161f0 + 8564e7a commit 506860e
Showing 6 changed files with 24 additions and 27 deletions.
4 changes: 2 additions & 2 deletions nlu/__init__.py
@@ -1,4 +1,4 @@
-__version__ = '5.3.1'
+__version__ = '5.3.2'


import nlu.utils.environment.env_utils as env_utils
@@ -325,7 +325,7 @@ def load_nlu_pipe_from_hdd(pipe_path, request) -> NLUPipeline:
If it is a component_list, load the component_list and return it.
If it is a singular model_anno_obj, load it to the correct AnnotatorClass and NLU component_to_resolve and then generate pipeline for it
"""
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
return load_nlu_pipe_from_hdd_in_databricks(pipe_path, request)
pipe = NLUPipeline()
pipe.nlu_ref = request
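For readers following along locally, a one-liner (purely illustrative, not part of the diff) confirms which of the two versions above is installed:

import nlu
print(nlu.__version__)   # expected to print '5.3.2' once this release is installed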
4 changes: 2 additions & 2 deletions nlu/pipe/pipeline.py
@@ -18,7 +18,7 @@
from nlu.pipe.utils.resolution.storage_ref_utils import StorageRefUtils
from nlu.universe.feature_node_ids import NLP_NODE_IDS
from nlu.universe.universes import Licenses
-from nlu.utils.environment.env_utils import is_running_in_databricks, try_import_streamlit
+from nlu.utils.environment.env_utils import is_running_in_databricks_runtime, try_import_streamlit

logger = logging.getLogger('nlu')

@@ -608,7 +608,7 @@ def viz(self, text_to_viz: str, viz_type='', labels_to_viz=None, viz_colors={},
from nlu.utils.environment.env_utils import install_and_import_package
install_and_import_package('spark-nlp-display', import_name='sparknlp_display')
if self.vanilla_transformer_pipe is None: self.fit()
-is_databricks_env = is_running_in_databricks()
+is_databricks_env = is_running_in_databricks_runtime()
if return_html: is_databricks_env = True
# self.configure_light_pipe_usage(1, force=True)
from nlu.pipe.viz.vis_utils import VizUtils
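The viz() change above only swaps the runtime check. For context, here is a minimal usage sketch, assuming a standard NLU install where spark-nlp-display can be installed on demand; the parameter names are taken from the signature visible in this hunk and may differ in other NLU versions:

import nlu

pipe = nlu.load('ner')
# Outside a notebook, return_html=True forces the same branch that the Databricks
# runtime check enables and returns the rendered visualization as a raw HTML string.
html = pipe.viz('John Snow Labs is based in Delaware.', viz_type='ner', return_html=True)
with open('ner_viz.html', 'w') as f:
    f.write(html)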
12 changes: 6 additions & 6 deletions nlu/pipe/utils/audio_data_conversion_utils.py
@@ -6,7 +6,7 @@
import numpy as np
import pandas as pd
import pyspark
-from johnsnowlabs.utils.env_utils import is_running_in_databricks
+from johnsnowlabs.utils.env_utils import is_running_in_databricks_runtime
from pyspark.sql.types import *

logger = logging.getLogger('nlu')
@@ -34,7 +34,7 @@ def validate_paths(data):
@staticmethod
def check_iterable_paths_are_valid(iterable_paths):
"""Validate for iterable data input if all elements point to file or jsl_folder"""
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
iterable_paths = [f'/dbfs{p}' for p in iterable_paths]
paths_validness = []
for p in iterable_paths:
@@ -86,18 +86,18 @@ def glob_files_of_accepted_type(paths, file_types):
1. paths point to a file which is suffixed with one of the accepted file_types, i.e. path/to/file.type
2. path points to a jsl_folder, in this case jsl_folder is recursively searched for valid files and accepted paths will be in return result
"""
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
paths = [f'/dbfs{p}' for p in paths]
accepted_file_paths = []
for p in paths:
for t in file_types:
t = t.lower()
-if os.path.isfile(p) or is_running_in_databricks() and os.path.isfile(f'/dbfs{p}'):
+if os.path.isfile(p) or is_running_in_databricks_runtime() and os.path.isfile(f'/dbfs{p}'):
if p.lower().split('.')[-1] == t:
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
p = p.replace('/dbfs', '', 1)
accepted_file_paths.append(p)
-elif os.path.isdir(p) or is_running_in_databricks() and os.path.isdir(f'/dbfs{p}'):
+elif os.path.isdir(p) or is_running_in_databricks_runtime() and os.path.isdir(f'/dbfs{p}'):
accepted_file_paths += glob.glob(p + f'/**/*.{t}', recursive=True)
else:
print(f"Invalid path = {p} pointing neither to file or jsl_folder on this machine")
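The audio utils above repeatedly pair a plain path with its /dbfs-prefixed twin because, on a Databricks cluster, DBFS paths are also exposed on the driver's local filesystem under /dbfs. The stripped-down sketch below illustrates that probing pattern; resolve_local_path is a hypothetical helper, not NLU code, and only the /dbfs handling mirrors the diff:

import os

def is_running_in_databricks_runtime():
    return "DATABRICKS_RUNTIME_VERSION" in os.environ

def resolve_local_path(p: str):
    """Return p if it (or its /dbfs twin on Databricks) exists locally, else None."""
    if os.path.exists(p):
        return p
    if is_running_in_databricks_runtime() and os.path.exists(f'/dbfs{p}'):
        # The file is reachable via the FUSE mount; return the original spelling,
        # mirroring the p.replace('/dbfs', '', 1) step in the utils above.
        return p
    return None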
12 changes: 6 additions & 6 deletions nlu/pipe/utils/ocr_data_conversion_utils.py
@@ -7,7 +7,7 @@
import numpy as np
import pandas as pd
import pyspark
-from johnsnowlabs.utils.env_utils import is_running_in_databricks
+from johnsnowlabs.utils.env_utils import is_running_in_databricks_runtime

logger = logging.getLogger('nlu')

@@ -32,7 +32,7 @@ def validate_OCR_compatible_inputs(data):
@staticmethod
def check_iterable_paths_are_valid(iterable_paths):
"""Validate for iterable data input if all elements point to file or jsl_folder"""
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
iterable_paths = [f'/dbfs{p}' for p in iterable_paths]
paths_validness = []
for p in iterable_paths:
@@ -58,18 +58,18 @@ def glob_files_of_accepted_type(paths, file_types):
2. path points to a jsl_folder, in this case jsl_folder is recurisvely searched for valid files and accepted paths will be in return result
"""

-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
paths = [f'/dbfs{p}' for p in paths]
accepted_file_paths = []
for p in paths:
for t in file_types:
t = t.lower()
-if os.path.isfile(p) or is_running_in_databricks() and os.path.isfile(f'/dbfs{p}'):
+if os.path.isfile(p) or is_running_in_databricks_runtime() and os.path.isfile(f'/dbfs{p}'):
if p.lower().split('.')[-1] == t:
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
p = p.replace('/dbfs', '', 1)
accepted_file_paths.append(p)
-elif os.path.isdir(p) or is_running_in_databricks() and os.path.isdir(f'/dbfs{p}'):
+elif os.path.isdir(p) or is_running_in_databricks_runtime() and os.path.isdir(f'/dbfs{p}'):
accepted_file_paths += glob.glob(p + f'/*.{t.upper()}', recursive=True) + glob.glob(p + f'/*.{t}',
recursive=True)
else:
8 changes: 4 additions & 4 deletions nlu/pipe/utils/pipe_utils.py
@@ -18,7 +18,7 @@
from nlu.pipe.utils.component_utils import ComponentUtils
from typing import List, Union, Dict
from nlu.universe.annotator_class_universe import AnnoClassRef
-from nlu.utils.environment.env_utils import is_running_in_databricks
+from nlu.utils.environment.env_utils import is_running_in_databricks_runtime
import os
import glob
import json
@@ -140,12 +140,12 @@ def set_column_values_on_components_from_pretrained_pipe_from_disk_data(componen
pipe_path = glob.glob(f'{pipe_path}*')
if len(pipe_path) == 0:
# try databricks env path
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
pipe_path = [f'dbfs:/root/cache_pretrained/{nlp_ref}_{lang}']
else:
raise FileNotFoundError(f"Could not find downloaded Pipeline at path={pipe_path}")
pipe_path = pipe_path[0]
-if not os.path.exists(pipe_path) and not is_running_in_databricks():
+if not os.path.exists(pipe_path) and not is_running_in_databricks_runtime():
raise FileNotFoundError(f"Could not find downloaded Pipeline at path={pipe_path}")

# Find HDD location of component_list and read out input/output cols
@@ -155,7 +155,7 @@ def set_column_values_on_components_from_pretrained_pipe_from_disk_data(componen

for c in component_list:
model_name = c.model.uid.split('_')[0]
-if is_running_in_databricks():
+if is_running_in_databricks_runtime():
data = PipeUtils.get_json_data_for_pipe_model_at_stage_number_on_databricks(nlp_ref, lang, digit_str)
else:
data = PipeUtils.get_json_data_for_pipe_model_at_stage_number(pipe_path, digit_str)
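For orientation, the hunk above resolves the on-disk location of a pretrained pipeline and, when nothing matches locally, falls back to the DBFS cache path. A hedged sketch of that lookup follows; the function name and cache_dir argument are illustrative, and only the dbfs:/root/cache_pretrained layout and the {nlp_ref}_{lang} naming come from the diff:

import glob
import os

def find_pretrained_pipe_path(cache_dir: str, nlp_ref: str, lang: str) -> str:
    # Look for a locally downloaded pipeline first.
    matches = glob.glob(f'{cache_dir}/{nlp_ref}_{lang}*')
    if not matches:
        if "DATABRICKS_RUNTIME_VERSION" in os.environ:
            # On a Databricks cluster the download is cached on DBFS, not the driver disk.
            return f'dbfs:/root/cache_pretrained/{nlp_ref}_{lang}'
        raise FileNotFoundError(f"Could not find downloaded Pipeline at path={cache_dir}/{nlp_ref}_{lang}")
    return matches[0]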
11 changes: 4 additions & 7 deletions nlu/utils/environment/env_utils.py
@@ -84,13 +84,10 @@ def try_import_streamlit():
print("You need to install Streamlit to run this functionality.")


-def is_running_in_databricks():
-    """ Check if the currently running Python Process is running in Databricks or not
-    If any Environment Variable name contains 'DATABRICKS' this will return True, otherwise False"""
-    for k in os.environ.keys():
-        if 'DATABRICKS' in k:
-            return True
-    return False
+def is_running_in_databricks_runtime():
+    """ Check if the currently running Python Process is running in Databricks runtime or not
+    """
+    return "DATABRICKS_RUNTIME_VERSION" in os.environ


def install_and_import_package(pkg_name, version='', import_name=''):
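The net effect of the env_utils change is a stricter detection rule: the old helper treated any environment variable whose name contains 'DATABRICKS' as proof of a Databricks environment, while the new one keys on DATABRICKS_RUNTIME_VERSION, which the Databricks runtime sets on cluster nodes. The self-contained comparison below is illustrative only; the simulated DATABRICKS_TOKEN stands in for locally configured Databricks CLI credentials that would have tripped the old check:

import os

def old_check():
    # Pre-5.3.2 behaviour: any env var name containing 'DATABRICKS' counts.
    return any('DATABRICKS' in k for k in os.environ)

def new_check():
    # 5.3.2 behaviour: only the variable set by the Databricks runtime itself counts.
    return "DATABRICKS_RUNTIME_VERSION" in os.environ

if __name__ == '__main__':
    os.environ.setdefault('DATABRICKS_TOKEN', 'dapi-example')  # simulate local CLI credentials
    print(old_check())   # True  -> false positive on a laptop with CLI credentials exported
    print(new_check())   # False -> unless actually running inside a Databricks runtime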
