Release/532 #265

Merged: 4 commits, May 21, 2024
4 changes: 2 additions & 2 deletions nlu/__init__.py

@@ -1,4 +1,4 @@
-__version__ = '5.3.1'
+__version__ = '5.3.2'
 
 
 import nlu.utils.environment.env_utils as env_utils

@@ -325,7 +325,7 @@ def load_nlu_pipe_from_hdd(pipe_path, request) -> NLUPipeline:
     If it is a component_list, load the component_list and return it.
     If it is a singular model_anno_obj, load it to the correct AnnotatorClass and NLU component_to_resolve and then generate pipeline for it
     """
-    if is_running_in_databricks():
+    if is_running_in_databricks_runtime():
         return load_nlu_pipe_from_hdd_in_databricks(pipe_path, request)
     pipe = NLUPipeline()
     pipe.nlu_ref = request
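The hunk above only swaps which environment check gates the Databricks-specific loader; the dispatch itself is unchanged. A minimal, self-contained sketch of that pattern (function names other than `is_running_in_databricks_runtime` are illustrative, not the nlu API):

```python
import os


def is_running_in_databricks_runtime() -> bool:
    # Same check this PR introduces in env_utils.py
    return "DATABRICKS_RUNTIME_VERSION" in os.environ


def load_pipe_from_disk(pipe_path: str, request: str):
    """Illustrative dispatcher: use the Databricks-aware loader only when the
    process really runs inside a Databricks runtime."""
    if is_running_in_databricks_runtime():
        return load_pipe_in_databricks(pipe_path, request)
    return load_pipe_locally(pipe_path, request)


def load_pipe_in_databricks(pipe_path, request):
    # Placeholder: DBFS paths are reachable on the driver under the /dbfs mount
    return "databricks", f"/dbfs{pipe_path}", request


def load_pipe_locally(pipe_path, request):
    return "local", pipe_path, request
```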
4 changes: 2 additions & 2 deletions nlu/pipe/pipeline.py

@@ -18,7 +18,7 @@
 from nlu.pipe.utils.resolution.storage_ref_utils import StorageRefUtils
 from nlu.universe.feature_node_ids import NLP_NODE_IDS
 from nlu.universe.universes import Licenses
-from nlu.utils.environment.env_utils import is_running_in_databricks, try_import_streamlit
+from nlu.utils.environment.env_utils import is_running_in_databricks_runtime, try_import_streamlit
 
 logger = logging.getLogger('nlu')
 
@@ -608,7 +608,7 @@ def viz(self, text_to_viz: str, viz_type='', labels_to_viz=None, viz_colors={},
         from nlu.utils.environment.env_utils import install_and_import_package
         install_and_import_package('spark-nlp-display', import_name='sparknlp_display')
         if self.vanilla_transformer_pipe is None: self.fit()
-        is_databricks_env = is_running_in_databricks()
+        is_databricks_env = is_running_in_databricks_runtime()
         if return_html: is_databricks_env = True
         # self.configure_light_pipe_usage(1, force=True)
         from nlu.pipe.viz.vis_utils import VizUtils
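Same mechanical rename in pipeline.py; the surrounding behaviour of returning rendered HTML instead of displaying it, either on Databricks or when the caller passes return_html=True, is untouched. A hedged sketch of that flag logic (helper names are illustrative):

```python
import os


def is_running_in_databricks_runtime() -> bool:
    return "DATABRICKS_RUNTIME_VERSION" in os.environ


def render_or_return(html: str, return_html: bool = False):
    """Illustrative: Databricks notebooks have no local display backend, so the
    HTML string is handed back to the caller instead of being rendered in-process."""
    is_databricks_env = is_running_in_databricks_runtime()
    if return_html:
        is_databricks_env = True
    if is_databricks_env:
        return html
    print(html)  # stand-in for an in-process renderer
    return None
```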
12 changes: 6 additions & 6 deletions nlu/pipe/utils/audio_data_conversion_utils.py

@@ -6,7 +6,7 @@
 import numpy as np
 import pandas as pd
 import pyspark
-from johnsnowlabs.utils.env_utils import is_running_in_databricks
+from johnsnowlabs.utils.env_utils import is_running_in_databricks_runtime
 from pyspark.sql.types import *
 
 logger = logging.getLogger('nlu')

@@ -34,7 +34,7 @@ def validate_paths(data):
     @staticmethod
     def check_iterable_paths_are_valid(iterable_paths):
         """Validate for iterable data input if all elements point to file or jsl_folder"""
-        if is_running_in_databricks():
+        if is_running_in_databricks_runtime():
             iterable_paths = [f'/dbfs{p}' for p in iterable_paths]
         paths_validness = []
         for p in iterable_paths:

@@ -86,18 +86,18 @@ def glob_files_of_accepted_type(paths, file_types):
         1. paths point to a file which is suffixed with one of the accepted file_types, i.e. path/to/file.type
         2. path points to a jsl_folder, in this case jsl_folder is recursively searched for valid files and accepted paths will be in return result
         """
-        if is_running_in_databricks():
+        if is_running_in_databricks_runtime():
             paths = [f'/dbfs{p}' for p in paths]
         accepted_file_paths = []
         for p in paths:
             for t in file_types:
                 t = t.lower()
-                if os.path.isfile(p) or is_running_in_databricks() and os.path.isfile(f'/dbfs{p}'):
+                if os.path.isfile(p) or is_running_in_databricks_runtime() and os.path.isfile(f'/dbfs{p}'):
                     if p.lower().split('.')[-1] == t:
-                        if is_running_in_databricks():
+                        if is_running_in_databricks_runtime():
                             p = p.replace('/dbfs', '', 1)
                         accepted_file_paths.append(p)
-                elif os.path.isdir(p) or is_running_in_databricks() and os.path.isdir(f'/dbfs{p}'):
+                elif os.path.isdir(p) or is_running_in_databricks_runtime() and os.path.isdir(f'/dbfs{p}'):
                     accepted_file_paths += glob.glob(p + f'/**/*.{t}', recursive=True)
                 else:
                     print(f"Invalid path = {p} pointing neither to file or jsl_folder on this machine")
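The behavioural surface in this file is the /dbfs handling: Databricks exposes DBFS through a driver-local FUSE mount, so a DBFS path such as /mnt/data/audio must be checked with os.path as /dbfs/mnt/data/audio and then handed downstream without the prefix. A small hedged sketch of just that translation (the path is an assumed example, not from the PR):

```python
import os


def is_running_in_databricks_runtime() -> bool:
    return "DATABRICKS_RUNTIME_VERSION" in os.environ


def to_local_fuse_path(dbfs_path: str) -> str:
    """Map a DBFS-style path onto the driver-local /dbfs mount (illustrative)."""
    return f"/dbfs{dbfs_path}" if is_running_in_databricks_runtime() else dbfs_path


def to_dbfs_path(local_path: str) -> str:
    """Strip the mount prefix again before handing the path downstream (illustrative)."""
    if is_running_in_databricks_runtime() and local_path.startswith("/dbfs"):
        return local_path.replace("/dbfs", "", 1)
    return local_path


# Assumed example path: validate via the FUSE mount, pass the DBFS form downstream.
p = "/mnt/data/audio/sample.wav"
exists = os.path.isfile(to_local_fuse_path(p))
spark_path = to_dbfs_path(to_local_fuse_path(p))
```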
12 changes: 6 additions & 6 deletions nlu/pipe/utils/ocr_data_conversion_utils.py

@@ -7,7 +7,7 @@
 import numpy as np
 import pandas as pd
 import pyspark
-from johnsnowlabs.utils.env_utils import is_running_in_databricks
+from johnsnowlabs.utils.env_utils import is_running_in_databricks_runtime
 
 logger = logging.getLogger('nlu')
 
@@ -32,7 +32,7 @@ def validate_OCR_compatible_inputs(data):
     @staticmethod
     def check_iterable_paths_are_valid(iterable_paths):
         """Validate for iterable data input if all elements point to file or jsl_folder"""
-        if is_running_in_databricks():
+        if is_running_in_databricks_runtime():
             iterable_paths = [f'/dbfs{p}' for p in iterable_paths]
         paths_validness = []
         for p in iterable_paths:

@@ -58,18 +58,18 @@ def glob_files_of_accepted_type(paths, file_types):
         2. path points to a jsl_folder, in this case jsl_folder is recurisvely searched for valid files and accepted paths will be in return result
         """
 
-        if is_running_in_databricks():
+        if is_running_in_databricks_runtime():
             paths = [f'/dbfs{p}' for p in paths]
         accepted_file_paths = []
         for p in paths:
             for t in file_types:
                 t = t.lower()
-                if os.path.isfile(p) or is_running_in_databricks() and os.path.isfile(f'/dbfs{p}'):
+                if os.path.isfile(p) or is_running_in_databricks_runtime() and os.path.isfile(f'/dbfs{p}'):
                     if p.lower().split('.')[-1] == t:
-                        if is_running_in_databricks():
+                        if is_running_in_databricks_runtime():
                             p = p.replace('/dbfs', '', 1)
                         accepted_file_paths.append(p)
-                elif os.path.isdir(p) or is_running_in_databricks() and os.path.isdir(f'/dbfs{p}'):
+                elif os.path.isdir(p) or is_running_in_databricks_runtime() and os.path.isdir(f'/dbfs{p}'):
                     accepted_file_paths += glob.glob(p + f'/*.{t.upper()}', recursive=True) + glob.glob(p + f'/*.{t}',
                                                                                                         recursive=True)
                 else:
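Same rename as in the audio utility; the only difference here is that the directory glob is non-recursive and matches both lower- and upper-case suffixes. A hedged one-function sketch of that case handling (names are illustrative):

```python
import glob


def glob_images(folder: str, ext: str = "png"):
    """Illustrative: collect file.png and file.PNG from a folder, non-recursively."""
    return glob.glob(f"{folder}/*.{ext.lower()}") + glob.glob(f"{folder}/*.{ext.upper()}")
```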
8 changes: 4 additions & 4 deletions nlu/pipe/utils/pipe_utils.py

@@ -18,7 +18,7 @@
 from nlu.pipe.utils.component_utils import ComponentUtils
 from typing import List, Union, Dict
 from nlu.universe.annotator_class_universe import AnnoClassRef
-from nlu.utils.environment.env_utils import is_running_in_databricks
+from nlu.utils.environment.env_utils import is_running_in_databricks_runtime
 import os
 import glob
 import json

@@ -140,12 +140,12 @@ def set_column_values_on_components_from_pretrained_pipe_from_disk_data(componen
         pipe_path = glob.glob(f'{pipe_path}*')
         if len(pipe_path) == 0:
             # try databricks env path
-            if is_running_in_databricks():
+            if is_running_in_databricks_runtime():
                 pipe_path = [f'dbfs:/root/cache_pretrained/{nlp_ref}_{lang}']
             else:
                 raise FileNotFoundError(f"Could not find downloaded Pipeline at path={pipe_path}")
         pipe_path = pipe_path[0]
-        if not os.path.exists(pipe_path) and not is_running_in_databricks():
+        if not os.path.exists(pipe_path) and not is_running_in_databricks_runtime():
             raise FileNotFoundError(f"Could not find downloaded Pipeline at path={pipe_path}")
 
         # Find HDD location of component_list and read out input/output cols

@@ -155,7 +155,7 @@ def set_column_values_on_components_from_pretrained_pipe_from_disk_data(componen
 
         for c in component_list:
             model_name = c.model.uid.split('_')[0]
-            if is_running_in_databricks():
+            if is_running_in_databricks_runtime():
                 data = PipeUtils.get_json_data_for_pipe_model_at_stage_number_on_databricks(nlp_ref, lang, digit_str)
             else:
                 data = PipeUtils.get_json_data_for_pipe_model_at_stage_number(pipe_path, digit_str)
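These hunks keep the existing fallback order: glob the locally downloaded pipeline first, fall back to the Databricks pretrained cache at dbfs:/root/cache_pretrained/<nlp_ref>_<lang>, and skip the local-existence check on Databricks because that location is a DBFS URI rather than a driver-local path. A hedged sketch of that resolution order (not the nlu source):

```python
import glob
import os


def is_running_in_databricks_runtime() -> bool:
    return "DATABRICKS_RUNTIME_VERSION" in os.environ


def resolve_pretrained_pipe_path(local_prefix: str, nlp_ref: str, lang: str) -> str:
    """Illustrative: try the local cache first, then the Databricks pretrained cache."""
    matches = glob.glob(f"{local_prefix}*")
    if not matches:
        if is_running_in_databricks_runtime():
            matches = [f"dbfs:/root/cache_pretrained/{nlp_ref}_{lang}"]
        else:
            raise FileNotFoundError(f"Could not find downloaded Pipeline at path={local_prefix}")
    path = matches[0]
    # dbfs:/ URIs are not visible to os.path, so only enforce existence off-Databricks.
    if not os.path.exists(path) and not is_running_in_databricks_runtime():
        raise FileNotFoundError(f"Could not find downloaded Pipeline at path={path}")
    return path
```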
11 changes: 4 additions & 7 deletions nlu/utils/environment/env_utils.py

@@ -84,13 +84,10 @@ def try_import_streamlit():
         print("You need to install Streamlit to run this functionality.")
 
 
-def is_running_in_databricks():
-    """ Check if the currently running Python Process is running in Databricks or not
-    If any Environment Variable name contains 'DATABRICKS' this will return True, otherwise False"""
-    for k in os.environ.keys():
-        if 'DATABRICKS' in k:
-            return True
-    return False
+def is_running_in_databricks_runtime():
+    """ Check if the currently running Python Process is running in Databricks runtime or not
+    """
+    return "DATABRICKS_RUNTIME_VERSION" in os.environ
 
 
 def install_and_import_package(pkg_name, version='', import_name=''):
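This is the substantive change of the release: the old helper treated any environment variable whose name contains 'DATABRICKS' as proof of a Databricks cluster, which can misfire on a machine that merely has Databricks tooling configured (for example an exported DATABRICKS_HOST for the CLI, an assumed example). The new helper keys on DATABRICKS_RUNTIME_VERSION, which the Databricks runtime sets for notebook and job processes. A small sketch contrasting the two checks on plain dicts:

```python
def old_check(env: dict) -> bool:
    """Former behaviour: any variable name containing 'DATABRICKS' counted."""
    return any('DATABRICKS' in k for k in env)


def new_check(env: dict) -> bool:
    """New behaviour: only the runtime-version variable counts."""
    return "DATABRICKS_RUNTIME_VERSION" in env


# A laptop with Databricks CLI credentials (assumed env var) is no longer
# mistaken for a cluster, while a real runtime is still detected.
laptop_env = {"DATABRICKS_HOST": "https://example.cloud.databricks.com"}
cluster_env = {"DATABRICKS_RUNTIME_VERSION": "14.3"}

assert old_check(laptop_env) and not new_check(laptop_env)
assert old_check(cluster_env) and new_check(cluster_env)
```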