This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Deprecate: remove custom scripts (#650)
jafreck authored Aug 18, 2018
1 parent 293f297 commit 442228a
Showing 20 changed files with 10 additions and 172 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -1,5 +1,4 @@
# custom
my-custom-scripts/
.aztk

# Environments
4 changes: 1 addition & 3 deletions README.md
@@ -115,9 +115,7 @@ By default, we port forward the Spark Web UI to *localhost:8080*, Spark Jobs UI

You can configure these settings in the *.aztk/ssh.yaml* file.

NOTE: When working interactively, you may want to use tools like Jupyter or RStudio-Server depending on whether or not you are a python or R user. To do so, you need to setup your cluster with the appropriate docker image and custom scripts:
- [how to setup Jupyter with Pyspark](https://github.com/Azure/aztk/wiki/PySpark-on-Azure-with-AZTK)
- [how to setup RStudio-Server with Sparklyr](https://github.com/Azure/aztk/wiki/SparklyR-on-Azure-with-AZTK)
NOTE: When working interactively, you may want to use tools like Jupyter or RStudio-Server. To do so, you need to setup your cluster with the appropriate docker image and plugin. See [Plugins](./docs/15-plugins.md) for more information.

### 5. Manage and Monitor your Spark Cluster

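As context for the updated README note, a minimal sketch of enabling those interactive tools through plugins in `.aztk/cluster.yaml` (plugin names are assumed from the AZTK supported-plugin list, not shown in this diff):

```yaml
# Sketch only: plugin names assumed from the AZTK plugin library.
plugins:
  - name: jupyter          # Jupyter notebook server for PySpark users
  - name: rstudio_server   # RStudio Server for R / Sparklyr users
```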
31 changes: 3 additions & 28 deletions aztk/internal/cluster_data/node_data.py
@@ -4,17 +4,17 @@
import zipfile
from pathlib import Path
from typing import List

import yaml

from aztk import models
from aztk.utils import constants, file_utils, secure_utils
from aztk.error import InvalidCustomScriptError
from aztk.utils import constants, file_utils, secure_utils

ROOT_PATH = constants.ROOT_PATH

# Constants for node data
NODE_SCRIPT_FOLDER = "aztk"
CUSTOM_SCRIPT_FOLDER = "custom-scripts"
CUSTOM_SCRIPT_METADATA_FILE = "custom-scripts.yaml"
PLUGIN_FOLDER = "plugins"


@@ -30,7 +30,6 @@ def __init__(self, cluster_config: models.ClusterConfiguration):

def add_core(self):
self._add_node_scripts()
self._add_custom_scripts()
self._add_plugins()
self._add_spark_configuration()
self._add_user_conf()
@@ -72,30 +71,6 @@ def add_dir(self, path: str, dest: str = None, exclude: List[str] = None):
if self._includeFile(file, exclude):
self.add_file(os.path.join(base, file), os.path.join(dest, relative_folder), binary=False)

def _add_custom_scripts(self):
data = []
if not self.cluster_config.custom_scripts:
return

for index, custom_script in enumerate(self.cluster_config.custom_scripts):
if isinstance(custom_script.script, (str, bytes)):
new_file_name = str(index) + '_' + os.path.basename(custom_script.script)
data.append(dict(script=new_file_name, runOn=str(custom_script.run_on)))
try:
with io.open(custom_script.script, 'r', encoding='UTF-8') as f:
self.zipf.writestr(
os.path.join(CUSTOM_SCRIPT_FOLDER, new_file_name),
f.read().replace('\r\n', '\n'))
except FileNotFoundError:
raise InvalidCustomScriptError("Custom script '{0}' doesn't exists.".format(custom_script.script))
elif isinstance(custom_script.script, models.File):
new_file_name = str(index) + '_' + custom_script.script.name
self.zipf.writestr(
os.path.join('custom-scripts', new_file_name), custom_script.script.payload.getvalue())

self.zipf.writestr(
os.path.join(CUSTOM_SCRIPT_FOLDER, CUSTOM_SCRIPT_METADATA_FILE), yaml.dump(data, default_flow_style=False))

def _add_spark_configuration(self):
spark_configuration = self.cluster_config.spark_configuration
if not spark_configuration:
1 change: 0 additions & 1 deletion aztk/models/cluster_configuration.py
@@ -35,7 +35,6 @@ class ClusterConfiguration(Model):

subnet_id = fields.String(default=None)
plugins = fields.List(PluginConfiguration)
custom_scripts = fields.List(CustomScript)
file_shares = fields.List(FileShare)
user_configuration = fields.Model(UserConfiguration, default=None)
scheduling_target = fields.Enum(SchedulingTarget, default=None)
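With `custom_scripts` removed from the model, `plugins` is the remaining customization hook on `ClusterConfiguration`. A minimal SDK sketch of post-change usage (the `JupyterPlugin` import path is assumed from the AZTK plugin library, not shown in this diff):

```python
# Sketch only: JupyterPlugin is assumed to live in aztk.spark.models.plugins.
from aztk.spark.models import ClusterConfiguration, SparkToolkit
from aztk.spark.models.plugins import JupyterPlugin

cluster_config = ClusterConfiguration(
    cluster_id="example-cluster",
    vm_size="standard_f2",
    size=2,
    toolkit=SparkToolkit(version="2.3.0"),
    plugins=[JupyterPlugin()],  # replaces what a custom script would previously have set up
)
```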
5 changes: 0 additions & 5 deletions aztk/node_scripts/docker_main.sh
@@ -6,15 +6,10 @@ set -e
source ~/.bashrc
echo "Initializing spark container"

# --------------------
# Setup custom scripts
# --------------------
custom_script_dir=$AZTK_WORKING_DIR/custom-scripts
aztk_dir=$AZTK_WORKING_DIR/aztk

# -----------------------
# Preload jupyter samples
# TODO: remove when we support uploading random (non-executable) files as part custom-scripts
# -----------------------
mkdir -p /mnt/samples

1 change: 0 additions & 1 deletion aztk/node_scripts/install/install.py
@@ -81,6 +81,5 @@ def setup_spark_container():
spark.start_spark_worker()

plugins.setup_plugins(target=PluginTarget.SparkContainer, is_master=is_master, is_worker=is_worker)
scripts.run_custom_scripts(is_master=is_master, is_worker=is_worker)

open("/tmp/setup_complete", 'a').close()
73 changes: 0 additions & 73 deletions aztk/node_scripts/install/scripts.py

This file was deleted.

9 changes: 4 additions & 5 deletions aztk/spark/models/models.py
@@ -1,10 +1,12 @@
from typing import List
from Cryptodome.PublicKey import RSA

import azure.batch.models as batch_models
from Cryptodome.PublicKey import RSA

import aztk.models
from aztk import error
from aztk.utils import constants, helpers
from aztk.core.models import Model, fields
from aztk.utils import constants, helpers


class SparkToolkit(aztk.models.Toolkit):
@@ -192,7 +194,6 @@ def __init__(self,
id=None,
applications=None,
vm_size=None,
custom_scripts=None,
spark_configuration=None,
toolkit=None,
max_dedicated_nodes=0,
@@ -203,7 +204,6 @@

self.id = id
self.applications = applications
self.custom_scripts = custom_scripts
self.spark_configuration = spark_configuration
self.vm_size = vm_size
self.gpu_enabled = None
@@ -219,7 +219,6 @@ def __init__(self):
def to_cluster_config(self):
return ClusterConfiguration(
cluster_id=self.id,
custom_scripts=self.custom_scripts,
toolkit=self.toolkit,
vm_size=self.vm_size,
size=self.max_dedicated_nodes,
2 changes: 0 additions & 2 deletions aztk/utils/constants.py
@@ -39,8 +39,6 @@
DEFAULT_SPARK_JARS_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'jars')
DEFAULT_SPARK_JOB_CONFIG = os.path.join(os.getcwd(), '.aztk', 'job.yaml')
GLOBAL_SPARK_JOB_CONFIG = os.path.join(HOME_DIRECTORY_PATH, '.aztk', 'job.yaml')

CUSTOM_SCRIPTS_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'custom-scripts')
"""
Source and destination paths for spark init
"""
2 changes: 0 additions & 2 deletions aztk_cli/config.py
@@ -160,7 +160,6 @@ class JobConfig():
def __init__(self):
self.id = None
self.applications = []
self.custom_scripts = None
self.spark_configuration = None
self.vm_size = None
self.toolkit = None
@@ -187,7 +186,6 @@ def _merge_dict(self, config):
self.max_dedicated_nodes = cluster_configuration.get('size')
if cluster_configuration.get('size_low_priority') is not None:
self.max_low_pri_nodes = cluster_configuration.get('size_low_priority')
self.custom_scripts = cluster_configuration.get('custom_scripts')
self.subnet_id = cluster_configuration.get('subnet_id')
self.worker_on_master = cluster_configuration.get("worker_on_master")
scheduling_target = cluster_configuration.get("scheduling_target")
7 changes: 0 additions & 7 deletions aztk_cli/config/cluster.yaml
@@ -29,13 +29,6 @@ size: 2
# username: <username for the linux user to be created> (optional)
username: spark

# **DEPRECATED** Use plugins instead
# custom_scripts:
# - script: </path/to/script.sh or /path/to/script/directory/>
# runOn: <master/worker/all-nodes>
# - script: <./relative/path/to/other/script.sh or ./relative/path/to/other/script/directory/>
# runOn: <master/worker/all-nodes>

# To add your cluster to a virtual network provide the full arm resource id below
# subnet_id: /subscriptions/********-****-****-****-************/resourceGroups/********/providers/Microsoft.Network/virtualNetworks/*******/subnets/******

1 change: 0 additions & 1 deletion aztk_cli/spark/endpoints/job/submit.py
@@ -39,7 +39,6 @@ def execute(args: typing.NamedTuple):
job_configuration = aztk.spark.models.JobConfiguration(
id=job_conf.id,
applications=job_conf.applications,
custom_scripts=job_conf.custom_scripts,
spark_configuration=spark_configuration,
vm_size=job_conf.vm_size,
toolkit=job_conf.toolkit,
1 change: 0 additions & 1 deletion aztk_cli/utils.py
@@ -399,7 +399,6 @@ def print_cluster_conf(cluster_conf: ClusterConfiguration, wait: bool):
log.info("> dedicated: %s", cluster_conf.size)
log.info("> low priority: %s", cluster_conf.size_low_priority)
log.info("cluster vm size: %s", cluster_conf.vm_size)
log.info("custom scripts: %s", len(cluster_conf.custom_scripts) if cluster_conf.custom_scripts else 0)
log.info("subnet ID: %s", cluster_conf.subnet_id)
log.info("file shares: %s",
len(cluster_conf.file_shares) if cluster_conf.file_shares is not None else 0)
2 changes: 1 addition & 1 deletion docs/10-clusters.md
@@ -197,5 +197,5 @@ By default, the `aztk spark cluster ssh` command port forwards the Spark Web UI

## Next Steps
- [Run a Spark job](20-spark-submit.html)
- [Configure the Spark cluster using custom commands](11-custom-scripts.html)
- [Configure the Spark cluster using custom plugins](51-define-plugin.html)
- [Bring your own Docker image or choose between a variety of our supported base images to manage your Spark and Python versions](12-docker-image.html)
4 changes: 1 addition & 3 deletions docs/15-plugins.md
@@ -1,8 +1,6 @@
# Plugins

Plugins are a successor to [custom scripts](11-custom-scripts.html) and are the recommended way of running custom code on the cluster.

Plugins can either be one of the Aztk [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).
Plugins can either be one of the `aztk` [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).

## Supported Plugins
AZTK ships with a library of default plugins that enable auxiliary services to use with your Spark cluster.
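The "custom script plugin" linked above is the migration path for the scripts removed by this commit. A rough sketch of defining one through the SDK, with class and argument names assumed from the plugin-definition docs (51-define-plugin) rather than from this diff:

```python
# Sketch only: names and signatures assumed from AZTK's plugin docs; verify against your version.
from aztk.spark.models.plugins import (PluginConfiguration, PluginFile, PluginTarget,
                                        PluginTargetRole)

# Roughly what a custom_scripts entry with `runOn: master` becomes as a plugin:
setup_plugin = PluginConfiguration(
    name="my-setup-script",
    files=[PluginFile("setup.sh", "/local/path/to/setup.sh")],  # copied onto each target node
    execute="setup.sh",                                         # entry point run during cluster setup
    target=PluginTarget.SparkContainer,                         # run inside the Spark container
    target_role=PluginTargetRole.Master,                        # old `runOn: master` equivalent
)
```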
7 changes: 0 additions & 7 deletions docs/70-jobs.md
@@ -46,13 +46,6 @@ Jobs also require a definition of the cluster on which the Applications will run
software: spark
version: 2.2
subnet_id: <resource ID of a subnet to use (optional)>
custom_scripts:
- List
- of
- paths
- to
- custom
- scripts
```
_Please Note: For more information about Azure VM sizes, see [Azure Batch Pricing](https://azure.microsoft.com/en-us/pricing/details/batch/). And for more information about Docker repositories see [Docker](./12-docker-image.html)._
9 changes: 0 additions & 9 deletions tests/integration_tests/spark/sdk/cluster/test_cluster.py
@@ -74,7 +74,6 @@ def test_create_cluster():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -105,7 +104,6 @@ def test_list_clusters():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -130,7 +128,6 @@ def test_get_remote_login_settings():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -158,7 +155,6 @@ def test_submit():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -199,7 +195,6 @@ def test_get_application_log():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -260,7 +255,6 @@ def test_get_application_status_complete():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -304,7 +298,6 @@ def test_delete_cluster():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -330,7 +323,6 @@ def test_spark_processes_up():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -357,7 +349,6 @@ def test_debug_tool():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)