This repository has been archived by the owner on Feb 3, 2021. It is now read-only.

Deprecate: remove custom scripts (#650)
jafreck authored Aug 18, 2018
1 parent 293f297 commit 442228a
Showing 20 changed files with 10 additions and 172 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -1,5 +1,4 @@
# custom
my-custom-scripts/
.aztk

# Environments
4 changes: 1 addition & 3 deletions README.md
@@ -115,9 +115,7 @@ By default, we port forward the Spark Web UI to *localhost:8080*, Spark Jobs UI

You can configure these settings in the *.aztk/ssh.yaml* file.

NOTE: When working interactively, you may want to use tools like Jupyter or RStudio-Server depending on whether or not you are a python or R user. To do so, you need to setup your cluster with the appropriate docker image and custom scripts:
- [how to setup Jupyter with Pyspark](https://github.com/Azure/aztk/wiki/PySpark-on-Azure-with-AZTK)
- [how to setup RStudio-Server with Sparklyr](https://github.com/Azure/aztk/wiki/SparklyR-on-Azure-with-AZTK)
NOTE: When working interactively, you may want to use tools like Jupyter or RStudio-Server. To do so, you need to setup your cluster with the appropriate docker image and plugin. See [Plugins](./docs/15-plugins.md) for more information.

### 5. Manage and Monitor your Spark Cluster

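As context for the updated README note, a minimal sketch of enabling those interactive tools through plugins in `.aztk/cluster.yaml` (plugin names are assumed from the AZTK supported-plugin list, not shown in this diff):

```yaml
# Sketch only: plugin names assumed from the AZTK plugin library.
plugins:
  - name: jupyter          # Jupyter notebook server for PySpark users
  - name: rstudio_server   # RStudio Server for R / Sparklyr users
```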
31 changes: 3 additions & 28 deletions aztk/internal/cluster_data/node_data.py
@@ -4,17 +4,17 @@
import zipfile
from pathlib import Path
from typing import List

import yaml

from aztk import models
from aztk.utils import constants, file_utils, secure_utils
from aztk.error import InvalidCustomScriptError
from aztk.utils import constants, file_utils, secure_utils

ROOT_PATH = constants.ROOT_PATH

# Constants for node data
NODE_SCRIPT_FOLDER = "aztk"
CUSTOM_SCRIPT_FOLDER = "custom-scripts"
CUSTOM_SCRIPT_METADATA_FILE = "custom-scripts.yaml"
PLUGIN_FOLDER = "plugins"


@@ -30,7 +30,6 @@ def __init__(self, cluster_config: models.ClusterConfiguration):

def add_core(self):
self._add_node_scripts()
self._add_custom_scripts()
self._add_plugins()
self._add_spark_configuration()
self._add_user_conf()
@@ -72,30 +71,6 @@ def add_dir(self, path: str, dest: str = None, exclude: List[str] = None):
if self._includeFile(file, exclude):
self.add_file(os.path.join(base, file), os.path.join(dest, relative_folder), binary=False)

def _add_custom_scripts(self):
data = []
if not self.cluster_config.custom_scripts:
return

for index, custom_script in enumerate(self.cluster_config.custom_scripts):
if isinstance(custom_script.script, (str, bytes)):
new_file_name = str(index) + '_' + os.path.basename(custom_script.script)
data.append(dict(script=new_file_name, runOn=str(custom_script.run_on)))
try:
with io.open(custom_script.script, 'r', encoding='UTF-8') as f:
self.zipf.writestr(
os.path.join(CUSTOM_SCRIPT_FOLDER, new_file_name),
f.read().replace('\r\n', '\n'))
except FileNotFoundError:
raise InvalidCustomScriptError("Custom script '{0}' doesn't exists.".format(custom_script.script))
elif isinstance(custom_script.script, models.File):
new_file_name = str(index) + '_' + custom_script.script.name
self.zipf.writestr(
os.path.join('custom-scripts', new_file_name), custom_script.script.payload.getvalue())

self.zipf.writestr(
os.path.join(CUSTOM_SCRIPT_FOLDER, CUSTOM_SCRIPT_METADATA_FILE), yaml.dump(data, default_flow_style=False))

def _add_spark_configuration(self):
spark_configuration = self.cluster_config.spark_configuration
if not spark_configuration:
1 change: 0 additions & 1 deletion aztk/models/cluster_configuration.py
@@ -35,7 +35,6 @@ class ClusterConfiguration(Model):

subnet_id = fields.String(default=None)
plugins = fields.List(PluginConfiguration)
custom_scripts = fields.List(CustomScript)
file_shares = fields.List(FileShare)
user_configuration = fields.Model(UserConfiguration, default=None)
scheduling_target = fields.Enum(SchedulingTarget, default=None)
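With `custom_scripts` removed from the model, `plugins` is the remaining customization hook on `ClusterConfiguration`. A minimal SDK sketch of post-change usage (the `JupyterPlugin` import path is assumed from the AZTK plugin library, not shown in this diff):

```python
# Sketch only: JupyterPlugin is assumed to live in aztk.spark.models.plugins.
from aztk.spark.models import ClusterConfiguration, SparkToolkit
from aztk.spark.models.plugins import JupyterPlugin

cluster_config = ClusterConfiguration(
    cluster_id="example-cluster",
    vm_size="standard_f2",
    size=2,
    toolkit=SparkToolkit(version="2.3.0"),
    plugins=[JupyterPlugin()],  # replaces what a custom script would previously have set up
)
```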
5 changes: 0 additions & 5 deletions aztk/node_scripts/docker_main.sh
@@ -6,15 +6,10 @@ set -e
source ~/.bashrc
echo "Initializing spark container"

# --------------------
# Setup custom scripts
# --------------------
custom_script_dir=$AZTK_WORKING_DIR/custom-scripts
aztk_dir=$AZTK_WORKING_DIR/aztk

# -----------------------
# Preload jupyter samples
# TODO: remove when we support uploading random (non-executable) files as part custom-scripts
# -----------------------
mkdir -p /mnt/samples

1 change: 0 additions & 1 deletion aztk/node_scripts/install/install.py
@@ -81,6 +81,5 @@ def setup_spark_container():
spark.start_spark_worker()

plugins.setup_plugins(target=PluginTarget.SparkContainer, is_master=is_master, is_worker=is_worker)
scripts.run_custom_scripts(is_master=is_master, is_worker=is_worker)

open("/tmp/setup_complete", 'a').close()
73 changes: 0 additions & 73 deletions aztk/node_scripts/install/scripts.py

This file was deleted.

9 changes: 4 additions & 5 deletions aztk/spark/models/models.py
@@ -1,10 +1,12 @@
from typing import List
from Cryptodome.PublicKey import RSA

import azure.batch.models as batch_models
from Cryptodome.PublicKey import RSA

import aztk.models
from aztk import error
from aztk.utils import constants, helpers
from aztk.core.models import Model, fields
from aztk.utils import constants, helpers


class SparkToolkit(aztk.models.Toolkit):
@@ -192,7 +194,6 @@ def __init__(self,
id=None,
applications=None,
vm_size=None,
custom_scripts=None,
spark_configuration=None,
toolkit=None,
max_dedicated_nodes=0,
@@ -203,7 +204,6 @@

self.id = id
self.applications = applications
self.custom_scripts = custom_scripts
self.spark_configuration = spark_configuration
self.vm_size = vm_size
self.gpu_enabled = None
@@ -219,7 +219,6 @@ def __init__(self):
def to_cluster_config(self):
return ClusterConfiguration(
cluster_id=self.id,
custom_scripts=self.custom_scripts,
toolkit=self.toolkit,
vm_size=self.vm_size,
size=self.max_dedicated_nodes,
2 changes: 0 additions & 2 deletions aztk/utils/constants.py
@@ -39,8 +39,6 @@
DEFAULT_SPARK_JARS_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'jars')
DEFAULT_SPARK_JOB_CONFIG = os.path.join(os.getcwd(), '.aztk', 'job.yaml')
GLOBAL_SPARK_JOB_CONFIG = os.path.join(HOME_DIRECTORY_PATH, '.aztk', 'job.yaml')

CUSTOM_SCRIPTS_DEST = os.path.join(ROOT_PATH, 'node_scripts', 'custom-scripts')
"""
Source and destination paths for spark init
"""
2 changes: 0 additions & 2 deletions aztk_cli/config.py
@@ -160,7 +160,6 @@ class JobConfig():
def __init__(self):
self.id = None
self.applications = []
self.custom_scripts = None
self.spark_configuration = None
self.vm_size = None
self.toolkit = None
@@ -187,7 +186,6 @@ def _merge_dict(self, config):
self.max_dedicated_nodes = cluster_configuration.get('size')
if cluster_configuration.get('size_low_priority') is not None:
self.max_low_pri_nodes = cluster_configuration.get('size_low_priority')
self.custom_scripts = cluster_configuration.get('custom_scripts')
self.subnet_id = cluster_configuration.get('subnet_id')
self.worker_on_master = cluster_configuration.get("worker_on_master")
scheduling_target = cluster_configuration.get("scheduling_target")
7 changes: 0 additions & 7 deletions aztk_cli/config/cluster.yaml
@@ -29,13 +29,6 @@ size: 2
# username: <username for the linux user to be created> (optional)
username: spark

# **DEPRECATED** Use plugins instead
# custom_scripts:
# - script: </path/to/script.sh or /path/to/script/directory/>
# runOn: <master/worker/all-nodes>
# - script: <./relative/path/to/other/script.sh or ./relative/path/to/other/script/directory/>
# runOn: <master/worker/all-nodes>

# To add your cluster to a virtual network provide the full arm resource id below
# subnet_id: /subscriptions/********-****-****-****-************/resourceGroups/********/providers/Microsoft.Network/virtualNetworks/*******/subnets/******

1 change: 0 additions & 1 deletion aztk_cli/spark/endpoints/job/submit.py
@@ -39,7 +39,6 @@ def execute(args: typing.NamedTuple):
job_configuration = aztk.spark.models.JobConfiguration(
id=job_conf.id,
applications=job_conf.applications,
custom_scripts=job_conf.custom_scripts,
spark_configuration=spark_configuration,
vm_size=job_conf.vm_size,
toolkit=job_conf.toolkit,
1 change: 0 additions & 1 deletion aztk_cli/utils.py
@@ -399,7 +399,6 @@ def print_cluster_conf(cluster_conf: ClusterConfiguration, wait: bool):
log.info("> dedicated: %s", cluster_conf.size)
log.info("> low priority: %s", cluster_conf.size_low_priority)
log.info("cluster vm size: %s", cluster_conf.vm_size)
log.info("custom scripts: %s", len(cluster_conf.custom_scripts) if cluster_conf.custom_scripts else 0)
log.info("subnet ID: %s", cluster_conf.subnet_id)
log.info("file shares: %s",
len(cluster_conf.file_shares) if cluster_conf.file_shares is not None else 0)
2 changes: 1 addition & 1 deletion docs/10-clusters.md
@@ -197,5 +197,5 @@ By default, the `aztk spark cluster ssh` command port forwards the Spark Web UI

## Next Steps
- [Run a Spark job](20-spark-submit.html)
- [Configure the Spark cluster using custom commands](11-custom-scripts.html)
- [Configure the Spark cluster using custom plugins](51-define-plugin.html)
- [Bring your own Docker image or choose between a variety of our supported base images to manage your Spark and Python versions](12-docker-image.html)
4 changes: 1 addition & 3 deletions docs/15-plugins.md
@@ -1,8 +1,6 @@
# Plugins

Plugins are a successor to [custom scripts](11-custom-scripts.html) and are the recommended way of running custom code on the cluster.

Plugins can either be one of the Aztk [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).
Plugins can either be one of the `aztk` [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).

## Supported Plugins
AZTK ships with a library of default plugins that enable auxiliary services to use with your Spark cluster.
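The "custom script plugin" linked above is the migration path for the scripts removed by this commit. A rough sketch of defining one through the SDK, with class and argument names assumed from the plugin-definition docs (51-define-plugin) rather than from this diff:

```python
# Sketch only: names and signatures assumed from AZTK's plugin docs; verify against your version.
from aztk.spark.models.plugins import (PluginConfiguration, PluginFile, PluginTarget,
                                        PluginTargetRole)

# Roughly what a custom_scripts entry with `runOn: master` becomes as a plugin:
setup_plugin = PluginConfiguration(
    name="my-setup-script",
    files=[PluginFile("setup.sh", "/local/path/to/setup.sh")],  # copied onto each target node
    execute="setup.sh",                                         # entry point run during cluster setup
    target=PluginTarget.SparkContainer,                         # run inside the Spark container
    target_role=PluginTargetRole.Master,                        # old `runOn: master` equivalent
)
```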
7 changes: 0 additions & 7 deletions docs/70-jobs.md
@@ -46,13 +46,6 @@ Jobs also require a definition of the cluster on which the Applications will run
software: spark
version: 2.2
subnet_id: <resource ID of a subnet to use (optional)>
custom_scripts:
- List
- of
- paths
- to
- custom
- scripts
```
_Please Note: For more information about Azure VM sizes, see [Azure Batch Pricing](https://azure.microsoft.com/en-us/pricing/details/batch/). And for more information about Docker repositories see [Docker](./12-docker-image.html)._
9 changes: 0 additions & 9 deletions tests/integration_tests/spark/sdk/cluster/test_cluster.py
@@ -74,7 +74,6 @@ def test_create_cluster():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -105,7 +104,6 @@ def test_list_clusters():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -130,7 +128,6 @@ def test_get_remote_login_settings():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -158,7 +155,6 @@ def test_submit():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -199,7 +195,6 @@ def test_get_application_log():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -260,7 +255,6 @@ def test_get_application_status_complete():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -304,7 +298,6 @@ def test_delete_cluster():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -330,7 +323,6 @@ def test_spark_processes_up():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)
@@ -357,7 +349,6 @@ def test_debug_tool():
size_low_priority=0,
vm_size="standard_f2",
subnet_id=None,
custom_scripts=None,
file_shares=None,
toolkit=aztk.spark.models.SparkToolkit(version="2.3.0"),
spark_configuration=None)