
Commit c98df7d

Feature: Added custom scripts functionality for plugins with the cli (Deprecate custom scripts) (#517)

timotheeguerin authored Apr 27, 2018
1 parent 07ac9b7 commit c98df7d
Showing 16 changed files with 284 additions and 68 deletions.
11 changes: 8 additions & 3 deletions aztk/internal/configuration_base.py
@@ -14,12 +14,17 @@ def from_dict(cls, args: dict):
         The dict is cleaned from null values and passed expanded to the constructor
         """
         try:
-            clean = dict((k, v) for k, v in args.items() if v)
-            return cls(**clean)
-        except TypeError as e:
+            return cls._from_dict(args)
+        except (ValueError, TypeError) as e:
             pretty_args = yaml.dump(args, default_flow_style=False)
             raise AztkError("{0} {1}\n{2}".format(cls.__name__, str(e), pretty_args))
 
+
+    @classmethod
+    def _from_dict(cls, args: dict):
+        clean = dict((k, v) for k, v in args.items() if v)
+        return cls(**clean)
+
     def validate(self):
         raise NotImplementedError("Validate not implemented")
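For context, this change splits deserialization in two: `from_dict` keeps the shared null-stripping and error handling, while `_from_dict` becomes an override point for subclasses. A minimal sketch of the pattern (the `ExampleConfiguration` class, `Color` enum, and fields are illustrative, not part of this commit):

```python
from enum import Enum

from aztk.internal import ConfigurationBase


class Color(Enum):
    Red = "red"
    Blue = "blue"


class ExampleConfiguration(ConfigurationBase):
    """Hypothetical subclass showing the new _from_dict override point."""

    def __init__(self, name: str = None, color: Color = None):
        super().__init__()
        self.name = name
        self.color = color

    @classmethod
    def _from_dict(cls, args: dict):
        # Coerce plain strings (e.g. parsed from yaml) into enum members before
        # the base class strips null values and calls the constructor.
        if "color" in args:
            args["color"] = Color(args["color"])
        return super()._from_dict(args)


# from_dict still wraps any ValueError/TypeError in an AztkError:
config = ExampleConfiguration.from_dict({"name": "demo", "color": "red"})
```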
3 changes: 3 additions & 0 deletions aztk/models/models.py
@@ -134,6 +134,9 @@ def validate(self) -> bool:
                 "You must configure a VNET to use AZTK in mixed mode (dedicated and low priority nodes). Set the VNET's subnet_id in your cluster.yaml."
             )
 
+        if self.custom_scripts:
+            logging.warning("Custom scripts are DEPRECATED and will be removed in 0.8.0. Use plugins instead. See https://aztk.readthedocs.io/en/latest/15-plugins.html")
+
 
 class RemoteLogin:
     def __init__(self, ip_address, port):
58 changes: 54 additions & 4 deletions aztk/models/plugins/internal/plugin_reference.py
@@ -1,21 +1,71 @@
-from aztk.error import InvalidPluginConfigurationError, InvalidModelError
+import os
+
+from aztk.error import InvalidModelError
 from aztk.internal import ConfigurationBase
 from aztk.models import PluginConfiguration
+from aztk.models.plugins import PluginFile, PluginTarget, PluginTargetRole
+
 from .plugin_manager import plugin_manager
 
 
 class PluginReference(ConfigurationBase):
     """
     Contains the configuration to use a plugin
     Args:
         name (str): Name of the plugin (must be the name of one of the provided plugins if no script is provided)
+        script (str): Path to a custom script to run as the plugin
+        target (PluginTarget): Target for the plugin. Defaults to SparkContainer.
+            This can only be used if providing a script
+        target_role (PluginTargetRole): Target role. Defaults to all nodes. This can only be used if providing a script
         args (dict): If using name, these are the arguments to pass to the plugin
     """
-    def __init__(self, name, args: dict = None):
+    def __init__(self,
+                 name: str = None,
+                 script: str = None,
+                 target: PluginTarget = None,
+                 target_role: PluginTargetRole = None,
+                 args: dict = None):
         super().__init__()
         self.name = name
+        self.script = script
+        self.target = target
+        self.target_role = target_role
         self.args = args or dict()
 
+    @classmethod
+    def _from_dict(cls, args: dict):
+        if "target" in args:
+            args["target"] = PluginTarget(args["target"])
+        if "target_role" in args:
+            args["target_role"] = PluginTargetRole(args["target_role"])
+
+        return super()._from_dict(args)
+
     def get_plugin(self) -> PluginConfiguration:
+        self.validate()
+
+        if self.script:
+            return self._plugin_from_script()
+
         return plugin_manager.get_plugin(self.name, self.args)
 
     def validate(self) -> bool:
-        if not self.name:
-            raise InvalidModelError("Plugin is missing a name")
+        if not self.name and not self.script:
+            raise InvalidModelError("Plugin must either specify the name of an existing plugin or the path to a script.")
+
+        if self.script and not os.path.isfile(self.script):
+            raise InvalidModelError("Plugin script file doesn't exist: '{0}'".format(self.script))
+
+    def _plugin_from_script(self):
+        script_filename = os.path.basename(self.script)
+        name = self.name or os.path.splitext(script_filename)[0]
+        return PluginConfiguration(
+            name=name,
+            execute=script_filename,
+            target=self.target,
+            target_role=self.target_role or PluginTargetRole.All,
+            files=[
+                PluginFile(script_filename, self.script),
+            ],
+        )
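To illustrate the two resolution paths that `get_plugin` now supports, a short sketch (the `setup.sh` path is hypothetical and must exist on disk, since `validate` checks for it):

```python
from aztk.models.plugins.internal import PluginReference

# Name-only reference: resolved against the built-in plugin_manager.
plugin = PluginReference(name="jupyter").get_plugin()

# Script-only reference: get_plugin() first validates that the file exists on
# disk, then packages it as a PluginFile. The plugin name falls back to the
# script's basename without its extension ("setup" here).
ref = PluginReference(script="setup.sh")  # hypothetical local script
plugin = ref.get_plugin()
print(plugin.name)     # "setup"
print(plugin.execute)  # "setup.sh"
```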
19 changes: 9 additions & 10 deletions aztk/models/plugins/plugin_configuration.py
@@ -8,8 +8,8 @@ class PluginTarget(Enum):
     """
     Where this plugin should run
     """
-    SparkContainer = "spark-container",
-    Host = "host",
+    SparkContainer = "spark-container"
+    Host = "host"
 
 
 class PluginTargetRole(Enum):
@@ -18,7 +18,6 @@ class PluginTargetRole(Enum):
     All = "all-nodes"
 
 
-
 class PluginPort:
     """
     Definition for a port that should be opened on node
@@ -54,17 +53,17 @@ class PluginConfiguration(ConfigurationBase):
 
     def __init__(self,
                  name: str,
-                 ports: List[PluginPort]=None,
-                 files: List[PluginFile]=None,
-                 execute: str=None,
+                 ports: List[PluginPort] = None,
+                 files: List[PluginFile] = None,
+                 execute: str = None,
                  args=None,
                  env=None,
-                 target_role: PluginTargetRole=PluginTargetRole.Master,
-                 target: PluginTarget=PluginTarget.SparkContainer):
+                 target_role: PluginTargetRole = None,
+                 target: PluginTarget = None):
         self.name = name
         # self.docker_image = docker_image
-        self.target = target
-        self.target_role = target_role
+        self.target = target or PluginTarget.SparkContainer
+        self.target_role = target_role or PluginTargetRole.Master
         self.ports = ports or []
         self.files = files or []
         self.args = args or []
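The point of moving the defaults out of the signature: `None` now reads as "not specified", so an upstream caller such as `PluginReference` can forward possibly-unset values without clobbering the defaults. A small sketch of the resulting behavior (not part of the diff):

```python
from aztk.models.plugins.plugin_configuration import (
    PluginConfiguration, PluginTarget, PluginTargetRole)

# Nothing specified: the constructor body applies the defaults.
p = PluginConfiguration(name="demo")
assert p.target is PluginTarget.SparkContainer
assert p.target_role is PluginTargetRole.Master

# Explicit values (including ones forwarded from a PluginReference) are kept.
p = PluginConfiguration(name="demo", target=PluginTarget.Host,
                        target_role=PluginTargetRole.All)
assert p.target is PluginTarget.Host
```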
1 change: 0 additions & 1 deletion aztk/spark/models/plugins/jupyter/configuration.py
@@ -1,7 +1,6 @@
 import os
 from aztk.models.plugins.plugin_configuration import PluginConfiguration, PluginPort, PluginTargetRole
 from aztk.models.plugins.plugin_file import PluginFile
-from aztk.utils import constants
 
 dir_path = os.path.dirname(os.path.realpath(__file__))
5 changes: 2 additions & 3 deletions aztk_cli/config.py
@@ -9,7 +9,6 @@
     DockerConfiguration,
     ClusterConfiguration,
     UserConfiguration,
-    PluginConfiguration,
 )
 from aztk.models.plugins.internal import PluginReference
@@ -127,7 +126,7 @@ def read_cluster_config(
     Reads the config file in the .aztk/ directory (.aztk/cluster.yaml)
     """
     if not os.path.isfile(path):
-        return
+        return None
 
     with open(path, 'r', encoding='UTF-8') as stream:
         try:
@@ -137,7 +136,7 @@
             "Error in cluster.yaml: {0}".format(err))
 
     if config_dict is None:
-        return
+        return None
 
     return cluster_config_from_dict(config_dict)
1 change: 0 additions & 1 deletion aztk_cli/spark/endpoints/cluster/cluster_create.py
@@ -1,5 +1,4 @@
 import argparse
-import os
 import typing
 
 import aztk.spark
11 changes: 7 additions & 4 deletions docs/11-custom-scripts.md
@@ -1,4 +1,7 @@
 # Custom scripts
+
+**Custom scripts are _DEPRECATED_. Use [plugins](15-plugins.html) instead.**
+
 Custom scripts allow for additional cluster setup steps when the cluster is being provisioned. This is useful
 if you want to install additional software, and if you need to modify the default cluster configuration for things such as modifying spark.conf, adding jars or downloading any files you need in the cluster.
@@ -18,7 +21,7 @@ custom_scripts:
 The first script, simple.sh, will run on all nodes and will be executed first. The next script, master-only.sh will run only on nodes that are Spark masters and after simple.sh. The next script, worker-only.sh, will run last and only on nodes that are Spark workers.
-Directories may also be provided in the custom_scripts section of `.aztk/cluster.yaml`.
+Directories may also be provided in the custom_scripts section of `.aztk/cluster.yaml`.
 
 ```yaml
 custom_scripts:
@@ -50,11 +53,11 @@ A custom-script to install HDFS (2.8.2) is provided at `custom-scripts/hdfs.sh`
 To enable HDFS, add this snippet to the custom_scripts section of your `.aztk/cluster.yaml` configuration file:
 
 ```yaml
-custom_scripts:
+custom_scripts:
   - script: ./custom-scripts/hdfs.sh
     runOn: all-nodes
 ```
 
-When SSHing into the cluster, you will have access to the Namenode UI at the default port 50070. This port can be changed in the ssh.yaml file in your `.aztk/` directory, or by passing the `--namenodeui` flag to the `aztk spark cluster ssh` command.
+When SSHing into the cluster, you will have access to the Namenode UI at the default port 50070. This port can be changed in the ssh.yaml file in your `.aztk/` directory, or by passing the `--namenodeui` flag to the `aztk spark cluster ssh` command.
 
-When enabled on the cluster, HDFS can be used to read or write data locally during program execution.
+When enabled on the cluster, HDFS can be used to read or write data locally during program execution.
30 changes: 29 additions & 1 deletion docs/15-plugins.md
@@ -1,5 +1,9 @@
 # Plugins
 
+Plugins are a successor to [custom scripts](11-custom-scripts.html) and are the recommended way of running custom code on the cluster.
+
+Plugins can either be one of the Aztk [supported plugins](#supported-plugins) or the path to a [local file](#custom-script-plugin).
+
 ## Supported Plugins
 AZTK ships with a library of default plugins that enable auxiliary services to use with your Spark cluster.
@@ -22,7 +26,8 @@ plugins:
   - name: hdfs
   - name: spark_ui_proxy
   - name: rstudio_server
-    version: "1.1.383"
+    args:
+      version: "1.1.383"
 ```
 
 ### Enable a plugin using the SDK
@@ -38,3 +43,26 @@ cluster_config = ClusterConfiguration(
     ]
 )
 ```
+
+## Custom script plugin
+
+This allows you to run your custom code on the cluster.
+
+### Run a custom script plugin with the CLI
+
+#### Example
+```yaml
+plugins:
+  - script: path/to/my/script.sh
+  - name: friendly-name
+    script: path/to/my-other/script.sh
+    target: host
+    target_role: all-nodes
+```
+
+#### Options
+
+* `script`: **Required** Path to the script you want to run
+* `name`: **Optional** Friendly name. Defaults to the name of the script file
+* `target`: **Optional** Where to run the plugin (default: `spark-container`). Can be `spark-container` or `host`
+* `target_role`: **Optional** Which nodes the script runs on (default: `master`). Can be `master`, `worker` or `all-nodes`
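For reference, each mapping under `plugins:` is deserialized into a `PluginReference` (see the SDK changes above). A rough sketch of the equivalent calls, assuming the same keys as the yaml example:

```python
from aztk.models.plugins.internal import PluginReference

# Equivalent of the second yaml entry above; _from_dict coerces the raw
# strings into PluginTarget / PluginTargetRole enum members.
ref = PluginReference.from_dict({
    "name": "friendly-name",
    "script": "path/to/my-other/script.sh",
    "target": "host",
    "target_role": "all-nodes",
})

# Resolves to a PluginConfiguration; raises InvalidModelError if the
# script file does not exist on disk.
plugin = ref.get_plugin()
```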
62 changes: 40 additions & 22 deletions docs/51-define-plugin.md
@@ -37,34 +37,52 @@ cluster_config = ClusterConfiguration(
 ## Parameters
 
 ### `PluginConfiguration`
-| Name          | Required? | Type                            | Description |
-|---------------|-----------|---------------------------------|-------------|
-| `name`        | required  | string                          | Name of your plugin (used to create a folder; a simple name using only letters, dashes and underscores is recommended) |
-| `files`       | required  | List[PluginFile|PluginTextFile] | List of files to upload |
-| `execute`     | required  | str                             | Script to execute. This script must be defined in the files above and must match its remote path |
-| `args`        | optional  | List[str]                       | List of arguments to be passed to your execute scripts |
-| `env`         | optional  | dict                            | List of environment variables to access in the script (can be used to pass arguments to your script instead of args) |
-| `ports`       | optional  | List[PluginPort]                | List of ports to open if the script is running in a container. A port can also be marked public, which makes it accessible when you SSH into the master node. |
-| `target`      | optional  | PluginTarget                    | Where the execute script should run. Potential values are `PluginTarget.SparkContainer` (default) and `PluginTarget.Host` |
-| `target_role` | optional  | PluginTargetRole                | Whether the plugin should run on the master, the workers, or all nodes. You can use environment variables (see below) for different master/worker behavior |
+
+#### `name` | `required` | `string`
+Name of your plugin (used to create a folder; a simple name using only letters, dashes and underscores is recommended)
+
+#### `files` | `required` | `List[PluginFile|PluginTextFile]`
+List of files to upload
+
+#### `execute` | `required` | `str`
+Script to execute. This script must be defined in the files above and must match its remote path
+
+#### `args` | `optional` | `List[str]`
+List of arguments to be passed to your execute scripts
+
+#### `env` | `optional` | `dict`
+List of environment variables to access in the script (can be used to pass arguments to your script instead of args)
+
+#### `ports` | `optional` | `List[PluginPort]`
+List of ports to open if the script is running in a container. A port can also be marked public, which makes it accessible when you SSH into the master node.
+
+#### `target` | `optional` | `PluginTarget`
+Where the execute script should run. Potential values are `PluginTarget.SparkContainer` (default) and `PluginTarget.Host`
+
+#### `target_role` | `optional` | `PluginTargetRole`
+Whether the plugin should run on the master, the workers, or all nodes. You can use environment variables (see below) for different master/worker behavior
 
 ### `PluginFile`
-| Name         | Required? | Type | Description |
-|--------------|-----------|------|-------------|
-| `target`     | required  | str  | Where the file should be dropped relative to the plugin working directory |
-| `local_path` | required  | str  | Path to the local file you want to upload (can come from the plugin's parameters) |
+
+#### `target` | `required` | `str`
+Where the file should be dropped relative to the plugin working directory
+
+#### `local_path` | `required` | `str`
+Path to the local file you want to upload (can come from the plugin's parameters)
 
 ### `TextPluginFile`
-| Name      | Required? | Type              | Description |
-|-----------|-----------|-------------------|-------------|
-| `target`  | required  | str               | Where the file should be dropped relative to the plugin working directory |
-| `content` | required  | str | io.StringIO | Content of the file you want to upload |
+
+#### `target` | `required` | `str`
+Where the file should be dropped relative to the plugin working directory
+
+#### `content` | `required` | `str` | `io.StringIO`
+Content of the file you want to upload
 
 ### `PluginPort`
-| Name       | Required? | Type | Description |
-|------------|-----------|------|-------------|
-| `internal` | required  | int  | Internal port to open on the docker container |
-| `public`   | optional  | bool | If the port should be open publicly (default: `False`) |
+
+#### `internal` | `required` | `int`
+Internal port to open on the docker container
+
+#### `public` | `optional` | `bool`
+If the port should be open publicly (default: `False`)
 
 ## Environment variables available in the plugin
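Tying the parameters together, a sketch of a complete plugin definition in this style (the file names, port number, and `MyPlugin` helper are illustrative, modeled on the built-in plugins such as the Jupyter one above):

```python
import os

from aztk.models.plugins.plugin_configuration import (
    PluginConfiguration, PluginPort, PluginTarget, PluginTargetRole)
from aztk.models.plugins.plugin_file import PluginFile

dir_path = os.path.dirname(os.path.realpath(__file__))


def MyPlugin():
    # Illustrative plugin: uploads a start script and opens one public port.
    return PluginConfiguration(
        name="my_plugin",
        ports=[PluginPort(internal=8888, public=True)],
        target=PluginTarget.SparkContainer,
        target_role=PluginTargetRole.All,
        execute="start.sh",  # must match the target of a file below
        files=[PluginFile("start.sh", os.path.join(dir_path, "start.sh"))],
    )
```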