diff --git a/.github/workflows/pythonbuild.yml b/.github/workflows/pythonbuild.yml index dd099fd667..9f7f341546 100644 --- a/.github/workflows/pythonbuild.yml +++ b/.github/workflows/pythonbuild.yml @@ -131,6 +131,7 @@ jobs: - flytekit-spark - flytekit-sqlalchemy - flytekit-vaex + - flytekit-vscode - flytekit-whylogs exclude: # flytekit-modin depends on ray which does not have a 3.11 wheel yet. diff --git a/Dockerfile.dev b/Dockerfile.dev index f3dfd5280a..d7d9b46308 100644 --- a/Dockerfile.dev +++ b/Dockerfile.dev @@ -23,6 +23,7 @@ COPY . /flytekit RUN pip install -e /flytekit RUN pip install -e /flytekit/plugins/flytekit-k8s-pod RUN pip install -e /flytekit/plugins/flytekit-deck-standard +RUN pip install -e /flytekit/plugins/flytekit-vscode RUN pip install scikit-learn ENV PYTHONPATH "/flytekit:/flytekit/plugins/flytekit-k8s-pod:/flytekit/plugins/flytekit-deck-standard:" diff --git a/plugins/flytekit-vscode/README.md b/plugins/flytekit-vscode/README.md new file mode 100644 index 0000000000..3297828f62 --- /dev/null +++ b/plugins/flytekit-vscode/README.md @@ -0,0 +1,41 @@ +# Flytekit VSCode Plugin + +The Flytekit VSCode plugin offers an easy solution for users to run Python tasks within an interactive VSCode server, compatible with any image. `@vscode` is a decorator that users place between `@task` and the user function. With `@vscode`, the task installs the VSCode server dependencies (skipping the download if they already exist) and runs a VSCode server instead of the user-defined function. + +To install the plugin, run the following command: + +```bash +pip install flytekitplugins-vscode +``` + +## Task Example +```python +from flytekit import task +from flytekitplugins.vscode import vscode + +@task +@vscode +def train(): + ... +``` + +## User Guide +1. Build the image with Dockerfile.dev `docker build --push . -f Dockerfile.dev -t localhost:30000/flytekit:dev --build-arg PYTHON_VERSION=3.8` +2. Run the decorated task on the remote. 
For example: `pyflyte run --remote --image localhost:30000/flytekit:dev [PYTHONFILE] [WORKFLOW|TASK] [ARGS]...` +3. Once the code server is prepared, you can forward a local port to the pod. For example: `kubectl port-forward -n [NAMESPACE] [PODNAME] 8080:8080`. +4. You can access the server by opening a web browser and navigating to `localhost:8080`. + +VSCode example screenshot: +![VSCode example screenshot](./docs/example.png) + +## Build Custom Image with VSCode Plugin +If users want to skip the vscode downloading process at runtime, they have the option to create a custom image with vscode by including the following lines in their Dockerfile. +```Dockerfile +# Include this line if the image does not already have 'curl' installed. ++ RUN apt-get -y install curl +# Download and extract the binary, and ensure it's added to the system's $PATH. ++ RUN mkdir /tmp/code-server ++ RUN curl -kfL -o /tmp/code-server/code-server-4.18.0-linux-amd64.tar.gz https://github.com/coder/code-server/releases/download/v4.18.0/code-server-4.18.0-linux-amd64.tar.gz ++ RUN tar -xzf /tmp/code-server/code-server-4.18.0-linux-amd64.tar.gz -C /tmp/code-server/ ++ ENV PATH="/tmp/code-server/code-server-4.18.0-linux-amd64/bin:${PATH}" +``` diff --git a/plugins/flytekit-vscode/docs/example.png b/plugins/flytekit-vscode/docs/example.png new file mode 100644 index 0000000000..0f98ab3773 Binary files /dev/null and b/plugins/flytekit-vscode/docs/example.png differ diff --git a/plugins/flytekit-vscode/flytekitplugins/vscode/__init__.py b/plugins/flytekit-vscode/flytekitplugins/vscode/__init__.py new file mode 100644 index 0000000000..ab5643676b --- /dev/null +++ b/plugins/flytekit-vscode/flytekitplugins/vscode/__init__.py @@ -0,0 +1,13 @@ +""" +.. currentmodule:: flytekitplugins.vscode + +This package contains things that are useful when extending Flytekit. + +.. 
def execute_command(cmd):
    """
    Execute a command and block until it finishes.

    Args:
        cmd (str or sequence of str): The command to run. A string is executed through the
            shell, which is what existing callers pass. A list/tuple of arguments is executed
            directly, without a shell.

    Raises:
        RuntimeError: If the command exits with a non-zero return code. The message contains
            the decoded stderr of the command.
    """
    # Log first so the command is recorded even when spawning the process fails.
    logger.info(f"cmd: {cmd}")

    process = subprocess.Popen(
        cmd,
        # Only a plain string needs the shell; an argv list is executed as-is.
        shell=isinstance(cmd, str),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        # Decode the output so logs and error messages hold readable text instead of the
        # repr of a bytes object; undecodable bytes are replaced rather than raising.
        encoding="utf-8",
        errors="replace",
    )
    stdout, stderr = process.communicate()
    if process.returncode != 0:
        raise RuntimeError(f"Command {cmd} failed with error: {stderr}")
    logger.info(f"stdout: {stdout}")
    logger.info(f"stderr: {stderr}")
def download_file(url, target_dir="."):
    """
    Download a file from a given URL using fsspec.

    Args:
        url (str): The URL of the file to download. Must be an http or https URL whose path
            ends with a file name.
        target_dir (str, optional): The directory where the file should be saved. It is
            created when it does not exist. Defaults to current directory.

    Returns:
        str: The path to the downloaded file.

    Raises:
        ValueError: If the URL is not an http/https URL or does not name a file.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    parsed = urlparse(url)
    # Check the parsed scheme instead of a string prefix, so inputs such as
    # "httpx://..." or "http-something" are rejected.
    if parsed.scheme not in ("http", "https") or not parsed.netloc:
        raise ValueError(f"URL {url} is not valid. Only http/https is supported.")

    # Derive the local filename from the URL path only, so a query string or fragment
    # does not end up in the file name.
    file_name = os.path.basename(parsed.path)
    if not file_name:
        raise ValueError(f"URL {url} is not valid. It does not point to a file.")

    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    local_file_name = os.path.join(target_dir, file_name)

    fs = fsspec.filesystem("http")

    # Use fsspec to get the remote file and save it locally
    logger.info(f"Downloading {url}... to {os.path.abspath(local_file_name)}")
    fs.get(url, local_file_name)
    logger.info("File downloaded successfully!")

    return local_file_name
def _extract_tarball(tar, dest_dir):
    """Extract ``tar`` into ``dest_dir``, refusing members that would land outside of it."""
    if hasattr(tarfile, "data_filter"):
        # Interpreters that ship extraction filters can do the safety checks themselves.
        tar.extractall(path=dest_dir, filter="data")
        return

    # Fallback for interpreters without extraction filters: reject entries whose
    # resolved path escapes the destination directory.
    root = os.path.realpath(dest_dir)
    for member in tar.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if os.path.commonpath([root, target]) != root:
            raise RuntimeError(f"Refusing to extract {member.name}: path escapes {dest_dir}")
    tar.extractall(path=dest_dir)


def download_vscode(
    code_server_remote_path: str,
    code_server_dir_name: str,
):
    """
    Download vscode server and plugins from remote to local and add the directory of binary executable to $PATH.

    Nothing is downloaded when an executable named EXECUTABLE_NAME is already found on $PATH.

    Args:
        code_server_remote_path (str): The URL of the code-server tarball.
        code_server_dir_name (str): The name of the code-server directory.

    Raises:
        RuntimeError: If the tarball contains an entry that would be extracted outside of
            DOWNLOAD_DIR (only on interpreters without tarfile extraction filters).
    """

    # If the code server already exists in the container, skip downloading
    executable_path = shutil.which(EXECUTABLE_NAME)
    if executable_path is not None:
        logger.info(f"Code server binary already exists at {executable_path}")
        logger.info("Skipping downloading code server...")
        return

    logger.info("Code server is not in $PATH, start downloading code server...")

    # Create DOWNLOAD_DIR if not exist
    logger.info(f"DOWNLOAD_DIR: {DOWNLOAD_DIR}")
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    logger.info(f"Start downloading files to {DOWNLOAD_DIR}")

    # Download remote file to local
    code_server_tar_path = download_file(code_server_remote_path, DOWNLOAD_DIR)

    # Extract the tarball. The URL is caller-supplied, so do not trust member paths.
    with tarfile.open(code_server_tar_path, "r:gz") as tar:
        _extract_tarball(tar, DOWNLOAD_DIR)

    code_server_bin_dir = os.path.join(DOWNLOAD_DIR, code_server_dir_name, "bin")
    if not os.path.isdir(code_server_bin_dir):
        # The untarred directory name may differ from what the caller passed in;
        # surface that here instead of failing later when the server is launched.
        logger.warning(f"{code_server_bin_dir} does not exist after extraction; check code_server_dir_name")

    # Add the directory of code-server binary to $PATH. PATH may be unset, and an empty
    # entry must not be appended.
    current_path = os.environ.get("PATH", "")
    os.environ["PATH"] = os.pathsep.join(part for part in (code_server_bin_dir, current_path) if part)
def vscode(
    _task_function: Optional[Callable] = None,
    server_up_seconds: Optional[int] = DEFAULT_UP_SECONDS,
    port: Optional[int] = 8080,
    enable: Optional[bool] = True,
    code_server_remote_path: Optional[str] = DEFAULT_CODE_SERVER_REMOTE_PATH,
    # The untarred directory name may be different from the tarball name
    code_server_dir_name: Optional[str] = DEFAULT_CODE_SERVER_DIR_NAME,
    pre_execute: Optional[Callable] = None,
    post_execute: Optional[Callable] = None,
):
    """
    vscode decorator modifies a container to run a VSCode server:
    1. Overrides the user function with a VSCode setup function.
    2. Download vscode server and plugins from remote to local.
    3. Launches and monitors the VSCode server.
    4. Terminates after server_up_seconds seconds.

    The decorated function itself is not called by the wrapper; when the decorator is enabled
    the wrapper ends by calling ``sys.exit(0)``. The server is launched with ``--auth none``
    and bound to ``0.0.0.0:<port>``.

    Args:
        _task_function (function, optional): The user function to be decorated. Defaults to None.
        server_up_seconds (int, optional): How long the server is kept up before it is
            terminated. Defaults to DEFAULT_UP_SECONDS.
        port (int, optional): The port to be used by the VSCode server. Defaults to 8080.
        enable (bool, optional): Whether to enable the VSCode decorator. When False the user
            function is returned unchanged. Defaults to True.
        code_server_remote_path (str, optional): The URL of the code-server tarball.
        code_server_dir_name (str, optional): The name of the code-server directory.
        pre_execute (function, optional): The function to be executed before the vscode setup function.
        post_execute (function, optional): The function to be executed before the vscode is self-terminated.

    Returns:
        The wrapped function when used as ``@vscode``, or a decorator when used as
        ``@vscode(...)``.
    """

    def wrapper(fn):
        if not enable:
            return fn

        @wraps(fn)
        def inner_wrapper(*args, **kwargs):
            # 0. Executes the pre_execute function if provided.
            if pre_execute is not None:
                pre_execute()
                logger.info("Pre execute function executed successfully!")

            # 1. Downloads the VSCode server from Internet to local.
            download_vscode(
                code_server_remote_path=code_server_remote_path,
                code_server_dir_name=code_server_dir_name,
            )

            # 2. Launches and monitors the VSCode server.
            # Run the function in the background
            logger.info(f"Start the server for {server_up_seconds} seconds...")
            child_process = multiprocessing.Process(
                target=execute_command, kwargs={"cmd": f"code-server --bind-addr 0.0.0.0:{port} --auth none"}
            )
            child_process.start()

            try:
                time.sleep(server_up_seconds)

                # 3. Terminates the server after server_up_seconds
                logger.info(f"{server_up_seconds} seconds passed. Terminating...")
                if post_execute is not None:
                    post_execute()
                    logger.info("Post execute function executed successfully!")
            finally:
                # Always reap the server process, even when post_execute raises or the
                # sleep is interrupted; otherwise the child would be left running.
                child_process.terminate()
                child_process.join()
            sys.exit(0)

        return inner_wrapper

    # for the case when the decorator is used without arguments
    if _task_function is not None:
        return wrapper(_task_function)
    # for the case when the decorator is used with arguments
    else:
        return wrapper
cookiecutter + # flytekit + # rich-click +cloudpickle==3.0.0 + # via flytekit +cookiecutter==2.4.0 + # via flytekit +croniter==2.0.1 + # via flytekit +cryptography==41.0.5 + # via + # azure-identity + # azure-storage-blob + # msal + # pyjwt + # pyopenssl + # secretstorage +dataclasses-json==0.5.9 + # via flytekit +decorator==5.1.1 + # via gcsfs +deprecated==1.2.14 + # via flytekit +diskcache==5.6.3 + # via flytekit +docker==6.1.3 + # via flytekit +docker-image-py==0.1.12 + # via flytekit +docstring-parser==0.15 + # via flytekit +flyteidl==1.10.0 + # via flytekit +flytekit==1.10.1b0 + # via flytekitplugins-vscode +frozenlist==1.4.0 + # via + # aiohttp + # aiosignal +fsspec==2023.9.2 + # via + # adlfs + # flytekit + # gcsfs + # s3fs +gcsfs==2023.9.2 + # via flytekit +gitdb==4.0.11 + # via gitpython +gitpython==3.1.40 + # via flytekit +google-api-core==2.12.0 + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.23.4 + # via + # gcsfs + # google-api-core + # google-auth-oauthlib + # google-cloud-core + # google-cloud-storage + # kubernetes +google-auth-oauthlib==1.1.0 + # via gcsfs +google-cloud-core==2.3.3 + # via google-cloud-storage +google-cloud-storage==2.13.0 + # via gcsfs +google-crc32c==1.5.0 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.6.0 + # via google-cloud-storage +googleapis-common-protos==1.61.0 + # via + # flyteidl + # flytekit + # google-api-core + # grpcio-status +grpcio==1.59.2 + # via + # flytekit + # grpcio-status +grpcio-status==1.59.2 + # via flytekit +idna==3.4 + # via + # requests + # yarl +importlib-metadata==6.8.0 + # via + # flytekit + # keyring +importlib-resources==6.1.1 + # via keyring +isodate==0.6.1 + # via azure-storage-blob +jaraco-classes==3.3.0 + # via keyring +jeepney==0.8.0 + # via + # keyring + # secretstorage +jinja2==3.1.2 + # via cookiecutter +jmespath==1.0.1 + # via botocore +joblib==1.3.2 + # via flytekit +jsonpickle==3.0.2 + # via flytekit +keyring==24.2.0 + # via 
flytekit +kubernetes==28.1.0 + # via flytekit +markdown-it-py==3.0.0 + # via rich +markupsafe==2.1.3 + # via jinja2 +marshmallow==3.20.1 + # via + # dataclasses-json + # marshmallow-enum + # marshmallow-jsonschema +marshmallow-enum==1.5.1 + # via + # dataclasses-json + # flytekit +marshmallow-jsonschema==0.13.0 + # via flytekit +mashumaro==3.10 + # via flytekit +mdurl==0.1.2 + # via markdown-it-py +more-itertools==10.1.0 + # via jaraco-classes +msal==1.25.0 + # via + # azure-datalake-store + # azure-identity + # msal-extensions +msal-extensions==1.0.0 + # via azure-identity +multidict==6.0.4 + # via + # aiohttp + # yarl +mypy-extensions==1.0.0 + # via typing-inspect +natsort==8.4.0 + # via flytekit +numpy==1.24.4 + # via + # flytekit + # pandas + # pyarrow +oauthlib==3.2.2 + # via + # kubernetes + # requests-oauthlib +packaging==23.2 + # via + # docker + # marshmallow +pandas==1.5.3 + # via flytekit +portalocker==2.8.2 + # via msal-extensions +protobuf==4.25.0 + # via + # flyteidl + # google-api-core + # googleapis-common-protos + # grpcio-status + # protoc-gen-swagger +protoc-gen-swagger==0.1.0 + # via flyteidl +pyarrow==10.0.1 + # via flytekit +pyasn1==0.5.0 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.3.0 + # via google-auth +pycparser==2.21 + # via cffi +pygments==2.16.1 + # via rich +pyjwt[crypto]==2.8.0 + # via msal +pyopenssl==23.3.0 + # via flytekit +python-dateutil==2.8.2 + # via + # arrow + # botocore + # croniter + # flytekit + # kubernetes + # pandas +python-json-logger==2.0.7 + # via flytekit +python-slugify==8.0.1 + # via cookiecutter +pytimeparse==1.1.8 + # via flytekit +pytz==2023.3.post1 + # via + # croniter + # flytekit + # pandas +pyyaml==6.0.1 + # via + # cookiecutter + # flytekit + # kubernetes +regex==2023.10.3 + # via docker-image-py +requests==2.31.0 + # via + # azure-core + # azure-datalake-store + # cookiecutter + # docker + # flytekit + # gcsfs + # google-api-core + # google-cloud-storage + # kubernetes + # msal + # 
requests-oauthlib +requests-oauthlib==1.3.1 + # via + # google-auth-oauthlib + # kubernetes +rich==13.6.0 + # via + # cookiecutter + # flytekit + # rich-click +rich-click==1.7.1 + # via flytekit +rsa==4.9 + # via google-auth +s3fs==2023.9.2 + # via flytekit +secretstorage==3.3.3 + # via keyring +six==1.16.0 + # via + # azure-core + # isodate + # kubernetes + # python-dateutil +smmap==5.0.1 + # via gitdb +sortedcontainers==2.4.0 + # via flytekit +statsd==3.3.0 + # via flytekit +text-unidecode==1.3 + # via python-slugify +types-python-dateutil==2.8.19.14 + # via arrow +typing-extensions==4.8.0 + # via + # aioitertools + # azure-core + # azure-storage-blob + # flytekit + # mashumaro + # rich + # rich-click + # typing-inspect +typing-inspect==0.9.0 + # via dataclasses-json +urllib3==1.26.18 + # via + # botocore + # docker + # flytekit + # kubernetes + # requests +websocket-client==1.6.4 + # via + # docker + # kubernetes +wheel==0.41.3 + # via flytekit +wrapt==1.15.0 + # via + # aiobotocore + # deprecated + # flytekit +yarl==1.9.2 + # via aiohttp +zipp==3.17.0 + # via + # importlib-metadata + # importlib-resources diff --git a/plugins/flytekit-vscode/setup.py b/plugins/flytekit-vscode/setup.py new file mode 100644 index 0000000000..700f489e67 --- /dev/null +++ b/plugins/flytekit-vscode/setup.py @@ -0,0 +1,37 @@ +from setuptools import setup + +PLUGIN_NAME = "vscode" + +microlib_name = f"flytekitplugins-{PLUGIN_NAME}" + +plugin_requires = ["flytekit>=1.1.0b0,<2.0.0"] + +__version__ = "0.0.0+develop" + +setup( + name=microlib_name, + version=__version__, + author="flyteorg", + author_email="admin@flyte.org", + description="This package holds the vscode plugins for flytekit", + namespace_packages=["flytekitplugins"], + packages=[f"flytekitplugins.{PLUGIN_NAME}"], + install_requires=plugin_requires, + license="apache2", + python_requires=">=3.8", + classifiers=[ + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "License :: OSI Approved :: 
@mock.patch("sys.exit")
@mock.patch("time.sleep")
@mock.patch("multiprocessing.Process")
@mock.patch("flytekitplugins.vscode.decorator.download_vscode")
def test_vscode_plugin(mock_download_vscode, mock_process, mock_sleep, mock_exit):
    """Run a @vscode-decorated task locally with download, process, sleep and exit mocked out,
    and check the whole server lifecycle: download, start, wait, terminate, join, exit(0)."""

    @task
    @vscode
    def t():
        return

    @workflow
    def wf():
        t()

    wf()

    mock_download_vscode.assert_called_once()

    # The decorator builds exactly one server process and must start it ...
    mock_process.assert_called_once()
    server_process = mock_process.return_value
    server_process.start.assert_called_once()

    # ... keep it up for the configured time ...
    mock_sleep.assert_called_once()

    # ... and tear it down before exiting successfully.
    server_process.terminate.assert_called_once()
    server_process.join.assert_called_once()
    mock_exit.assert_called_once_with(0)