In [0]:
import os
import json
import dataiku
import os
import logging
import time
import re
from dataikuapi.utils import DataikuException
from concurrent.futures import ThreadPoolExecutor

In [0]:
# ID of the Dataiku managed folder that holds the plugin list file
folder_id = 'sH2qSC25'

# Name of the file (inside that managed folder) containing the list of plugin IDs,
# one ID per line
plugins_to_install_file_path = 'dss_server_exports_honker-design-2.amer.dataiku-sandbox.io-2025-06-30_list_plugins_20250701_153326.txt'

In [0]:
# Dataiku API client handle shared by every cell below (code env and plugin operations)
client = dataiku.api_client()

In [0]:
def delete_code_environments_with_pattern():
    """Delete every code environment whose name ends in ``_<digit>``.

    Lists all code environments through the module-level ``client`` and, for
    each entry whose ``envName`` matches ``_[0-9]$``, obtains a handle and
    deletes it. Entries without a name are ignored. A failure while deleting
    one environment is printed and does not stop the loop.

    Returns:
        None. Progress and a final summary are printed.
    """
    code_envs = client.list_code_envs()

    # Names ending with _# where # is a single digit
    pattern = re.compile(r'_[0-9]$')

    print(f"Found {len(code_envs)} code environments in total")
    deleted_count = 0

    for env in code_envs:
        env_id = env.get('envName')

        # A missing/empty name cannot match and would make pattern.search() raise
        if not env_id or not pattern.search(env_id):
            continue

        # The API needs the language (PYTHON, R, ...) alongside the name
        env_type = (env.get('envLang') or '').upper()

        try:
            # Build the handle only for environments that are actually deleted,
            # and inside the try so a bad entry cannot abort the whole cleanup
            client.get_code_env(env_type, env_id).delete()
            print(f"Successfully deleted environment: {env_id}")
            deleted_count += 1
        except Exception as e:
            print(f"Error deleting environment {env_id}: {str(e)}")

    print(f"Deletion complete. Removed {deleted_count} environments.")

# Run the cleanup as soon as this cell executes: every code environment whose
# name matches the pattern inside delete_code_environments_with_pattern is deleted.
print("Starting code environment cleanup...")
delete_code_environments_with_pattern()
print("Cleanup process completed.")

In [0]:
def read_file_from_managed_folder(folder_id, file_name):
    """Read a UTF-8 text file from a managed folder and return its lines.

    Args:
        folder_id: ID passed to ``dataiku.Folder`` to open the managed folder.
        file_name: Name of the file to download from that folder.

    Returns:
        list[str]: The file's lines without line terminators, in file order.
        Blank lines are kept as empty strings.
    """
    managed_folder = dataiku.Folder(folder_id)

    # Pull the whole file through the download stream and decode it in one go
    with managed_folder.get_download_stream(file_name) as download:
        text = download.read().decode('utf-8')

    return text.splitlines()

def read_plugin_ids(file_path):
    """Load plugin IDs from a local text file, one ID per line.

    Args:
        file_path: Path of the text file to read.

    Returns:
        list[str]: Each non-blank line with surrounding whitespace removed,
        in file order.
    """
    plugin_ids = []
    with open(file_path, 'r') as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                plugin_ids.append(cleaned)
    return plugin_ids

def is_plugin_installed(client, plugin_id):
    """Tell whether a plugin with the given ID is currently installed.

    Args:
        client: Object exposing ``list_plugins()``, which yields dicts with an
            ``'id'`` key.
        plugin_id: ID to look for.

    Returns:
        bool: True if any listed plugin has exactly this ID, else False.
    """
    for installed in client.list_plugins():
        if installed['id'] == plugin_id:
            return True
    return False

def install_plugin(client, plugin_id):
    """Install a plugin from the store and wait until the install has finished.

    Args:
        client: API client exposing ``install_plugin_from_store(plugin_id)``.
        plugin_id: ID of the plugin to install.

    Returns:
        None. Any error (starting the install or while waiting for it) is
        printed rather than raised, so a batch of installs keeps going.
    """
    print(f"Attempting to install plugin: {plugin_id}")
    try:
        future = client.install_plugin_from_store(plugin_id)
        # install_plugin_from_store only starts the installation and hands back
        # a future; block on it so a caller that checks is_plugin_installed
        # right afterwards sees the final state instead of a half-done install.
        if future is not None:
            future.wait_for_result()
    except Exception as e:
        print(f"Error installing plugin {plugin_id}: {e}")

# Read plugin IDs from a plaintext file in the managed folder.
# Entries are stripped and blank lines dropped so stray whitespace or empty
# lines in the file never turn into an install attempt for a bogus plugin ID.
plugin_ids = [
    entry.strip()
    for entry in read_file_from_managed_folder(folder_id, plugins_to_install_file_path)
    if entry.strip()
]

# Install every listed plugin that is not present yet, then re-check to confirm
for plugin_id in plugin_ids:
    if is_plugin_installed(client, plugin_id):
        continue

    install_plugin(client, plugin_id)
    if not is_plugin_installed(client, plugin_id):
        print(f"Attempt to install plugin {plugin_id} FAILED!")

In [0]:
def _process_code_env(code_env_info):
    try:
        envName = code_env_info['envName']
        code_env = client.get_code_env(code_env_info['envLang'], envName)

        # rebuild it from scratch
        print(f'Starting rebuilding {envName} ...')
        env_path = os.path.join('/data/dataiku/dss_data/code-envs/python', envName)

        res = code_env.update_packages(force_rebuild_env=True)

        if not res['messages']['success']:
            print(f"FAILED: {envName}")
            failed_builds.append(envName)
            print(res)

    except Exception as e:
        print(f"Exception in {envName}: {e}")
        pass

In [0]:
# Every code environment known to the instance, plus the shared list that
# _process_code_env appends to when a rebuild fails.
code_envs = client.list_code_envs()
failed_builds = []

# One worker thread per CPU, never fewer than one. Leaving the `with` block
# waits for all submitted rebuilds to finish.
max_workers = os.cpu_count() or 1
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    executor.map(_process_code_env, code_envs)

if failed_builds:
    print(f'Environments that failed to build: {failed_builds}')

print('Finished rebuilding all existing code environments from scratch')

In [0]:
# Sequentially create the code environment of every installed plugin.
for p in client.list_plugins():
    try:
        plugin_name = p.get('id', None)

        # Kick off the code env creation, then block until it is done
        build = client.get_plugin(plugin_name).create_code_env()
        print(f"Starting to build code env for plugin {plugin_name}...")
        build.wait_for_result()
        print(f"Finished code env for plugin {plugin_name}.")
    except DataikuException:
        # NOTE(review): every DataikuException is reported with this message,
        # whatever its actual cause.
        print(f"Plugin {plugin_name} does not require a code environment. Skipping.")

print("Finished building code environments for all plugins.")

In [0]:

# Create plugin code environments only where the plugin names one and it does
# not exist yet.
for p in client.list_plugins():
    try:
        plugin_name = p.get('id', None)
        plugin = client.get_plugin(plugin_name)

        # Raw plugin settings; only plugins carrying 'codeEnvName' are handled
        plugin_details = plugin.get_settings().get_raw()
        if 'codeEnvName' not in plugin_details:
            continue

        required_env_name = plugin_details.get('codeEnvName', None)

        # Re-list on every iteration: environments created earlier in this
        # loop must be visible to later plugins.
        existing_env_names = {
            env.get('envName', '') for env in client.list_code_envs()
        }

        if required_env_name in existing_env_names:
            print(f"Code environment '{required_env_name}' for plugin {plugin_name} already exists. Skipping.")
            continue

        # Kick off the code env creation, then block until it is done
        future = plugin.create_code_env()
        print(f"Starting to build code env '{required_env_name}' for plugin {plugin_name}...")
        result = future.wait_for_result()
        print(f"Finished code env for plugin {plugin_name}.")
    except DataikuException as e:
        print(f"Error with plugin {plugin_name}: {str(e)}")

In [0]:
def _process_plugin_code_env(plugin):
    try:
        plugin_name = plugin['id']
        plugin_handle = client.get_plugin(plugin_name)
         # Start creating the code env, and wait for it to be done
        future = plugin_handle.create_code_env()
        print(f"Starting to build code env for plugin {plugin_name}...")
        result = plugin_handle.wait_for_result()
        print(f"Finished code env for plugin {plugin_name}.")
    except DataikuException as e:
        print(f"Plugin {plugin_name} does not require a code environment. Skipping.")

In [0]:
# Installed plugins to process, plus the list reported in the summary below.
plugins_list = client.list_plugins()
failed_builds = []

# One worker thread per CPU, never fewer than one. Leaving the `with` block
# waits for all submitted builds to finish.
max_workers = os.cpu_count() or 1
with ThreadPoolExecutor(max_workers=max_workers) as executor:
    executor.map(_process_plugin_code_env, plugins_list)

if failed_builds:
    print(f'Environments that failed to build: {failed_builds}')

print("Finished building code environments for all plugins.")

In [0]:
# Delete unused plugins.

# Configure logging: INFO level, timestamped "time - level - message" lines
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
    level=logging.INFO,
)

# Root logger, used by the plugin cleanup helpers in this cell
logger = logging.getLogger()

def get_all_installed_plugins(client):
    """Return whatever ``client.list_plugins()`` yields, logging how many there are."""
    installed = client.list_plugins()
    logger.info(f"Found {len(installed)} installed plugins")
    return installed

def check_plugin_usage(client, plugin_id):
    """Look up where a plugin is used.

    Args:
        client: API client exposing ``get_plugin(plugin_id)``.
        plugin_id: ID of the plugin to inspect.

    Returns:
        The ``usages`` attribute of the plugin's ``list_usages()`` result, or
        an empty list when anything raises.

    NOTE(review): because errors also yield an empty list, a caller cannot
    tell "lookup failed" apart from "no usages".
    """
    try:
        plugin_handle = client.get_plugin(plugin_id)
        usages = plugin_handle.list_usages().usages
        logger.info(f"Plugin {plugin_id} has {len(usages)} usages")
    except Exception as e:
        logger.error(f"Error checking usages for plugin {plugin_id}: {str(e)}")
        # Fall back to "nothing found" on any error
        return []
    return usages

def delete_unused_plugin(client, plugin_id):
    """Delete a plugin from the DSS instance and wait for the deletion to finish.

    Args:
        client: API client exposing ``get_plugin(plugin_id)``.
        plugin_id: ID of the plugin to delete.

    Returns:
        bool: True once the deletion completed without error; False if the
        request or the deletion task raised (the error is logged).
    """
    try:
        future = client.get_plugin(plugin_id).delete()
        # delete() starts the deletion and hands back a future for the task.
        # Wait on it so success is only logged once the plugin is really gone
        # and a failing or refused deletion is reported as an error here.
        if future is not None and hasattr(future, 'wait_for_result'):
            future.wait_for_result()
        logger.info(f"Successfully deleted plugin {plugin_id}")
        return True
    except Exception as e:
        logger.error(f"Failed to delete plugin {plugin_id}: {str(e)}")
        return False


try:
    plugins = get_all_installed_plugins(client)

    # Running totals for the final summary line
    checked_count = deleted_count = error_count = 0

    for plugin in plugins:
        plugin_id = plugin.get('id')
        checked_count += 1

        # Entries without an ID are counted as checked but otherwise ignored
        if not plugin_id:
            logger.warning("Found plugin without ID, skipping")
            continue

        logger.info(f"Checking plugin: {plugin_id}")

        # NOTE(review): check_plugin_usage also returns an empty list when the
        # lookup errors, so such plugins go down the deletion path as well.
        usages = check_plugin_usage(client, plugin_id)

        if len(usages) != 0:
            logger.info(f"Plugin {plugin_id} is in use, keeping")
            continue

        # No usages reported: remove the plugin and tally the outcome
        logger.info(f"Plugin {plugin_id} has no usages, deleting...")
        if delete_unused_plugin(client, plugin_id):
            deleted_count += 1
        else:
            error_count += 1

    logger.info(f"Plugin cleanup complete. Checked: {checked_count}, Deleted: {deleted_count}, Errors: {error_count}")

except Exception as e:
    logger.error(f"An error occurred during execution: {str(e)}")
