# Install dependencies if needed

In [0]:
# ! python3.11 -m pip install dataiku-api-client

# Imports

In [0]:
import json
import os
import re
# from concurrent.futures import ThreadPoolExecutor
# from datetime import datetime
# import copy

from dataikuapi.dssclient import DSSClient
from dataikuapi.utils import DataikuException

# Configuration

In [0]:
# Connection settings. Both values are read from the environment first so the
# API key does not have to be stored in the notebook; the literals are fallbacks
# that keep the previous behaviour when the variables are not set.
DSS_URL = os.environ.get("DSS_URL", "https://honker-design.amer.dataiku-sandbox.io/")

# Export DSS_API_KEY in the kernel's environment instead of editing this cell.
DSS_API_KEY = os.environ.get("DSS_API_KEY", "REDACTED")  # Placeholder fallback, not a usable key

# Plugin ids processed by the "Install plugins and their code environments" loop.
PLUGINS_TO_INSTALL = [
    "google-search-tool",
    "nlp-analysis",
    "nlp-preparation",
    "timeseries-preparation",
    "govern",
    "sql-question-answering-tool",
    "graphrag",
    "agent-connect",
    "traces-explorer",
    "api-connect"
]

# Names of the folders created directly under the root project folder.
TOP_LEVEL_FOLDERS_TO_CREATE = ["Administrative", "Demo", "Personal"]

# Maps the internal env type passed to client.create_internal_code_env() to the
# code env name that is looked up to decide whether creation can be skipped.
INTERNAL_CODE_ENVS_TO_CREATE = {
    "DEEP_HUB_IMAGE_CLASSIFICATION_CODE_ENV": "INTERNAL_image_classification_v1",
    "DEEP_HUB_IMAGE_OBJECT_DETECTION_CODE_ENV": "INTERNAL_object_detection_v1",
    "PII_DETECTION_CODE_ENV": "INTERNAL_pii_detection_v1",
    "HUGGINGFACE_LOCAL_CODE_ENV": "INTERNAL_huggingface_local_v1",
    "RAG_CODE_ENV": "INTERNAL_retrieval_augmented_generation_v1",
    "DOCUMENT_EXTRACTION_CODE_ENV": "INTERNAL_document_extraction_v1",
}

# Single client instance shared by every cell below.
client = DSSClient(DSS_URL, DSS_API_KEY)

# Function definitions

In [0]:
# https://developer.dataiku.com/latest/api-reference/python/client.html#dataikuapi.DSSClient.install_plugin_from_store

def get_installed_plugin_names(client):
    """Collect the ids of the plugins reported by ``client.list_plugins()``.

    Args:
        client: Object exposing ``list_plugins()``; each listed item is
            indexed with the ``'id'`` key.

    Returns:
        set: The distinct plugin ids, or an empty set when the listing raises
        ``DataikuException`` (the error is printed, not re-raised).
    """
    try:
        plugin_ids = {entry['id'] for entry in client.list_plugins()}
    except DataikuException as e:
        print(f"Error retrieving installed plugins: {str(e)}")
        plugin_ids = set()
    return plugin_ids
    
def get_installed_code_env_names(client):
    """Collect the names of the code envs reported by ``client.list_code_envs()``.

    Args:
        client: Object exposing ``list_code_envs()``; each listed item is
            indexed with the ``'envName'`` key.

    Returns:
        set: The distinct env names, or an empty set when the listing raises
        ``DataikuException`` (the error is printed, not re-raised).
    """
    try:
        env_names = {entry['envName'] for entry in client.list_code_envs()}
    except DataikuException as e:
        print(f"Error retrieving installed code environments: {str(e)}")
        env_names = set()
    return env_names

def is_plugin_installed(client, plugin_id):
    """Tell whether ``plugin_id`` is among the installed plugin ids.

    Args:
        client: Passed through to ``get_installed_plugin_names``.
        plugin_id: Plugin id to look for.

    Returns:
        bool: True when ``plugin_id`` is in the set returned by
        ``get_installed_plugin_names(client)``.
    """
    installed_ids = get_installed_plugin_names(client)
    return plugin_id in installed_ids

    
def install_plugin(client, plugin_id):
    """Install a plugin from the plugin store and wait for the install to end.

    Args:
        client: Object exposing ``install_plugin_from_store(plugin_id)``.
        plugin_id: Identifier of the plugin to install.

    Returns:
        None. Any exception raised while starting or waiting for the
        installation is printed and not re-raised.
    """
    print(f"Attempting to install plugin: {plugin_id}")
    try:
        # The store install runs asynchronously and hands back a future; block
        # on it so later steps (e.g. building the plugin's code env) only run
        # once the plugin is really there, and so install failures surface here.
        future = client.install_plugin_from_store(plugin_id)
        future.wait_for_result()
    except Exception as e:
        print(f"Error installing plugin {plugin_id}: {e}")

    
def get_plugin_code_env_name(client, plugin):
    """Read the ``'codeEnvName'`` entry from a plugin's raw settings.

    Args:
        client: Object exposing ``get_plugin(plugin)``.
        plugin: Plugin id handed to ``client.get_plugin``.

    Returns:
        The value stored under ``'codeEnvName'``, or None when the key is
        absent or when a ``DataikuException`` is raised (the error is printed).
    """
    try:
        plugin_handle = client.get_plugin(plugin)
        raw_settings = plugin_handle.get_settings().get_raw()
        return raw_settings.get('codeEnvName')
    except DataikuException as e:
        print(f"Error retrieving code environment name for plugin {plugin}: {str(e)}")
        return None

def does_code_env_exist(client, env_name):
    """Tell whether ``env_name`` is among the installed code env names.

    Args:
        client: Passed through to ``get_installed_code_env_names``.
        env_name: Code env name to look for.

    Returns:
        bool: True when ``env_name`` is in the set returned by
        ``get_installed_code_env_names(client)``; False otherwise, including
        when a ``DataikuException`` is raised (the error is printed).
    """
    try:
        known_envs = get_installed_code_env_names(client)
        found = env_name in known_envs
    except DataikuException as e:
        print(f"Error checking for code environment {env_name}: {str(e)}")
        found = False
    return found

def install_plugin_code_env(client, plugin_name):
    """Build a plugin's code env and select it in the plugin settings.

    Nothing is built when the plugin settings already name a code env and that
    env is found by ``does_code_env_exist``.

    Args:
        client: Object exposing ``get_plugin(plugin_name)``.
        plugin_name: Id of the plugin whose code env should be built.

    Returns:
        None in every case. A ``DataikuException`` raised along the way is
        printed together with its message and not re-raised.
    """
    try:
        ce_name = get_plugin_code_env_name(client, plugin_name)
        if ce_name and does_code_env_exist(client, ce_name):
            # The env named in the plugin settings is already there.
            return None

        plugin = client.get_plugin(plugin_name)
        # Start creating the code env, and wait for it to be done
        print(f"Starting to build code env for plugin {plugin_name}...")
        future = plugin.create_code_env()
        creation = future.wait_for_result()
        print(f"Finished code env for plugin {plugin_name}.")
        env_name = creation["envName"]

        # Set it as the current plugin code env
        settings = plugin.get_settings()
        settings.set_code_env(env_name)
        settings.save()
    except DataikuException as e:
        # This branch is also reached on genuine failures, not only for plugins
        # without a code env, so the underlying error is part of the message.
        print(f"{plugin_name} doesn't seem to require a code env (or its creation failed): {e}")
    return None

In [0]:
# NOTE(review): one-off call for a single plugin. 'agent-connect' is also listed
# in PLUGINS_TO_INSTALL, so the install loop further down handles it as well, and
# this cell runs before that loop has installed anything. Confirm whether this
# cell is still needed.
install_plugin_code_env(client, 'agent-connect')

# Create Project Folders

In [0]:
# Module-level handle on the root project folder; read by
# create_dss_project_top_level_folder() defined below.
root_folder = client.get_root_project_folder()

def create_dss_project_top_level_folder(folder_name, parent_folder=None):
    """Create a child folder unless one with the same name already exists.

    Args:
        folder_name: Name of the folder to create.
        parent_folder: Folder object exposing ``list_child_folders()`` and
            ``create_sub_folder(name)``. Defaults to the module-level
            ``root_folder`` so existing one-argument calls keep working.

    Returns:
        None. The outcome is printed; any exception is printed and not
        re-raised.
    """
    try:
        # Resolved inside the try so a missing module-level root_folder is
        # reported like any other failure.
        target = root_folder if parent_folder is None else parent_folder

        # Check if the folder already exists
        existing_names = [folder.name for folder in target.list_child_folders()]
        if folder_name in existing_names:
            print(f"Folder '{folder_name}' already exists.")
            return
        target.create_sub_folder(folder_name)
        print(f"Folder '{folder_name}' created successfully.")
    except Exception as e:
        print(f"Failed to create folder '{folder_name}': {str(e)}")

# Create each folder listed in TOP_LEVEL_FOLDERS_TO_CREATE. The helper prints a
# message and returns when a folder with that name already exists, so re-running
# this cell does not create duplicates.
for name in TOP_LEVEL_FOLDERS_TO_CREATE:
    create_dss_project_top_level_folder(name)

# Install plugins and their code environments

In [0]:
for plugin in PLUGINS_TO_INSTALL:
    # Only ask the store for plugins that are not on the instance yet.
    if not is_plugin_installed(client, plugin):
        install_plugin(client, plugin)
    else:
        print(f"Plugin '{plugin}' is already installed.")
    # The code env step runs for every plugin, whether it was just installed or
    # already present: there does not seem to be a reliable, uniform way to tell
    # up front whether a plugin needs a code env or what that env is called, so
    # the helper is left to try and report.
    install_plugin_code_env(client, plugin)

# Create internal code environments

In [0]:
# https://developer.dataiku.com/latest/api-reference/python/client.html#dataikuapi.DSSClient.create_internal_code_env

for key, value in INTERNAL_CODE_ENVS_TO_CREATE.items():
    # key is the internal env type handed to the client; value is the env name
    # used for the existence check.
    if not does_code_env_exist(client, value):
        print(f"Internal code environment '{value}' does not seem to exist, creating it.")
        client.create_internal_code_env(key)
        continue
    print(f"Internal code environment '{value}' already exists. Skipping creation.")

# Delete duplicated code environments

In [0]:
def delete_code_environments_with_pattern(client, pattern=r'_[0-9]$'):
    """Delete every code env whose name matches ``pattern``.

    Args:
        client: Object exposing ``list_code_envs()`` and
            ``get_code_env(env_lang, env_name)``.
        pattern: Regular expression (string or compiled) searched in each env
            name. The default matches names ending in ``_`` followed by a
            single digit.

    Returns:
        None. Progress and a final count are printed; a failure on one env is
        printed and the remaining envs are still processed.
    """
    name_pattern = re.compile(pattern)

    code_envs = client.list_code_envs()
    print(f"Found {len(code_envs)} code environments in total")
    deleted_count = 0

    for env in code_envs:
        env_id = env.get('envName')
        # Entries without a name can neither be matched nor addressed; skipping
        # them avoids a TypeError from searching None.
        if not env_id or not name_pattern.search(env_id):
            continue

        env_type = (env.get('envLang') or '').upper()  # PYTHON, R, etc.
        try:
            # The handle is built only for matching envs and inside the try, so
            # a problem with one env does not stop the whole clean-up.
            client.get_code_env(env_type, env_id).delete()
            print(f"Successfully deleted environment: {env_id}")
            deleted_count += 1
        except Exception as e:
            print(f"Error deleting environment {env_id}: {str(e)}")

    print(f"Successfully removed {deleted_count} duplicate code environments.")

# Destructive: removes every code env whose name matches the function's pattern
# (by default a name ending in "_" plus one digit). Review the env list first.
delete_code_environments_with_pattern(client)

In [0]:
# TODO: set admin user to use Full Designer license
# TODO: create a new code env that has old version of pandas
# project setup: https://honker-design.amer.dataiku-sandbox.io/projects/SOL_DKU_MONITORING/setup


# Perform sanity checks on instance

In [0]:
# NOTE(review): the value returned by perform_instance_sanity_check() is not
# stored or displayed; only the last expression of the cell is shown. Confirm
# whether the check result itself should be captured and inspected.
client.perform_instance_sanity_check()
client.get_sanity_check_codes()

In [0]:
# Possible future additions
# https://developer.dataiku.com/latest/api-reference/python/other-administration.html#dataikuapi.dss.admin.DSSGeneralSettings
# client.create_connection()
# client.create_user(login, password, display_name='', source_type='LOCAL', groups=None, profile='DATA_SCIENTIST', email=None)