In [0]:
import json
import os
# import datetime
from datetime import datetime
import copy

import dataiku
from dataikuapi import DSSClient

In [0]:
# ID of the managed folder (opened with dataiku.Folder) that receives every
# file exported by this notebook.
EXPORT_FOLDER_ID = "5fuFDP0G"

# Options passed to DSSProject.export_to_file() for each project export.
# https://developer.dataiku.com/latest/api-reference/python/projects.html#dataikuapi.dss.project.DSSProject.export_to_file
# Each key is listed exactly once: a repeated key in a dict literal is silently
# collapsed by Python and only hides typos.
EXPORT_OPTIONS = {
    'exportUploads': True,
    'exportGitRepository': True,
    #'exportManagedFS': True,  # currently disabled
    'exportAnalysisModels': True,
    'exportSavedModels': True,
    'exportModelEvaluationStores': True,
    'exportAllInputDatasets': True, # this is okay, does not include folders that have files like screenshots
    #'exportAllInputManagedFolders': True,  # currently disabled
    'exportInsightsData': True,
    'exportPromptStudioHistories': True,
}

In [0]:
# API client and destination-folder handle used by the export steps.
client = dataiku.api_client()
export_folder_handle = dataiku.Folder(EXPORT_FOLDER_ID)

# One timestamp for the whole run (local time, YYYYMMDD_HHMMSS), so that all
# files uploaded by a single execution share the same suffix.
export_timestamp = format(datetime.now(), '%Y%m%d_%H%M%S')

In [0]:
# Key fragments that mark a value as sensitive. Matching is a case-insensitive
# substring test, so short entries such as "key" also match longer names
# (e.g. any key that merely contains "key").
SENSITIVE_FIELDS = [
    "password", "passwords", "apiKey", "apiKeys", "token", "tokens",
    "secret", "secrets", "privateKey", "privateKeys", "key", "appSecret",
    "appSecretContent", "credential", "credentials", "accessKey",
    "secretKey", "authToken", "connectionString", "pwd"
]

# Lower-cased once: keys are lower-cased before comparison, so camelCase
# entries such as "connectionString" would otherwise never match anything.
_SENSITIVE_MARKERS = tuple(field.lower() for field in SENSITIVE_FIELDS)

_REDACTED = "[REDACTED]"


def _is_sensitive_key(key):
    """Return True when `key` is a string containing any sensitive marker."""
    if not isinstance(key, str):
        # Non-string keys cannot carry a field name; treat them as harmless
        # instead of failing on `.lower()`.
        return False
    lowered = key.lower()
    return any(marker in lowered for marker in _SENSITIVE_MARKERS)


def _sanitize_value(value, redact_strings=False):
    """Return a sanitized copy of one value found inside a dictionary.

    Dictionaries are sanitized key by key, lists are walked element by element
    (to any nesting depth). When `redact_strings` is True, non-empty strings
    are replaced by the redaction placeholder; this flag is propagated through
    lists but not into dictionaries, whose own keys decide what is redacted.
    """
    if isinstance(value, dict):
        return sanitize_dict(value)
    if isinstance(value, list):
        return [_sanitize_value(item, redact_strings) for item in value]
    if redact_strings and isinstance(value, str) and value:
        return _REDACTED
    # Copy leaves so the result never shares mutable state with the input.
    return copy.deepcopy(value)


def sanitize_dict(obj):
    """Recursively scan a dictionary and redact sensitive information.

    Args:
        obj: The structure to sanitize. Anything that is not a dict is
            returned unchanged.

    Returns:
        A new dictionary (the input is never mutated) in which every
        non-empty string stored under a sensitive key -- directly or inside a
        list under that key -- is replaced by "[REDACTED]". Empty strings and
        non-string scalars are kept as they are. Nested dictionaries and
        lists are always traversed, including those under sensitive keys.
    """
    if not isinstance(obj, dict):
        return obj

    return {
        key: _sanitize_value(value, redact_strings=_is_sensitive_key(key))
        for key, value in obj.items()
    }

In [0]:
def _format_last_modified(project_summary):
    """Return the project's last-modified time as 'YYYY-MM-DD-HH-MM-SS'.

    Reads `versionTag.lastModifiedOn` from the summary. Returns
    'unknown_date' when the version tag or the timestamp is missing, instead
    of failing on a missing `versionTag`.
    """
    version_tag = project_summary.get('versionTag') or {}
    last_modified_timestamp = version_tag.get('lastModifiedOn')
    if not last_modified_timestamp:
        return 'unknown_date'
    # The timestamp is typically in milliseconds since epoch; fromtimestamp()
    # renders it in the server's local time zone.
    last_modified_date = datetime.fromtimestamp(last_modified_timestamp / 1000)
    return last_modified_date.strftime('%Y-%m-%d-%H-%M-%S')


project_keys = client.list_project_keys()

# Export every project to a temporary archive, then upload the archive to the
# export folder under a name carrying the project's last-modified date.
for project_key in project_keys:
    project = client.get_project(project_key)

    # The summary includes the version tag holding the last-modified timestamp
    project_summary = project.get_summary()
    formatted_date = _format_last_modified(project_summary)

    export_file_path = '/tmp/{}.zip'.format(project_key)
    upload_filename = f'PROJECT__{project_key}__{formatted_date}.zip'

    try:
        # Export the project to a local file
        print(f"Starting export for {project_key} ...")
        project.export_to_file(export_file_path, options=EXPORT_OPTIONS)
        print(f"Finished export for {project_key} ...")

        # Upload the exported file to the managed folder
        print(f"Starting copy to folder: {project_key} ...")
        with open(export_file_path, 'rb') as f:
            export_folder_handle.upload_stream(upload_filename, f)
        print(f"Finished copying to folder: {project_key} ...")
    finally:
        # Remove the archive only after its handle is closed, and also when
        # the export or upload failed, so /tmp does not fill up with partial
        # project archives.
        if os.path.exists(export_file_path):
            os.remove(export_file_path)

In [0]:
# Collect the ID of every plugin returned by the instance.
# NOTE: despite the "store" naming, no store-only filter is applied here; the
# former `isInstalledFromStore` condition is disabled, so all plugins are listed.
plugins = client.list_plugins()
store_plugins = []
for plugin in plugins:
    store_plugins.append(plugin['id'])

# Temporary local file holding one plugin ID per line
plugins_output_file_path = '/tmp/store_plugins.txt'

with open(plugins_output_file_path, 'w') as output_file:
    output_file.writelines(plugin_id + '\n' for plugin_id in store_plugins)

# Push the list into the managed folder under a timestamped name
with open(plugins_output_file_path, 'rb') as f:
    export_folder_handle.upload_stream(f'list_plugins_{export_timestamp}.txt', f)

# The local copy is no longer needed once uploaded
os.remove(plugins_output_file_path)

print("Plugin IDs exported to the managed folder.")

In [0]:
# Temporary local file for the sanitized connection definitions (JSON content).
connections_output_filepath = '/tmp/connections.txt'

try:
    # Retrieve list of all connections
    connections = client.list_connections()

    # Fetch each connection's definition and strip sensitive values before it
    # is written anywhere.
    connections_details = [
        sanitize_dict(client.get_connection(conn).get_definition())
        for conn in connections
    ]

    # Write connection details to JSON file
    with open(connections_output_filepath, 'w') as file:
        json.dump(connections_details, file, indent=4)

    # Upload the file to the managed folder
    with open(connections_output_filepath, 'rb') as f:
        export_folder_handle.upload_stream(f"list_connections_{export_timestamp}.json", f)

    print("Connection details successfully exported to managed folder.")

except Exception as e:
    # Best effort: a failure here is reported but does not stop the notebook.
    print(f"An error occurred: {e}")

finally:
    # Clean up after the handles are closed, and also when a step above
    # failed, so connection details never linger in /tmp.
    if os.path.exists(connections_output_filepath):
        os.remove(connections_output_filepath)

In [0]:
# Code Environment names, sorted alphabetically, one per line
code_env_name_list = sorted(env['envName'] for env in client.list_code_envs())

# Temporary local file (relative to the notebook's working directory)
code_env_names_path = 'code_env_names.txt'

try:
    with open(code_env_names_path, 'w') as f:
        f.write('\n'.join(code_env_name_list))

    # Upload the text file to the managed folder
    with open(code_env_names_path, 'rb') as f:
        export_folder_handle.upload_stream(f"list_codeenvs_{export_timestamp}.txt", f)
finally:
    # Remove the local copy once its handle is closed, even if the upload failed.
    if os.path.exists(code_env_names_path):
        os.remove(code_env_names_path)

In [0]:

def export_code_environments():
    """Export the definitions of all code environments to the managed folder.

    Each environment returned by `client.list_code_envs()` is fetched with the
    language reported in its own `envLang` field, its definition is grouped
    under the lower-cased language name ("python", "r", ...), sanitized with
    `sanitize_dict`, and uploaded as `list_codeenvs_<export_timestamp>.json`.

    Environments whose definition cannot be fetched are reported and skipped.
    Any other failure is printed rather than raised.

    Returns:
        None in every case.
    """
    # Temporary local file (relative to the notebook's working directory)
    output_filename = "code_environments_export.json"
    upload_filename = f"list_codeenvs_{export_timestamp}.json"

    try:
        # The "python" key is always present, even when no environment exists.
        code_envs_details = {"python": []}

        for env in client.list_code_envs():
            env_name = env.get("envName")
            # Use the environment's own language instead of assuming Python,
            # which made every non-Python environment fail. Falls back to
            # Python when the field is absent -- TODO confirm it is always set.
            env_lang = env.get("envLang") or "PYTHON"
            try:
                # Get detailed definition of the code environment
                env_details = client.get_code_env(env_lang, env_name).get_definition()
            except Exception as e:
                print(f"Error exporting {env_lang} environment {env_name}: {str(e)}")
                continue
            code_envs_details.setdefault(env_lang.lower(), []).append(env_details)

        # Write code environment details to JSON file
        with open(output_filename, 'w') as file:
            json.dump(sanitize_dict(code_envs_details), file, indent=4)

        with open(output_filename, 'rb') as f:
            export_folder_handle.upload_stream(upload_filename, f)

        # Report the uploaded name: the local file is only a temporary copy.
        print(f"\nCode environment details successfully uploaded as {upload_filename}")

    except Exception as e:
        print(f"An error occurred during export: {str(e)}")
        return None

    finally:
        # Remove the temporary file after its handle is closed, on success
        # and on failure alike.
        if os.path.exists(output_filename):
            os.remove(output_filename)


export_code_environments()

In [0]:
# Export the instance's general settings, with sensitive values redacted.
settings_raw = client.get_general_settings().get_raw()

# Temporary local file (relative to the notebook's working directory)
settings_output_filename = 'general_settings.json'

try:
    with open(settings_output_filename, 'w') as file:
        json.dump(sanitize_dict(settings_raw), file, indent=4)

    with open(settings_output_filename, 'rb') as f:
        export_folder_handle.upload_stream(f"general_settings_{export_timestamp}.json", f)
finally:
    # Remove the local copy once its handle is closed, even if the upload
    # failed, so the settings dump does not stay on disk.
    if os.path.exists(settings_output_filename):
        os.remove(settings_output_filename)

print('Finished exporting settings.')
