In [0]:
import os
import dataiku
from concurrent.futures import ThreadPoolExecutor

In [0]:
# Configuration
# Passed as the `force_rebuild_env` argument of every `update_packages()` call
# made by the worker function below. When False, a forced rebuild is still
# attempted as a fallback if the first, non-forced attempt raises.
force_rebuild_env = True

In [0]:
# Shared result buckets: worker threads add environment names to these sets
# and the summary cells at the bottom of the notebook read them.
successful_builds = set()
failed_builds = set()

# Enumerate every code environment known to this DSS instance.
client = dataiku.api_client()
code_envs = client.list_code_envs()

In [0]:
def _process_code_env(code_env_info):
    """Rebuild one code environment and record the outcome.

    The environment name is added to exactly one of the module-level sets
    ``successful_builds`` or ``failed_builds``. No exception escapes this
    function, so one broken environment cannot abort the whole batch.

    Args:
        code_env_info: One item from ``client.list_code_envs()``; must provide
            the ``'envName'`` and ``'envLang'`` keys.

    Behaviour:
        * Calls ``update_packages(force_rebuild_env=force_rebuild_env)``.
        * If that call raises and ``force_rebuild_env`` is False, retries once
          with ``force_rebuild_env=True``.
        * A returned result whose ``['messages']['success']`` is falsy counts
          as a failure and the full result is printed for diagnosis.
    """
    # Bound before the try block so every failure path can report a name,
    # even when the lookup of 'envName' itself is what failed.
    envName = '<unknown>'
    retried = False
    try:
        envName = code_env_info['envName']
        # Each worker thread creates its own API client.
        client = dataiku.api_client()
        code_env = client.get_code_env(code_env_info['envLang'], envName)

        try:
            res = code_env.update_packages(force_rebuild_env=force_rebuild_env)
        except Exception:
            if force_rebuild_env:
                # Already a forced rebuild: nothing stronger to try.
                raise
            print(f'Failed to build {envName} without force rebuild, trying again with force rebuild...')
            retried = True
            res = code_env.update_packages(force_rebuild_env=True)

        if res['messages']['success']:
            if retried:
                print(f'SUCCESS when force rebuilding {envName}')
            successful_builds.add(envName)
        else:
            print(f"FAILED: {envName}")
            failed_builds.add(envName)
            print(res)

    except Exception as e:
        # Always record the failure; previously a raised error during a forced
        # rebuild was swallowed without being added to failed_builds.
        if retried:
            print(f"Still failed to build {envName} after force rebuild:\n   {e}")
        else:
            print(f"FAILED: {envName}\n   {e}")
        failed_builds.add(envName)

In [0]:
# Kept deliberately low (instead of `os.cpu_count() or 1`): running too many
# simultaneous rebuilds can run the machine out of disk space.
max_workers = 2

# Leaving the `with` block waits for every submitted rebuild to finish.
with ThreadPoolExecutor(max_workers=max_workers) as pool:
    pool.map(_process_code_env, code_envs)

In [0]:
# Summarise failures (if any) as a case-insensitively sorted list, one per line.
if failed_builds:
    failed_str = "\n".join(sorted(failed_builds, key=str.lower))
    print(f"\n\n\nEnvironments that failed to build:\n\n{failed_str}")

print('\n\n\nFinished rebuilding code environments from scratch')

In [0]:
# Display the set of environment names recorded as successfully rebuilt.
successful_builds