In [0]:
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd

In [0]:
# Migration parameters: datasets are moved *to* `new_s3_connection`
# *from* `old_connection_name`.
new_s3_connection = "dataiku-managed-storage"  # target connection
old_connection_name = "honker_personal_AWS_S3"  # connection being retired

In [0]:
# Dataiku public API client, and through it a handle on the default project
client = dataiku.api_client()
project = client.get_default_project()

# Names of all datasets in that project, de-duplicated into a set
project_datasets = project.list_datasets()
unique_dataset_names = {listed['name'] for listed in project_datasets}

In [0]:
def get_dataset_configs(datasets=None):
    """Summarize the connection-related settings of datasets as a DataFrame.

    Args:
        datasets: Optional iterable of dict-like dataset definitions, each
            exposing ``type``, ``name`` and a nested ``params`` mapping.
            When omitted, ``project.list_datasets()`` is used.

    Returns:
        pandas.DataFrame with one row per dataset and the columns ``type``,
        ``connection``, ``name``, ``table``, ``catalog``, ``schema`` and
        ``path`` (keys absent from a definition come out as missing values),
        sorted by ``type``, ``connection`` and ``name``.
    """
    columns = ['type', 'connection', 'name', 'table', 'catalog', 'schema', 'path']
    param_keys = ('connection', 'table', 'catalog', 'schema', 'path')

    if datasets is None:
        datasets = project.list_datasets()

    records = []
    for row in datasets:
        # Tolerate definitions whose 'params' entry is absent or None
        params = row.get('params') or {}
        record = {'type': row.get('type'), 'name': row.get('name')}
        record.update((key, params.get(key)) for key in param_keys)
        records.append(record)

    # Passing `columns` explicitly fixes the column order and keeps the frame
    # sortable when there are no datasets at all (an empty frame built without
    # columns would make sort_values raise KeyError).
    summary = pd.DataFrame(records, columns=columns)
    return summary.sort_values(by=['type', 'connection', 'name'])

In [0]:
# Show the per-dataset settings table BEFORE any connection is changed,
# so it can be compared with the table displayed after the update.
get_dataset_configs()

In [0]:
# Re-point every dataset that still uses the old connection to the new one,
# keeping each dataset's existing path.
# Iterating in sorted order makes the run (and its log) deterministic.
for dataset_name in sorted(unique_dataset_names):
    # Single settings handle: used both to inspect and to modify the dataset
    settings = project.get_dataset(dataset_name).get_settings()
    raw_params = settings.get_raw().get('params') or {}

    # Leave datasets on any other connection untouched
    if raw_params.get('connection') != old_connection_name:
        continue

    current_path = raw_params.get('path')
    if current_path is None:
        # Report and move on rather than aborting with a KeyError part-way
        # through, which would leave the project half-migrated.
        print(f"Dataset {dataset_name} skipped: no 'path' parameter to carry over")
        continue

    # Update to the new S3 connection, preserving the current path
    settings.set_connection_and_path(new_s3_connection, current_path)
    settings.save()
    print(f"Dataset {dataset_name} updated to use connection: {new_s3_connection}")

In [0]:
# Show the per-dataset settings table AFTER the update loop has run,
# for comparison with the table displayed before it.
get_dataset_configs()