# Listing Dataiku projects and info for migration
### Written by Tim Honker - Sep 4, 2025

This script is designed to be run on a Dataiku DSS Design Node v12 and later.

It produces a table at the bottom of the notebook that can be exported directly to a CSV file by clicking the "Export this dataframe" button.

I was not able to test it in an active DSS v12.6 environment with connections to Hive, so I cannot guarantee that the column "NUMBER_OF_HIVE_RECIPES_USED_IN_THIS_PROJECT" is accurate. However, I did include columns that list all the unique types of recipes and datasets used in each project.

In [0]:
# Written by Tim Honker @ Dataiku on Sep 4, 2025

from datetime import datetime
import dataiku
import pandas as pd

In [0]:
# Connect to the DSS instance
# `client` is the API handle used by the cells below to enumerate and open projects.
client = dataiku.api_client()
# NOTE(review): this default-project handle is not read before `project` is
# reassigned inside the per-project loop further down — confirm it is still needed.
project = client.get_default_project()

In [0]:
# Collect one summary row (a dict) per project returned by the client.
project_info = []


def _format_last_modified(project_summary):
    """Return the project's last-modified time as 'YYYY-MM-DD HH:MM:SS'.

    Returns 'unknown_date' when the summary carries no usable
    versionTag/lastModifiedOn value.
    """
    # 'versionTag' may be missing or None; fall back to an empty dict so a
    # project without it still gets a row instead of raising AttributeError.
    version_tag = project_summary.get('versionTag') or {}
    last_modified_ms = version_tag.get('lastModifiedOn')
    if not last_modified_ms:
        return 'unknown_date'
    # The value is divided by 1000 before conversion, i.e. it is treated as
    # epoch milliseconds; fromtimestamp() renders it in the local timezone.
    return datetime.fromtimestamp(last_modified_ms / 1000).strftime('%Y-%m-%d %H:%M:%S')


# Iterate over all projects
for project_key in client.list_project_keys():
    # A failure on one project is reported and skipped so the remaining
    # projects are still listed.
    try:
        project = client.get_project(project_key)
        last_modified_date = _format_last_modified(project.get_summary())

        # Fetch the recipe list once and reuse it for both the Hive count
        # and the inventory of recipe types.
        recipes = project.list_recipes()
        recipe_types = {recipe.get('type') for recipe in recipes}

        # Count the number of Hive recipes (case-insensitive match on the
        # recipe type; entries without a type are simply not counted).
        hive_count = sum(
            1 for recipe in recipes
            if (recipe.get('type') or '').lower() == 'hive'
        )

        dataset_types = {dataset.get('type') for dataset in project.list_datasets()}

        # Append the result to the list
        project_info.append({
            'PROJECT_KEY': project_key,
            'NUMBER_OF_HIVE_RECIPES_USED_IN_THIS_PROJECT': hive_count,
            'Dataset_types_used_in_project': dataset_types,
            'Owner': project.get_permissions().get('owner', 'unknown owner'),
            'DATE_PROJECT_WAS_LAST_MODIFIED': last_modified_date,
            'Recipe_types_used_in_project': recipe_types,
        })
    except Exception as e:
        print(f"Error processing project {project_key}: {str(e)}")

# Create a Pandas DataFrame with the results
df = pd.DataFrame(project_info)
df

# click EXPORT THIS DATAFRAME button below.