# Generate automatic documentation for an individual project

In [0]:
import dataiku
from dataiku import pandasutils as pdu
import pandas as pd

In [0]:
# Key handed to client.get_project() to select the project.
PROJECT_KEY = "TIM_HEALTH_DATA"
# Identifier handed to project.get_dataset() to select the dataset.
DATASET_ID = "Apple_Health_daily_summaries"

In [0]:
# Obtain an API client from the dataiku package, then resolve handles for
# the project and the dataset named by PROJECT_KEY / DATASET_ID.
client = dataiku.api_client()
project = client.get_project(PROJECT_KEY)
dataset_handle = project.get_dataset(DATASET_ID)

In [0]:
# NOTE: this call blocks execution until it finishes; it does not return a future.
# dataset_handle.generate_ai_description(save_description=True)

In [0]:
# Reset the description stored in the dataset's metadata: fetch the current
# metadata dict, blank out its 'description' entry, and write it back.
md = dataset_handle.get_metadata()
md['description'] = ''
dataset_handle.set_metadata(md)
# Fetch the metadata again; as the last expression of the cell, its value is
# what the notebook displays.
dataset_handle.get_metadata()

In [0]:
def get_dataset_long_description(dataset_handle):
    """Return the long description stored in a dataset's metadata.

    Args:
        dataset_handle: Object whose ``get_metadata()`` returns a dict, such
            as the handle returned by ``project.get_dataset(...)``.

    Returns:
        The value of the ``'description'`` metadata entry, or ``''`` when the
        entry is absent.
    """
    dataset_metadata = dataset_handle.get_metadata()
    # The metadata dict may lack a 'description' key when no description was
    # ever set; report that as empty instead of raising KeyError.
    return dataset_metadata.get('description', '')


def get_dataset_short_description(dataset_handle):
    """Return the short description stored in a dataset's raw settings.

    Args:
        dataset_handle: Object whose ``get_settings().get_raw()`` returns a
            dict, such as the handle returned by ``project.get_dataset(...)``.

    Returns:
        The value of the ``'shortDesc'`` settings entry, or ``''`` when the
        entry is absent.
    """
    dataset_settings = dataset_handle.get_settings().get_raw()
    # The raw settings may lack a 'shortDesc' key when no short description
    # was ever set; report that as empty instead of raising KeyError.
    return dataset_settings.get('shortDesc', '')


def get_dataset_column_descriptions(dataset_handle):
    """Return the description (``comment``) of every column in the schema.

    Args:
        dataset_handle: Object whose ``get_schema()`` returns a dict with a
            ``'columns'`` list of per-column dicts.

    Returns:
        A list with one entry per column, in schema order: the column's
        ``'comment'`` value, or ``''`` for a column that has no such entry.
    """
    dataset_schema = dataset_handle.get_schema()
    # A column that was never documented may carry no 'comment' key at all;
    # yield '' for it so callers can detect the gap instead of hitting a
    # KeyError.
    return [column.get("comment", "") for column in dataset_schema['columns']]


def dataset_has_full_documentation(client, PROJECT_KEY, dataset_id):
    """Report whether a dataset has all of its documentation fields filled in.

    The checks run in a fixed order -- long description, short description,
    column descriptions -- and stop at the first failure, printing which
    field is empty.

    Args:
        client: API client exposing ``get_project``.
        PROJECT_KEY: Key of the project that contains the dataset.
        dataset_id: Identifier of the dataset to inspect.

    Returns:
        ``True`` when the long description and short description are
        non-empty and every column description contains non-whitespace
        text; ``False`` otherwise.
    """
    project = client.get_project(PROJECT_KEY)
    # Use the dataset named by the argument, not the notebook-level
    # DATASET_ID global, so the check applies to whichever dataset is asked
    # about.
    dataset_handle = project.get_dataset(dataset_id)

    if not get_dataset_long_description(dataset_handle):
        print('Dataset lacks full documentation because empty: Long Description')
        return False

    if not get_dataset_short_description(dataset_handle):
        print('Dataset lacks full documentation because empty: Short Description')
        return False

    column_descriptions = get_dataset_column_descriptions(dataset_handle)

    # A column counts as undocumented when its description is missing, empty,
    # or whitespace only.
    if any(not s or not s.strip() for s in column_descriptions):
        print('Dataset lacks full documentation because empty: Column descriptions')
        return False

    return True

In [0]:
# Run the documentation check for the configured project and dataset; the
# returned boolean is the last expression of the cell and so is displayed.
dataset_has_full_documentation(client, PROJECT_KEY, DATASET_ID)