## Import Dependencies

In [10]:
import math
import os
import pprint

import pandas as pd
import requests

## Create functions to get and process data from dbt Cloud

In [13]:
def get_number_of_dbt_models_in_project(dbt_cloud_token, dbt_cloud_project_env_id):
    """Return the number of dbt models applied in a dbt Cloud environment.

    Sends a single GraphQL query to the dbt Cloud metadata endpoint and reads
    ``environment.applied.models.totalCount`` from the JSON response.

    Args:
        dbt_cloud_token: Token sent in the ``Authorization: Bearer`` header.
        dbt_cloud_project_env_id: Value passed as the ``$environmentId``
            query variable.

    Returns:
        The ``totalCount`` value from the response, or ``None`` when the HTTP
        status is not 200 or the response does not contain the expected
        fields. A diagnostic message is printed in the failure cases.
    """

    # Your GraphQL endpoint
    url = 'https://metadata.cloud.getdbt.com/graphql'

    # The GraphQL query
    query = """
            query Models($first: Int!, $environmentId: BigInt!) {
          environment(id: $environmentId) {
            applied {
              models(first: $first) {
                totalCount
              }
            }
          }
        }
    """

    # The variables to be used in your query
    variables = {
        "environmentId": dbt_cloud_project_env_id,
        "first": 500
    }

    # Headers, including possibly needed authorization
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {dbt_cloud_token}',  # Only include this line if you need authorization
    }

    # The payload for the request
    payload = {
        'query': query,
        'variables': variables
    }

    # Make the POST request to the GraphQL API
    response = requests.post(url, json=payload, headers=headers)

    payload_error_message = "there was an error getting back GraphQL Payload, please check your inputs"

    # HTTP-level failure: report it and give the caller None
    if response.status_code != 200:
        print(f"Query failed to run by returning code of {response.status_code}. {response.text}")
        print(payload_error_message)
        return None

    # A 200 response can still have a body that is not JSON
    try:
        data = response.json()
    except ValueError:
        data = None

    # A 200 response can also carry GraphQL errors with a null or partial "data"
    # section, so the lookup is guarded instead of relying on a bare except
    try:
        return data['data']['environment']['applied']['models']['totalCount']
    except (KeyError, TypeError):
        graphql_errors = data.get('errors') if isinstance(data, dict) else None
        if graphql_errors:
            print(f"GraphQL errors: {graphql_errors}")
        print(payload_error_message)
        return None


def get_model_info_from_project(dbt_cloud_token, dbt_cloud_project_env_id, model_name_to_filter_for=None, number_of_models_in_project=1):
    """Fetch test information for models in a dbt Cloud environment.

    Pages through ``environment.applied.models`` on the dbt Cloud metadata
    GraphQL endpoint, 500 models per request, and collects the ``edges``
    list of every page.

    Args:
        dbt_cloud_token: Token sent in the ``Authorization: Bearer`` header.
        dbt_cloud_project_env_id: Value passed as the ``$environmentId``
            query variable.
        model_name_to_filter_for: When given, sent as the ``identifier`` of
            the ``$filter`` variable. When ``None`` no filter is sent.
        number_of_models_in_project: Used to compute the maximum number of
            requests (``ceil(n / 500)``). Paging also stops as soon as the
            API reports ``hasNextPage`` as false.

    Returns:
        A list with one entry per fetched page; each entry is that page's
        ``edges`` list.

    Raises:
        RuntimeError: If a request returns a non-200 status or the response
            does not contain the expected ``data`` structure.
    """

    # Your GraphQL endpoint
    url = 'https://metadata.cloud.getdbt.com/graphql'

    # The GraphQL query
    query = """
            query Node($first: Int!, $environmentId: BigInt!, $after: String, $filter: ModelAppliedFilter) {
              environment(id: $environmentId) {
                applied {
                  models(first: $first, after: $after, filter: $filter) {
                    edges {
                      node {
                        uniqueId
                        tests {
                          name
                          resourceType
                          columnName
                          description
                          testType
                          executionInfo {
                            lastRunStatus
                            lastRunGeneratedAt
                          }
                        }
                      }
                    }
                    pageInfo {
                      endCursor
                      hasNextPage
                      startCursor
                    }
                  }
                }
              }
            }
    """

    # Headers, including possibly needed authorization
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {dbt_cloud_token}',  # Only include this line if you need authorization
    }

    # number of models requested per call
    page_size = 500

    # upper bound on the number of API calls to make
    max_number_of_calls = math.ceil(number_of_models_in_project / page_size)

    # set page start
    page_start = None

    # put the parsed payload in a list
    parsed_model_data_list = []

    # loop through and make the calls
    for _ in range(max_number_of_calls):

        # The variables to be used in your query
        variables = {
            "environmentId": dbt_cloud_project_env_id,
            "first": page_size,
            "after": page_start,
        }

        # only send a filter when a model name was requested; sending the
        # string "null" would filter for a model literally named "null"
        if model_name_to_filter_for is not None:
            variables["filter"] = {"identifier": model_name_to_filter_for}

        # The payload for the request
        payload = {
            'query': query,
            'variables': variables
        }

        # Make the POST request to the GraphQL API
        response = requests.post(url, json=payload, headers=headers)

        # fail with a readable message instead of an opaque KeyError/TypeError
        if response.status_code != 200:
            raise RuntimeError(
                f"Query failed to run by returning code of {response.status_code}. {response.text}"
            )

        body = response.json()

        # parse the data
        try:
            parsed_data = body['data']['environment']['applied']['models']
        except (KeyError, TypeError):
            graphql_errors = body.get('errors') if isinstance(body, dict) else None
            raise RuntimeError(
                f"unexpected GraphQL response, please check your inputs: {graphql_errors or body}"
            ) from None

        page_info = parsed_data['pageInfo']

        # get the next page
        page_start = page_info['endCursor']

        # add the parsed data to the list
        parsed_model_data_list.append(parsed_data['edges'])

        # stop once the API reports there is nothing left to fetch, so an
        # overestimated model count does not trigger extra calls
        if not page_info['hasNextPage']:
            break

    return parsed_model_data_list

# Using the functions

### Set inputs

In [4]:
# get the dbt cloud service token or personal token
# prefer the DBT_CLOUD_TOKEN environment variable so the secret is not saved in the notebook;
# the placeholder is only used when that variable is not set
dbt_cloud_token = os.environ.get('DBT_CLOUD_TOKEN', '<< dbt cloud token goes here >>')

# the production environment id for the dbt Cloud project
dbt_cloud_project_env_id = 123456

### Get the number of dbt models in the prod environment

In [5]:
# get the number
number_of_models = get_number_of_dbt_models_in_project(dbt_cloud_token, dbt_cloud_project_env_id)

# the helper prints its own message and returns None on failure; stop here so
# later cells do not run with a missing model count
if number_of_models is None:
    raise RuntimeError("could not get the number of dbt models, please check the token and environment id")

# log it
print(f"the number of dbt models in the env {dbt_cloud_project_env_id} is {number_of_models}")

the number of dbt models in the env 105436 is 40


### Get test info for _ALL_ the models in the prod env

In [8]:
# pull back data from discovery API
# the count is passed by keyword: the third positional parameter of
# get_model_info_from_project is the model-name filter, not the model count
prod_env_model_test_info = get_model_info_from_project(
    dbt_cloud_token,
    dbt_cloud_project_env_id,
    number_of_models_in_project=number_of_models,
)

# log that it completed
print("grabbed testing info for all of the models from the env")

grabbed all of the model info from the env


### Get test info for a _specified_ model in the prod env

In [15]:
# name of the model to pull test info for
model_name_to_inspect = 'a_small_view_model'

# pull back data from discovery API
prod_env_model_test_info_spec_model = get_model_info_from_project(
    dbt_cloud_token,
    dbt_cloud_project_env_id,
    model_name_to_filter_for=model_name_to_inspect,
)

# log that it completed
print(f"grabbed testing info for the model {model_name_to_inspect} from the env")

grabbed testing info for the model a_small_view_model from the env


## Display outputs in JSON

### Example output when searching for the tests of just one model

In [16]:
# pretty-print the single-model result, keeping dict keys in response order
pprint.pprint(prod_env_model_test_info_spec_model, sort_dicts=False)

[[{'node': {'uniqueId': 'model.primary_project.a_small_view_model',
            'tests': [{'name': 'not_null_a_small_view_model_one',
                       'resourceType': 'TestAppliedStateNestedNode',
                       'columnName': 'one',
                       'description': None,
                       'testType': 'GENERIC_DATA_TEST',
                       'executionInfo': {'lastRunStatus': 'pass',
                                         'lastRunGeneratedAt': '2024-03-09T14:47:08.974Z'}},
                      {'name': 'not_null_a_small_view_model_two',
                       'resourceType': 'TestAppliedStateNestedNode',
                       'columnName': 'two',
                       'description': None,
                       'testType': 'GENERIC_DATA_TEST',
                       'executionInfo': {'lastRunStatus': 'pass',
                                         'lastRunGeneratedAt': '2024-03-09T14:47:08.974Z'}},
                      {'name': 'unique_a_small_view_mode

### Example output when searching for the tests of all models in the prod env

In [11]:
# pretty-print the all-models result, keeping dict keys in response order
pprint.pprint(prod_env_model_test_info, sort_dicts=False)

[[{'node': {'uniqueId': 'model.primary_project.a_sample_model', 'tests': []}},
  {'node': {'uniqueId': 'model.primary_project.a_small_view_model',
            'tests': [{'name': 'not_null_a_small_view_model_one',
                       'resourceType': 'TestAppliedStateNestedNode',
                       'columnName': 'one',
                       'description': None,
                       'testType': 'GENERIC_DATA_TEST',
                       'executionInfo': {'lastRunStatus': 'pass',
                                         'lastRunGeneratedAt': '2024-03-09T14:47:08.974Z'}},
                      {'name': 'not_null_a_small_view_model_two',
                       'resourceType': 'TestAppliedStateNestedNode',
                       'columnName': 'two',
                       'description': None,
                       'testType': 'GENERIC_DATA_TEST',
                       'executionInfo': {'lastRunStatus': 'pass',
                                         'lastRunGeneratedAt': '2024-0

___

# END OF SCRIPT

___