In [3]:
import lkml
from lkml import (
    Lexer,
    Parser
)

from lkml.tokens import(
    LiteralToken    
)
from lkml.tree import (
    BlockNode,
    ContainerNode,
    DocumentNode,
    ListNode,
    PairNode,
    SyntaxNode,
    SyntaxToken,
    LeftCurlyBrace,
    RightCurlyBrace,
    LeftBracket,
    RightBracket,
    Visitor,
    Colon,
)

from dataclasses import replace
from lkml.visitors import BasicTransformer

import os

In [4]:
lookml_base = 'C:\\Users\\EricHeidbreder\\Documents\\looker_projects\\a8_case_study\\'
lookml_folder = "_layers"
lookml_filename = "_base.layer.lkml"
lookml_path = os.path.join(lookml_base, lookml_folder, lookml_filename)

In [5]:
with open(lookml_path, 'r+') as f:
    text = f.read()

In [6]:
view_text = '''
    view: order_items {
    sql_table_name: "PUBLIC"."ORDER_ITEMS"
      ;;
    drill_fields: [pk1_order_item_id]

    dimension: pk1_order_item_id {
      primary_key: yes
      type: number
      sql: ${TABLE}."ID" ;;
    }

    dimension_group: created {
      type: time
      timeframes: [
        raw,
        time,
        day_of_month,
        hour_of_day,
        day_of_year,
        date,
        week,
        month,
        quarter,
        year
      ]
      sql: ${TABLE}."CREATED_AT" ;;
    }

    dimension_group: delivered {
      type: time
      timeframes: [
        raw,
        time,
        date,
        week,
        month,
        quarter,
        year
      ]
      sql: ${TABLE}."DELIVERED_AT" ;;
    }

    dimension: inventory_item_id {
      type: number
      # hidden: yes
      sql: ${TABLE}."INVENTORY_ITEM_ID" ;;
    }

    dimension: order_id {
      type: number
      sql: ${TABLE}."ORDER_ID" ;;
    }

    dimension_group: returned {
      type: time
      timeframes: [
        raw,
        time,
        date,
        week,
        month,
        quarter,
        year
      ]
      sql: ${TABLE}."RETURNED_AT" ;;
    }

    dimension: sale_price {
      type: number
      sql: ${TABLE}."SALE_PRICE" ;;
    }

    dimension_group: shipped {
      type: time
      timeframes: [
        raw,
        time,
        date,
        week,
        month,
        quarter,
        year
      ]
      sql: ${TABLE}."SHIPPED_AT" ;;
    }

    dimension: status {
      type: string
      sql: ${TABLE}."STATUS" ;;
    }

    dimension: user_id {
      type: number
      # hidden: yes
      sql: ${TABLE}."USER_ID" ;;
    }

    measure: count {
      type: count
      drill_fields: [detail*]
    }

    # ----- Sets of fields for drilling ------
    set: detail {
      fields: [
        pk1_order_item_id,
        inventory_items.product_name,
        inventory_items.pk1_inventory_item_id,
        users.last_name,
        users.first_name,
        users.pk1_user_id
      ]
    }
  }
'''

In [7]:
lexer=Lexer(view_text)
tokens = lexer.scan()
parser = Parser(tokens)
tree = parser.parse()

In [8]:
def load_with_comments(text):
    '''
    Parses LookML , but doesn't remove comments
    '''

    lexer=Lexer(text)
    tokens = lexer.scan()
    parser = Parser(tokens)
    tree = parser.parse()

    return tree

In [9]:
result = load_with_comments(text)

In [10]:
str(tree.container.items[0].container.items[-1])

'set: detail {\n      fields: [\n        pk1_order_item_id,\n        inventory_items.product_name,\n        inventory_items.pk1_inventory_item_id,\n        users.last_name,\n        users.first_name,\n        users.pk1_user_id\n      ]\n    }\n  '

In [11]:
tree.container.items[0].container.items[4].container.items

(PairNode(type='type', value='time'),
 ListNode(type='timeframes'),
 PairNode(type='sql', value='${TABLE}."DELIVERED_AT"'))

In [10]:
dim_group_timeframes = [field.value for field in tree.container.items[0].container.items[3].container.items[1].items]

In [223]:
dimension_group_timeframes = ['field1', 'field2']

In [11]:
view_node = tree.container.items[0]

In [46]:
len([item for item in view_node.container.items if item.type.value == 'dimension_group'])

4

In [435]:
item_list

[PairNode(type='sql_table_name', value='"PUBLIC"."ORDER_ITEMS"'),
 ListNode(type='drill_fields'),
 BlockNode(type='dimension', name='pk1_order_item_id'),
 BlockNode(type='dimension_group', name='created'),
 BlockNode(type='dimension_group', name='delivered'),
 BlockNode(type='dimension', name='inventory_item_id'),
 BlockNode(type='dimension', name='order_id'),
 BlockNode(type='dimension_group', name='returned'),
 BlockNode(type='dimension', name='sale_price'),
 BlockNode(type='dimension_group', name='shipped'),
 BlockNode(type='dimension', name='status'),
 BlockNode(type='dimension', name='user_id'),
 BlockNode(type='measure', name='count'),
 BlockNode(type='set', name='detail')]

In [429]:
item_list.append(new_set)

In [431]:
tuple(item_list)

(PairNode(type='sql_table_name', value='"PUBLIC"."ORDER_ITEMS"'),
 ListNode(type='drill_fields'),
 BlockNode(type='dimension', name='pk1_order_item_id'),
 BlockNode(type='dimension_group', name='created'),
 BlockNode(type='dimension_group', name='delivered'),
 BlockNode(type='dimension', name='inventory_item_id'),
 BlockNode(type='dimension', name='order_id'),
 BlockNode(type='dimension_group', name='returned'),
 BlockNode(type='dimension', name='sale_price'),
 BlockNode(type='dimension_group', name='shipped'),
 BlockNode(type='dimension', name='status'),
 BlockNode(type='dimension', name='user_id'),
 BlockNode(type='measure', name='count'),
 BlockNode(type='set', name='detail'),
 BlockNode(type='set', name='test_set'))

In [190]:
class SetCreation(BasicTransformer):
        dim_group_count = 0
        def visit_block(self, node: BlockNode, set_list=[]) -> BlockNode:
            global view_node
            global final_node
            global view_item_list
            global dim_group_count
            global num_dim_groups

            """Visit each Block and generate a set for each dimension group."""
            if node.type.value =='view':
                view_node = node
                view_item_list = list(node.container.items)
                num_dim_groups = len([item for item in node.container.items if item.type.value == 'dimension_group'])
                # print(num_dim_groups)
            if node.type.value == 'dimension_group':
                dim_group_count += 1
                # print(dim_group_count)
                dim_group_name = node.name.value
                for item in node.container.items: 
                    try:
                        # This returns either 'time' or 'duration' when iterating through each item in a BlockNode
                        if item.type.value == 'type':
                            # Storing the type for dynamic naming
                            dim_group_type = item.value
                    # If item doesn't have a type, we can move on to the next item
                    except AttributeError:
                        continue
                    if item.type.value == 'timeframes':
                        dimension_group_timeframes = [field.value for field in item.items]

                        fields = ListNode(
                            type=SyntaxToken(value='fields', prefix='   ', suffix=''),
                            items=tuple(
                                SyntaxToken(value=f'{field_value}', prefix='\n          ', suffix='')\
                                for field_value in dimension_group_timeframes
                            ),
                            left_bracket=LeftBracket(value='[', prefix='', suffix=''),
                            right_bracket=RightBracket(value=']', prefix='\n        ', suffix='\n')
                        )                

                        container = ContainerNode(
                            items=(fields,)
                        )

                        new_set = BlockNode(
                                    type=SyntaxToken(value='set', prefix='  ', suffix=''),
                                    name=SyntaxToken(value=f'{dim_group_name}_{dim_group_type}_fields', prefix='', suffix=''),
                                    left_brace=LeftCurlyBrace(value='{', prefix=' ', suffix='\n    '),
                                    right_brace=RightCurlyBrace(value='}', prefix='    ', suffix='\n\n'),
                                    container=container
                                )
                        set_list.append(new_set)

                        # new_node: BlockNode = replace(view_node.container, items=tuple(view_item_list + set_list))


                        if dim_group_count == num_dim_groups:
                            new_node: BlockNode = replace(view_node, container=replace(view_node.container, items=tuple(view_item_list + set_list)))
                            final_node = new_node
                            return self._visit_container(view_node)
                        else:
                            pass
            return self._visit_container(node)

class SetAddition(BasicTransformer):
        def visit_block(self, node: BlockNode) -> BlockNode:
            if node.type.value =='view':
                new_node: BlockNode = replace(node, container=replace(node.container, items=final_node.container.items))
                return new_node
            else:
                return self._visit_container(node)




# Assume we already have a parse tree to visit
result.accept(SetCreation())
new_tree = new_tree.accept(SetAddition())

In [191]:
Parser(Lexer('''    dimension: pk1_order_item_id {
      primary_key: yes
      type: number
      sql: ${TABLE}."ID" ;;
    }''').scan()).parse().container.items[0].container.items[1].value

SyntaxToken(value='number', prefix='', suffix='')

PairNode(type='group_label', value='something')

In [180]:
'sale, pk1'.replace(' ', '').split(',')

['sale', 'pk1']

In [268]:
class AddLabel(BasicTransformer):
        def __init__(self, field_search):
            self.field_search = field_search.replace(' ', '').split(',')

        def visit_block(self, node: BlockNode) -> BlockNode:            
            if any(search_term in node.name.value for search_term in self.field_search):
                new_label = PairNode(
                    SyntaxToken(value='group_label', prefix='', suffix=''),
                    SyntaxToken(value=label_name, prefix='', suffix='\n    ')
                )

                # Not actually the new items yet, creating a list of the orig items
                new_items = list(node.container.items)

                # Now we insert the group label at the front
                new_items.insert(0, new_label)

                # Replacing the original node's items with the new items
                new_container = replace(node.container, items=tuple(new_items))
                new_node = replace(node, container=new_container)

                return new_node

            else:
                return self._visit_container(node)

In [269]:
field_search = input('What search terms are you looking for? Separate multiple values with commas.')
label_name=input('What would you like the new label name to be?')
new_tree = result.accept(AddLabel(field_search=field_search))

In [270]:
with open('./test.lkml', 'w+') as f:
    f.write(str(new_tree))

In [149]:
final_node.container.items

(PairNode(type='sql_table_name', value='"PUBLIC"."ORDER_ITEMS"'),
 ListNode(type='drill_fields'),
 BlockNode(type='dimension', name='pk1_order_item_id'),
 BlockNode(type='dimension_group', name='created'),
 BlockNode(type='dimension_group', name='delivered'),
 BlockNode(type='dimension', name='inventory_item_id'),
 BlockNode(type='dimension', name='order_id'),
 BlockNode(type='dimension_group', name='returned'),
 BlockNode(type='dimension', name='sale_price'),
 BlockNode(type='dimension_group', name='shipped'),
 BlockNode(type='dimension', name='status'),
 BlockNode(type='dimension', name='user_id'),
 BlockNode(type='measure', name='count'),
 BlockNode(type='set', name='detail'),
 BlockNode(type='set', name='created_time_fields'),
 BlockNode(type='set', name='delivered_time_fields'),
 BlockNode(type='set', name='returned_time_fields'),
 BlockNode(type='set', name='shipped_time_fields'))

In [147]:
new_tree.container.items[0].container.items

(PairNode(type='sql_table_name', value='"PUBLIC"."ORDER_ITEMS"'),
 ListNode(type='drill_fields'),
 BlockNode(type='dimension', name='pk1_order_item_id'),
 BlockNode(type='dimension_group', name='created'),
 BlockNode(type='dimension_group', name='delivered'),
 BlockNode(type='dimension', name='inventory_item_id'),
 BlockNode(type='dimension', name='order_id'),
 BlockNode(type='dimension_group', name='returned'),
 BlockNode(type='dimension', name='sale_price'),
 BlockNode(type='dimension_group', name='shipped'),
 BlockNode(type='dimension', name='status'),
 BlockNode(type='dimension', name='user_id'),
 BlockNode(type='measure', name='count'),
 BlockNode(type='set', name='detail'))

In [64]:
tree.container.items[9].container.items[-1].container.items[0].items

(SyntaxToken(value='pk1_order_item_id', prefix='\n        ', suffix=''),
 SyntaxToken(value='inventory_items.product_name', prefix='\n        ', suffix=''),
 SyntaxToken(value='inventory_items.pk1_inventory_item_id', prefix='\n        ', suffix=''),
 SyntaxToken(value='users.last_name', prefix='\n        ', suffix=''),
 SyntaxToken(value='users.first_name', prefix='\n        ', suffix=''),
 SyntaxToken(value='users.pk1_user_id', prefix='\n        ', suffix='\n      '))

In [33]:
with open('./test.lkml', 'w+') as f:
    f.write(result)

In [4]:
lexer = lkml.Lexer(text)

In [5]:
tokens = lexer.scan()

In [16]:
parser = lkml.Parser(tokens)

In [25]:
parser.depth

-1

In [14]:
tree = parser.parse()

DocumentNode(container=ContainerNode(), prefix='# This is all machine-generated code,\n# so you\'ll only need to add "hidden: yes"\n# to the explores. You can unhide via the _basic.layer\n# file as you need them.\n\n', suffix='')

In [45]:
parser.advance()
parser.peek()

BlockStartToken()

In [141]:
parser.check(lkml.tokens.CommentToken)

False

In [11]:
lkml.dump()

AttributeError: 'DocumentNode' object has no attribute 'items'

In [9]:
tree.container.items

(BlockNode(type='explore', name='distribution_centers'),
 BlockNode(type='view', name='distribution_centers'),
 BlockNode(type='explore', name='etl_jobs'),
 BlockNode(type='view', name='etl_jobs'),
 BlockNode(type='explore', name='events'),
 BlockNode(type='view', name='events'),
 BlockNode(type='explore', name='inventory_items'),
 BlockNode(type='view', name='inventory_items'),
 BlockNode(type='explore', name='order_items'),
 BlockNode(type='view', name='order_items'),
 BlockNode(type='explore', name='products'),
 BlockNode(type='view', name='products'),
 BlockNode(type='explore', name='users'),
 BlockNode(type='view', name='users'))

In [4]:
def generate_new_filename(lookml_path, extension):
    lookml_filename_new = lookml_path.split('\\')[-1].split('.')
    lookml_filename_new[0] += extension
    return '.'.join(lookml_filename_new)

In [5]:
def find(lst, key, value):
    '''
    Finds a value in a list of dictionaries
    '''
    for i, dic in enumerate(lst):
        if dic[key] == value:
            return i
    return f'Value "{value}" not found'

In [123]:
def generate_sets(lookml_path):
    '''
    Automatically generate sets for each dimension group in a view
    '''
    # Indicates if a change was made to the file
    made_change = 0

    with open(lookml_path, 'r+') as file:
        result = lkml.load(file)

    try:    
        view_list = result['views']
    except:
        return 'No views in current file'


    for view_num in range(len(view_list)):

    # # get the view number index from list of dictionaries
    # view_num = find(view_list, 'name', view_name)
    
        # Are there any dimension groups in this view?
        try:
            dim_group_count = len(view_list[view_num]['dimension_groups'])

        except:
            print(f'No dimension groups in {os.path.basename(lookml_path)} view')
            continue

        # Yay! There's at least one dimension group.
        # Now, are there any sets in this dimension group?
        try:
            current_set_list = view_list[view_num]['sets']

        # There weren't any sets, but that's okay. We'll create a new one.
        except:
            current_set_list = view_list[view_num]['sets'] = []

        # Creating some tracking info
        new_set_names = []

        # If the duration/time fields got left out, they will default to these
        default_duration_fields = [
            'days',
            'hours',
            'minutes',
            'months',
            'quarters',
            'seconds',
            'weeks',
            'years'
        ]

        default_time_fields = [ 
            'date',
            'month',
            'quarter',
            'time',
            'week',
            'year'     
        ]

        # Iterate through dimension groups and generate sets based on the
        # timeframes in the dimension group
        for dim_group in view_list[view_num]['dimension_groups']:

            # Currently can't handle dynamic label names, so if there's
            # any liquid, we'll skip for now
            try:
                if "{%" in dim_group['sql']:
                    continue
                    print("There's Liquid!")
            except:
                pass

            # extracting info from the dimension group and creating
            # variables to be used in set generation
            try:
                dim_group_type = dim_group['type']
            except:
                print('No type, must be a refinement...continuing.')
                continue

            dim_group_name = dim_group['name']

            try:
                dim_group_timeframes = dim_group['timeframes']
            except:
                if dim_group_type == 'time':
                    dim_group_timeframes = default_time_fields
                elif dim_group_type == 'duration':
                    dim_group_timeframes = default_duration_fields

            new_set_name = f'{dim_group_name}_{dim_group_type}_fields'

            # there are different naming formats for duration vs. time types, so we need to build different field name
            # lists for the different types
            if dim_group_type == 'time':
                field_names = [dim_group_name + '_' + timeframe for timeframe in dim_group_timeframes]
            elif dim_group_type == 'duration':
                field_names = [timeframe + '_' + dim_group_name for timeframe in dim_group_timeframes]

            view_list[view_num]['sets'].append(
                {'fields': field_names,
            'name': new_set_name}
            )

            new_set_names.append(new_set_name)

            made_change = 1

    # We only want to overwrite the file if we made any changes

    if made_change:

        print(f'Created the following {dim_group_count} new sets:')
        [print(set_name) for set_name in new_set_names]

        orig_lookml_filename = lookml_path.split('\\')[-1]

        new_lookml_filename = generate_new_filename(lookml_path, '_new')

        lookml_new_path = lookml_path.replace(orig_lookml_filename, new_lookml_filename)

        with open(f'{lookml_path}', 'w+') as new_file:
            lkml.dump(result, new_file)
    

In [124]:
for folder in os.listdir(lookml_base):
    # First, let's only keep folders that don't begin with periods and make sure we're not running any stray .lkml files through this
    if folder[0] != '.' and folder.split('.')[-1] not in ['lkml']:
        for filename in os.listdir(lookml_base + folder):
            # ignore any files beginning with a period, and any python or jupyter notebooks files (py or ipynb)
            if filename[0] != '.' and filename.split('.')[-1] not in ['py', 'ipynb']:
                lookml_path = os.path.join(lookml_base, folder, filename)
                generate_sets(lookml_path)

Created the following 1 new sets:
creation_to_delivery_duration_fields
No dimension groups in location.layer.lkml view
No dimension groups in location.layer.lkml view
No dimension groups in profit.layer.lkml view
No dimension groups in revenue.layer.lkml view
No dimension groups in user_age_and_name.layer.lkml view
No dimension groups in order_sequence_1.layer.lkml view
No dimension groups in order_sequence_1.layer.lkml view
No dimension groups in order_sequence_1.layer.lkml view
No dimension groups in order_sequence_2.layer.lkml view
No dimension groups in order_sequence_2.layer.lkml view
No dimension groups in profit_per_order.layer.lkml view
No dimension groups in profit_per_order.layer.lkml view
No dimension groups in delivery_duration.group.lkml view
No dimension groups in demographics.group.lkml view
No dimension groups in ids.group.lkml view
No dimension groups in ids.group.lkml view
No dimension groups in location.group.lkml view
No dimension groups in location.group.lkml view
