In [0]:
import ipywidgets as widgets
import json
from IPython.display import display, HTML

class ExpectationSettingsWidget:


    def read_mapping_collections(start, end, return_dict=False):
        temp_list=[]
        with open('/Workspace/Users/dima.frank@strauss-group.com/ExpectationsModuleMapping', 'rb+') as f:
            lines = f.readlines()[start:end]
            for line in lines:
                temp_list.append(line.strip().decode('utf-8'))

            if return_dict==True:
                dct = {}
                for row in temp_list:
                    dct[row.split(':')[0].strip()] = row.split(':')[1].strip()
                return dct
            else:
                return temp_list
        
    string_expectations = read_mapping_collections(start=1, end=10, return_dict=False)
    numeric_expectations = read_mapping_collections(start=12, end=18, return_dict=False)
    table_expectations = read_mapping_collections(start=20, end=22, return_dict=False)
    content_mapping = read_mapping_collections(start=24, end=39, return_dict=True)


    def __init__(self):
        self.datasource_name = "sgbi_silver_prod.sapbw_queries.sdc_sout"  # default
        self.datacatalog_dropdown = widgets.Dropdown(options=[self.datasource_name.split('.')[0]], value=self.datasource_name.split('.')[0], description='Catalog') #temporary
        self.dataschema_dropdown = widgets.Dropdown(options=[self.datasource_name.split('.')[1]], value=self.datasource_name.split('.')[1], description='Schema') #temporary
        self.dataset_dropdown = widgets.Dropdown(options=[self.datasource_name.split('.')[2]], value=self.datasource_name.split('.')[2], description='Dataset') #temporary
        self.success_message = widgets.Label(value="Expectation was successfully added!")
        self.column_dropdown = self.create_column_dropdown()
        self.expectations_dropdown = self.create_expectations_dropdown()
        self.add_button = widgets.Button(description='Add', button_style='success')
        self.add_button.on_click(self.add_button_clicked)
        self.table_expectations_dropdown = self.create_table_expectations_dropdown()
        self.widget_box0 = widgets.VBox([
            widgets.HTML("<h3>Select DataSource:</h3>"), 
            self.datacatalog_dropdown,
            self.dataschema_dropdown,
            self.dataset_dropdown  
        ])
        self.widget_box1 = widgets.VBox([
            widgets.HTML("<h3>Select Column Map Expectation:</h3>"),
            self.column_dropdown,
            self.expectations_dropdown,
            self.add_button        
        ])
        self.widget_box2 = widgets.VBox([
            widgets.HTML("<h3>Select Table Expectation:</h3>"),
            self.table_expectations_dropdown,
            # self.expectations_dropdown,
            self.add_button
        ])
        self.widget_box1.layout.width = '800px'
        self.config_file = self.remove_duplicate_expectations({})
        display(self.widget_box0)
        display(self.widget_box1)
        display(self.widget_box2)
 
    def set_datasource_name(self, datasource_name):
        self.datasource_name = datasource_name
        self.update_column_dropdown()
    
    def get_dtype(self, col_name):
        return dict(spark.sql(f'''select * from {self.datasource_name} ''').dtypes)[col_name]

    def create_column_dropdown(self):
        columns_df = spark.sql(f"""SHOW COLUMNS IN {self.datasource_name}""")
        columns_list = [row.col_name for row in columns_df.collect()]
        column_dropdown = widgets.Dropdown(options=columns_list, value=columns_list[0], description='Column name:')
        column_dropdown.observe(self.column_dropdown_event, names='value')
        return column_dropdown
    
    def update_column_dropdown(self):
        columns_df = spark.sql(f"""SHOW COLUMNS IN {self.datasource_name}""")
        columns_list = [row.col_name for row in columns_df.collect()]
        self.column_dropdown.options = columns_list
        self.column_dropdown.value = columns_list[0]
        self.update_expectations_dropdown()

    def create_expectations_dropdown(self):
        expectations_dropdown = widgets.Dropdown(description='Expectations:', style={'description_width': 'initial'})
        return expectations_dropdown                              
    
    def update_expectations_dropdown(self):
        selected_column = self.column_dropdown.value
        if 'string' in self.get_dtype(selected_column):
            self.expectations_dropdown.options = self.string_expectations
        elif 'decimal' in self.get_dtype(selected_column):
            self.expectations_dropdown.options = self.numeric_expectations

    def column_dropdown_event(self, change):       
        self.update_expectations_dropdown()

    def add_button_clicked(self, _):
        selected_column = self.column_dropdown.value
        selected_expectation = self.expectations_dropdown.value
        display(HTML(f"<p style='color:green;'>Added: Column={selected_column}, Expectation={selected_expectation}</p>"))

    def create_table_expectations_dropdown(self):
        table_expectations_dropdown = widgets.Dropdown(options=self.table_expectations, description='Table Expectations:', style={'deccription_width': 'initial'})
        return table_expectations_dropdown     
    
    def create_config_file(self, selected_expectation):
        config = {
            "expectation_type": self.content_mapping[selected_expectation],
            "kwargs": {},
            "meta": {
                "notes": {
                    "format": "markdown",
                    "content": f"{selected_expectation}"
                }
            }
        }
        return config
    
    
    def remove_duplicate_expectations(self, input_dict):
            unique_expectations = {}  
            result_dict = {}          
            
            for key, expectation in input_dict.items():
                expectation_content = json.dumps(expectation, sort_keys=True)
                
                if expectation_content not in unique_expectations:
                    unique_expectations[expectation_content] = key
                    result_dict[key] = expectation
            
            return result_dict


    def add_button_clicked(self, _):
        selected_column = self.column_dropdown.value
        selected_expectation = self.expectations_dropdown.value
        key='expectation_1'
        if key not in self.config_file.keys():
            self.config_file[key] = self.create_config_file(selected_expectation)
            self.config_file = self.remove_duplicate_expectations(self.config_file)
        else:
            i=2
            key=f'''expectation_{i}'''
            while key in self.config_file.keys():
                i+=1
                key=f'''expectation_{i}'''             
            self.config_file[key] = self.create_config_file(selected_expectation) 
            self.config_file = self.remove_duplicate_expectations(self.config_file)
           
        print(self.remove_duplicate_expectations(self.config_file))
        display(HTML(f"<p style='color:green;'>Added: Column={selected_column}, Expectation={selected_expectation}</p>"))
        display(HTML(f"<p style='color:green;'>{self.success_message}</p>"))

dbutils.widgets.removeAll()

widget = ExpectationSettingsWidget()
widget.set_datasource_name("sgbi_silver_prod.sapbw_queries.sdc_sout")


VBox(children=(HTML(value='<h3>Select DataSource:</h3>'), Dropdown(description='Catalog', options=('sgbi_silve…

VBox(children=(HTML(value='<h3>Select Column Map Expectation:</h3>'), Dropdown(description='Column name:', opt…

VBox(children=(HTML(value='<h3>Select Table Expectation:</h3>'), Dropdown(description='Table Expectations:', o…

In [0]:
widget.config_file

Out[13]: {'expectation_1': {'expectation_type': 'expect_column_values_to_not_match_regex',
  'kwargs': {},
  'meta': {'notes': {'format': 'markdown',
    'content': 'Expect the column entries to be strings that do NOT match a given regular expression.'}}},
 'expectation_2': {'expectation_type': 'expect_column_values_to_not_match_like_pattern_list',
  'kwargs': {},
  'meta': {'notes': {'format': 'markdown',
    'content': 'Expect the column entries to be strings that do NOT match any of a provided list of like pattern expressions.'}}},
 'expectation_3': {'expectation_type': 'expect_column_values_to_be_null',
  'kwargs': {},
  'meta': {'notes': {'format': 'markdown',
    'content': 'Expect the column values to be null.'}}}}

In [0]:
widget.success_message

Label(value='Expectation was successfully added!')