# 📌 Attach Default Lakehouse
❗**Note: the code in the cell that follows is required to programmatically attach the lakehouse and enable the running of spark.sql(). If this cell fails, simply restart your session, as this cell MUST be the first command executed on session start.**

In [None]:
%%configure
{
    "defaultLakehouse": {  
        "name": "{{lakehouse_name}}",
    }
}

# 📦 Pip
Pip installs required specifically for this template should occur here.

In [None]:
!pip install jsonpickle
!pip install tabulate

# 🔗 Imports

In [None]:
from notebookutils import mssparkutils # type: ignore
from dataclasses import dataclass
import jsonpickle # type: ignore
import pandas as pd # type: ignore
from tabulate import tabulate # type: ignore
import os
import json

# #️⃣ Functions

In [None]:
@dataclass
class NotebookResult:
    """One entry of the dbt execution log, describing a single notebook run.

    Instances are rebuilt from the dictionaries decoded out of
    ``dbt_execution.log`` when the execution report is produced.
    """
    # Notebook identifier; shown in the "Notebook" column of the report.
    notebook: str
    # Start of the run — presumably a timestamp in seconds; TODO confirm with the writer.
    start_time: float
    # Outcome of the run; shown in the "Status" column of the report.
    status: str
    # Error text for the run — presumably empty when the run succeeded; TODO confirm.
    error: str
    # Duration of the run — presumably in seconds; TODO confirm with the writer.
    execution_time: float
    # Run-order group the notebook belongs to (NOTE(review): semantics set by the generator).
    run_order: int
    
@dataclass
class FileListing:
    """A single file found while walking a directory tree.

    Attributes: name, directory
    """
    # File name as reported by the file-system listing (no directory part).
    name: str
    # Directory portion of the file's full path (os.path.dirname of the path).
    directory: str

def get_file_content_using_notebookutils(file):
    """Return the full text content of ``file``.

    Despite the name, the read goes through the Spark context
    (``wholeTextFiles``) of the session-global ``spark`` object.

    Args:
        file: Path of the file to read.

    Returns:
        The content of the first file matched by ``file``.
    """
    # wholeTextFiles yields (path, content) pairs, one per matched file.
    path_content_pairs = spark.sparkContext.wholeTextFiles(file).collect()  # type: ignore

    # Only the first match is of interest; index 1 of the pair is the text.
    first_pair = path_content_pairs[0]
    return first_pair[1]

def remove_file_using_notebookutils(file):
    """Remove a file using notebookutils, ignoring failures.

    The removal is deliberately best-effort: if the file cannot be removed
    (for example because it does not exist yet) the error is ignored.

    Args:
        file: Path of the file (or directory) to remove. ``True`` is passed
            as the second argument to ``fs.rm`` — presumably the recursive
            flag; confirm against the notebookutils documentation.
    """
    try:
        mssparkutils.fs.rm(file, True)
    except Exception:
        # Best-effort delete. Catch Exception rather than using a bare
        # ``except`` so KeyboardInterrupt / SystemExit still propagate and
        # the session can be interrupted while this call is in flight.
        pass


def create_path_using_notebookutils(path):
    """Create the directory ``path`` using notebookutils.

    Args:
        path: Directory path handed to ``mssparkutils.fs.mkdirs``.

    The result of ``mkdirs`` is not returned to the caller.
    """
    mssparkutils.fs.mkdirs(path)

def walk_directory_using_notebookutils(path):
    """Recursively list every file below ``path`` using notebookutils.

    Args:
        path: Directory to start the walk from.

    Returns:
        A flat list of ``FileListing`` objects, one per file, in the order
        the entries are returned by ``mssparkutils.fs.ls`` (sub-directories
        are expanded in place, depth-first).
    """
    found = []

    for entry in mssparkutils.fs.ls(path):
        if not entry.isDir:
            # Plain file: record its name and the directory that holds it.
            found.append(
                FileListing(
                    name=entry.name,
                    directory=os.path.dirname(entry.path),
                )
            )
            continue

        # Directory: descend and splice its files into the result.
        found += walk_directory_using_notebookutils(entry.path)

    return found

# 🔒 Embed HASH information 

In [None]:
# Metadata hashes embedded in this notebook; the right-hand side is a template
# placeholder, presumably substituted when the notebook is generated.
embedded_hashes = {{ hashes }}
# Folder (relative path) holding the extracted metadata files.
RelativePathForMetaData = "Files/MetaExtracts/"
# Hashes currently stored in MetaHashes.json under that folder, parsed from JSON.
current_hashes = json.loads(get_file_content_using_notebookutils(RelativePathForMetaData + 'MetaHashes.json'))

def get_hash(file, hashes):
    """Look up the hash recorded for ``file``.

    Args:
        file: Value to match against each entry's ``'file'`` key.
        hashes: Iterable of mappings with ``'file'`` and ``'hash'`` keys.

    Returns:
        The ``'hash'`` of the first entry whose ``'file'`` equals ``file``,
        or an empty string when no entry matches.
    """
    matching_hashes = (entry['hash'] for entry in hashes if entry['file'] == file)
    return next(matching_hashes, "")
        
# Compare the hashes embedded in this notebook with those currently stored
# in the environment; any difference aborts the run.
if current_hashes == embedded_hashes:
    print('Metadata Hashes Match 😏')
else:
    # Print the embedded and environment hash for every embedded file so
    # the mismatching entries can be spotted before the run is aborted.
    for h in embedded_hashes:
        print(''.join([
            h['file'],
            '\n \t Emb Hash: ',
            get_hash(h['file'], embedded_hashes),
            '\n \t Env Hash: ',
            get_hash(h['file'], current_hashes),
        ]))
    raise Exception('Hashes do not match. Please re-generate the dbt project using the latest extract of the target environment metadata.')

# Prepare

In [None]:
# Folder that holds the execution log for this run.
log_location = 'Files/Control/'
# Make sure the folder exists before anything tries to use it.
create_path_using_notebookutils(log_location)
# Remove any existing dbt_execution.log (best-effort: removal errors are ignored).
remove_file_using_notebookutils(log_location+'dbt_execution.log')


# Executions for Each Run Order Below:

# Execution Report

In [None]:
# Build the execution report: if an execution log exists, print a table of
# notebook names and their statuses.
# Check if file exists
log_location = 'Files/Control/'
create_path_using_notebookutils(log_location)
files = walk_directory_using_notebookutils(log_location)
# NOTE(review): not used in the visible part of this cell.
failed_results = []

# NOTE(review): the walk is recursive and only the file name is compared, so a
# dbt_execution.log in a sub-folder also satisfies this check, while the read
# below always targets the file directly under log_location.
if len([f for f in files if f.name == 'dbt_execution.log']) > 0:
    # Read the file
    contents = get_file_content_using_notebookutils(log_location + 'dbt_execution.log')
    
    # Deserialize the JSON string into a list of dictionaries
    # NOTE(review): jsonpickle.decode can instantiate arbitrary Python objects;
    # this assumes the log file is trusted — confirm only this pipeline writes it.
    results_dict = jsonpickle.decode(contents)

    # Convert each dictionary to a NotebookResult object
    results = [NotebookResult(**result) for result in results_dict]

    # Create a DataFrame from the results
    df = pd.DataFrame([(result.notebook, result.status) for result in results], columns=['Notebook', 'Status'])

    # Convert the DataFrame to a pretty text-based table
    table = tabulate(df, headers='keys', tablefmt='psql')

    # Print the table
    print(table)