# **Data Warehouse Automation (DWA) Setup in Microsoft Fabric**

- Modify the parameter values as needed.
- Execute each cell sequentially to ensure proper setup.
- Manually verify the created objects within the Fabric Workspace.

In [2]:
lakehouse_name = "LH"            # Lakehouse display name, e.g. Supplychain_silver, Finance_bronze, Sales_lh
warehouse_name = "DW"            # Warehouse display name, e.g. Operations_DW, Supplychain_DW
metadata_db_name = "Meta"        # Display name of the SQL Database that stores the framework's metadata
lakehouse_schema_enabled = True  # Sent as "enableSchemas" when the lakehouse is created; if False, lakehouse and warehouse objects will need to be created manually
warehouse_case_sensitive = False # True selects the BIN2 (case-sensitive) collation; default is False
deploy_aw = True                 # Set to True to deploy the AdventureWorks files and objects (section 7)

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 4, Finished, Available, Finished)

In [3]:
import base64
import fnmatch
import json
import os
import re
import shutil
import struct
import time
from enum import Enum

import pyodbc
import requests
import sempy.fabric as fabric
import sqlalchemy
from azure.core.credentials import AccessToken
from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import DataLakeServiceClient
from git import Repo
from notebookutils import mssparkutils
from sqlalchemy.engine import Engine
from sqlalchemy.sql import text

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 5, Finished, Available, Finished)

### 1. Create Lakehouse

In [4]:
# Resolve the workspace this notebook runs in; every REST call below is scoped to it.
workspace_id = fabric.get_workspace_id()
workspace_name = fabric.resolve_workspace_name()

BASE_URL = "https://api.fabric.microsoft.com/v1"
access_token = mssparkutils.credentials.getToken('pbi')

# Shared by the Fabric REST calls in this and the following cells.
headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

lakehouse_url = f"{BASE_URL}/workspaces/{workspace_id}/lakehouses"
payload = {
    "displayName": lakehouse_name,
    "description": "A schema-enabled lakehouse.",
    # Send a real JSON boolean; an f-string would serialise it as the text "True"/"False".
    "creationPayload": {"enableSchemas": bool(lakehouse_schema_enabled)}
}
response = requests.post(lakehouse_url, headers=headers, json=payload)

if response.status_code in (400, 409):
    # Only a display-name conflict means "already exists"; any other client
    # error is a genuine failure and must not be reported as success.
    try:
        error_code = response.json().get("errorCode")
    except (ValueError, AttributeError):
        error_code = None

    if error_code == "ItemDisplayNameAlreadyInUse":
        print(f"Lakehouse {lakehouse_name} already exists. No change was made.")
    else:
        raise RuntimeError(f"Failed to create Lakehouse {lakehouse_name} : {response.status_code}, {response.text}")
elif response.status_code not in (201, 202):
    # 201 = created, 202 = creation accepted and still provisioning.
    raise RuntimeError(f"Failed to create Lakehouse {lakehouse_name} : {response.status_code}, {response.text}")

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 6, Finished, Available, Finished)

Lakehouse LH already exists. No change was made.


### 2. Create Metadata SQL Database
#### **Warning:** Verify the SQL DB creation before executing the next cell.

In [5]:
payload = {
    "displayName": metadata_db_name,
    "type": "SQLDatabase",
    "description": "SQL Database to store metadata for Framework"
}

sqldb_url = f'{BASE_URL}/workspaces/{workspace_id}/items'
response = requests.post(sqldb_url, headers=headers, json=payload)

if response.status_code in (200, 201, 202):
    # The request was accepted; give the service time to provision the database
    # before later cells connect to it. Failures are raised below without waiting.
    time.sleep(300)
else:
    # Only a display-name conflict means "already exists"; any other error is a real failure.
    try:
        error_code = response.json().get("errorCode")
    except (ValueError, AttributeError):
        error_code = None

    if response.status_code in (400, 409) and error_code == "ItemDisplayNameAlreadyInUse":
        print(f"SQL DB {metadata_db_name} already exists. No change was made.")
    else:
        raise RuntimeError(f"Failed to create database {metadata_db_name}: {response.status_code}, {response.text}")

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 7, Finished, Available, Finished)

SQL DB Meta already exists. No change was made.


### 3. Create Data Warehouse
#### **Warning:** Verify the Data Warehouse creation before executing the next cell.

In [6]:
# Pick the warehouse collation: BIN2 is case sensitive, CI_AS is case insensitive.
if warehouse_case_sensitive:
    warehouse_collation = "Latin1_General_100_BIN2_UTF8"
else:
    warehouse_collation = "Latin1_General_100_CI_AS_KS_WS_SC_UTF8"

payload = {
    "displayName": warehouse_name,
    "type": "Warehouse",
    # The collation is a creation-time option and is passed as
    # creationPayload.defaultCollation; it cannot be changed afterwards.
    "creationPayload": {
        "defaultCollation": warehouse_collation
    }
}

dw_url = f'{BASE_URL}/workspaces/{workspace_id}/items'
response = requests.post(dw_url, headers=headers, json=payload)

if response.status_code in (200, 201, 202):
    # The request was accepted; wait for the warehouse to finish provisioning.
    # Failures are raised below without waiting.
    time.sleep(300)
else:
    # Only a display-name conflict means "already exists"; any other error is a real failure.
    try:
        error_code = response.json().get("errorCode")
    except (ValueError, AttributeError):
        error_code = None

    if response.status_code in (400, 409) and error_code == "ItemDisplayNameAlreadyInUse":
        print(f"Warehouse {warehouse_name} already exists. No change was made.")
    else:
        raise RuntimeError(f"Failed to create warehouse {warehouse_name}: {response.status_code}, {response.text}")

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 8, Finished, Available, Finished)

Warehouse DW already exists. No change was made.


### 4. Upload Notebook Templates

In [14]:
# Git repository that is cloned below, and the local folder it is cloned into
# (relative to the current working directory).
repo_url = "https://github.com/ProdataSQL/DWA"
repo_dir = "DWA_repo"

def clone_git_repo(repo_url, repo_dir):
    """Clone ``repo_url`` into ``repo_dir`` (unless it is already there) and cd into it.

    Args:
        repo_url: URL of the Git repository to clone.
        repo_dir: Local target directory, resolved against the current working directory.

    Side effects:
        Changes the process working directory to ``repo_dir``; later cells rely
        on relative paths inside the repository.
    """
    if os.path.exists(repo_dir):
        # Cloning into an existing, non-empty directory fails, so reuse it.
        print("Repo already cloned.")
    else:
        Repo.clone_from(repo_url, repo_dir)
    os.chdir(repo_dir)
    
# Fetch the repository; after this call the working directory is the clone.
clone_git_repo(repo_url, repo_dir)

def get_notebooks(directory, pattern):
    """Return the paths of all files under ``directory`` whose name matches ``pattern``.

    The tree is walked recursively; ``pattern`` is an ``fnmatch``-style glob
    applied to file names only (not to directory names).
    """
    return [
        os.path.join(folder, matched_name)
        for folder, _subfolders, names in os.walk(directory)
        for matched_name in fnmatch.filter(names, pattern)
    ]

def py_to_notebook(py_content, notebook_name):
    """Wrap Python source text in a minimal nbformat 4.5 notebook dictionary.

    The whole of ``py_content`` becomes the source of a single code cell.
    ``notebook_name`` is accepted for call-site symmetry but is not used.
    """
    single_cell = {
        "cell_type": "code",
        "source": [py_content],
        "execution_count": None,
        "outputs": [],
    }
    notebook_metadata = {"language_info": {"name": "python"}}

    return {
        "nbformat": 4,
        "nbformat_minor": 5,
        "cells": [single_cell],
        "metadata": notebook_metadata,
    }

def upload_notebook(notebook_name, notebook_content, workspace_id, headers):
    """POST a notebook definition to the workspace's ``notebooks`` endpoint.

    Args:
        notebook_name: Display name for the notebook.
        notebook_content: Notebook dictionary; it is JSON-serialised and sent base64-encoded.
        workspace_id: Id of the target workspace.
        headers: HTTP headers (authorisation and content type) for the request.

    Raises:
        RuntimeError: For any status other than 200/201/202 or 400. A 400 is
            reported as "already exists" and otherwise ignored.
    """
    encoded_notebook = base64.b64encode(
        json.dumps(notebook_content).encode('utf-8')
    ).decode('utf-8')

    definition_part = {
        "path": "artifact.content.ipynb",
        "payload": encoded_notebook,
        "payloadType": "InlineBase64"
    }
    request_body = {
        "displayName": notebook_name,
        "description": f"Imported notebook {notebook_name}",
        "definition": {
            "format": "ipynb",
            "parts": [definition_part]
        }
    }

    fabric_response = requests.post(
        f"{BASE_URL}/workspaces/{workspace_id}/notebooks",
        headers=headers,
        data=json.dumps(request_body)
    )

    status = fabric_response.status_code
    if status == 400:
        print(f"The {notebook_name} already exists. No changes were made")
        return
    if status not in (200, 201, 202):
        raise RuntimeError(f"Failed to upload {notebook_name}: {fabric_response.status_code} - {fabric_response.text}")


# Notebook sources live in "<name>.Notebook" folders below this directory of the clone.
directory = "Workspaces/DWA/"
pattern = "*.py"

notebook_files = get_notebooks(directory, pattern)

notebook_folder_suffix = ".Notebook"

for notebook_file in notebook_files:
    notebook_folder = os.path.basename(os.path.dirname(notebook_file))

    # Skip .py files outside a "<name>.Notebook" folder; without this guard
    # they would be uploaded under the previous notebook's name (or fail with
    # NameError when they come first).
    if not notebook_folder.endswith(notebook_folder_suffix):
        continue

    # Strip only the trailing suffix so names that contain dots stay intact.
    notebook_name = notebook_folder[:-len(notebook_folder_suffix)]

    with open(notebook_file, 'r', encoding='utf-8') as file:
        py_content = file.read()

    notebook_content = py_to_notebook(py_content, notebook_name)

    upload_notebook(notebook_name, notebook_content, workspace_id=workspace_id, headers=headers)

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 16, Finished, Available, Finished)

The Extract-CSV already exists. No changes were made
The Refresh-Fabric already exists. No changes were made
The SharePoint-Shared-Functions already exists. No changes were made
The Ingest-SFTP already exists. No changes were made
The Extract-SP-Excel already exists. No changes were made
The Extract-Dictionary already exists. No changes were made
The Extract-XML already exists. No changes were made
The Demo - 10 Mins already exists. No changes were made
The Extract-CSV-Pandas already exists. No changes were made
The Extract-O365-API already exists. No changes were made
The Extract-Fabric-Logs already exists. No changes were made


### 5. Create Metadata and Data Warehouse SQL Objects

In [17]:
# Display names used to find the metadata SQL database and the warehouse.
META_DB_NAME = metadata_db_name
DW_NAME = warehouse_name
# Working directory at the time this cell runs; process_sql_objects returns to it.
original_dir = os.getcwd()
# Cache of SQLAlchemy engines, keyed by DBType member.
engine_pool = {}

class DBType(Enum):
    """Kind of SQL endpoint to deploy to.

    Each member's value is the REST collection name used to list items of
    that kind in the workspace.
    """
    SQLDatabase = "SQLDatabases"
    Warehouse = "Warehouses"

    def get_connection_string(self) -> str:
        """Build the ODBC connection string for the configured item of this kind.

        The item is found by display name (``DW_NAME`` for the warehouse,
        ``META_DB_NAME`` for the SQL database) among the workspace's items.

        Raises:
            ValueError: If no item with that display name exists, or the item
                exposes no server endpoint.
        """
        display_name = DW_NAME if self == DBType.Warehouse else META_DB_NAME
        client = fabric.FabricRestClient()
        endpoint = f"/v1/workspaces/{workspace_id}/{self.value}"
        databases = client.get(endpoint).json()

        selected_database = next((db for db in databases.get("value", []) if db.get("displayName") == display_name), None)
        if not selected_database:
            raise ValueError(f"No {self.value} with displayName '{display_name}' found.")

        properties = selected_database.get('properties') or {}
        # The two fields are tried in this order; the first one present is the server address.
        server = properties.get('serverFqdn') or properties.get('connectionString')
        if not server:
            raise ValueError(f"The {self.value} item '{display_name}' does not expose a server endpoint yet.")
        # Fall back to the item's own display name when no databaseName is reported.
        database = properties.get('databaseName', display_name)

        return f"Driver={{ODBC Driver 18 for SQL Server}};Server={server};database={database};LongAsMax=YES"

    def get_engine(self) -> Engine:
        """Return the cached SQLAlchemy engine for this kind, creating it on first use."""
        if self not in engine_pool:
            # Resolve the endpoint once instead of issuing a REST call per connection.
            connection_string = self.get_connection_string()
            # ODBC pre-connect attribute SQL_COPT_SS_ACCESS_TOKEN.
            sql_copt_ss_access_token = 1256

            def connect():
                # Fetch the token per connection so reconnects made after the
                # first token has expired still authenticate.
                token = mssparkutils.credentials.getToken('https://analysis.windows.net/powerbi/api').encode("UTF-16-LE")
                token_struct = struct.pack(f'<I{len(token)}s', len(token), token)
                return pyodbc.connect(connection_string, attrs_before={sql_copt_ss_access_token: token_struct})

            engine_pool[self] = sqlalchemy.create_engine("mssql+pyodbc://", creator=connect)
        return engine_pool[self]
   
def read_table_order(file_path: str) -> list:
    """Return the non-blank, whitespace-trimmed lines of ``file_path``.

    A missing file yields an empty list.
    """
    if os.path.exists(file_path):
        with open(file_path, "r") as handle:
            trimmed_lines = (raw_line.strip() for raw_line in handle)
            return [entry for entry in trimmed_lines if entry]
    return []

def process_sql_script(db_type: DBType, script_path: str):
    """Run a SQL script against the target database unless its object already exists.

    ``script_path`` is expected to look like ``<db folder>/<schema>/<kind>/<object>.sql``
    with ``/`` separators; the object and schema names are parsed from it to
    build the existence check. The script is split into batches on ``GO``
    lines and executed in a single transaction.

    Args:
        db_type: Target endpoint; supplies the SQLAlchemy engine.
        script_path: Relative path of the script. A missing file is ignored.

    Raises:
        Exception: Whatever the driver raised while executing a batch; the
            transaction is rolled back and the failure is printed first.
    """
    if not os.path.exists(script_path):
        return

    # utf-8-sig drops a leading byte-order mark that would otherwise end up
    # inside the first statement.
    with open(script_path, "r", encoding="utf-8-sig") as file:
        script = file.read()

    # GO is a client-side batch separator on a line of its own. Match it in any
    # case, with optional padding, and also at the very end of the file, where
    # a literal "\nGO\n" split would leave it inside the last batch.
    statements = re.split(r"^[ \t]*GO[ \t]*;?[ \t]*$", script, flags=re.IGNORECASE | re.MULTILINE)

    object_name = script_path.split("/")[-1].split(".")[0]
    db_name = script_path.split("/")[0].replace(".", " ")
    schema_name = script_path.split("/")[1]

    # Choose the catalog view that matches the folder the script lives in.
    if "Tables/" in script_path:
        check_query = f"SELECT COUNT(*) FROM sys.tables WHERE name = '{object_name}' AND schema_id = SCHEMA_ID('{schema_name}')"
    elif "Views/" in script_path:
        check_query = f"SELECT COUNT(*) FROM sys.views WHERE name = '{object_name}' AND schema_id = SCHEMA_ID('{schema_name}')"
    elif "StoredProcedures/" in script_path:
        check_query = f"SELECT COUNT(*) FROM sys.procedures WHERE name = '{object_name}' AND schema_id = SCHEMA_ID('{schema_name}')"
    elif "Security/" in script_path:
        check_query = f"SELECT COUNT(*) FROM sys.schemas WHERE name = '{object_name}'"
    else:
        check_query = f"SELECT COUNT(*) FROM sys.schemas WHERE name = '{schema_name}'"

    with db_type.get_engine().connect() as conn:
        object_exists = conn.execute(text(check_query)).scalar() > 0

        if not object_exists:
            try:
                # Empty batches (e.g. after a trailing GO) are dropped.
                for statement in filter(None, map(str.strip, statements)):
                    conn.execute(text(statement))
                conn.commit()
            except Exception as e:
                conn.rollback()
                print(f"Failed to create {schema_name}.{object_name}: {e}")
                raise
        else:
            print(f"The object {schema_name}.{object_name} already exists in {db_name}. No changes made.")

def iterate_sql_objects(db_type: DBType, object_type: str, objects_created: set):
    """Run every not-yet-processed ``.sql`` script of one object type.

    Walks the database folder (``Meta.SQLDatabase`` or ``DW.Warehouse``,
    relative to the working directory) and hands matching scripts to
    ``process_sql_script``.

    Args:
        db_type: Target endpoint; also selects the folder to walk.
        object_type: A folder name such as ``"Tables"``; only scripts with that
            folder in their path are run. The special value ``"Schema"`` selects
            scripts that are in none of the Tables/Views/StoredProcedures folders.
        objects_created: Paths already handled. Scripts in it are skipped, and
            each script run here is added to it (the set is mutated).
    """
    base_path = "Meta.SQLDatabase" if db_type == DBType.SQLDatabase else "DW.Warehouse"
    typed_folders = ("Tables", "Views", "StoredProcedures")

    # os.walk yields each directory exactly once, so no visited-set is needed.
    for root, _, files in os.walk(base_path):
        for file in files:
            if not file.endswith(".sql"):
                continue

            # normpath keeps the path relative without the character-set
            # stripping that str.lstrip("./") would perform.
            script_path = os.path.normpath(os.path.join(root, file))

            if script_path in objects_created:
                continue

            parts = script_path.split(os.sep)
            if object_type == "Schema":
                if any(folder in parts for folder in typed_folders):
                    continue
            elif object_type not in parts:
                continue

            process_sql_script(db_type, script_path)
            objects_created.add(script_path)

def process_sql_objects(db_type: DBType, obj_type: str = None):
    """Deploy all SQL objects of one type to the metadata database or the warehouse.

    Scripts listed in the table-order file (``Setup/Files/.../*TableOrder.txt``
    under the current directory) run first, in file order; every remaining
    script of ``obj_type`` is then picked up by ``iterate_sql_objects``.

    Args:
        db_type: Target endpoint.
        obj_type: Object folder to deploy (``"Tables"``, ``"Views"``, ...), or
            ``"Schema"`` as understood by ``iterate_sql_objects``.

    Side effects:
        Temporarily changes into ``Workspaces/DWA`` (when it exists) and always
        returns to the module-level ``original_dir``, even if a script fails.
    """
    current_dir = os.path.abspath(os.getcwd())

    table_order_files = {
        DBType.SQLDatabase: os.path.join(current_dir, "Setup", "Files", "SQLDatabase", "MetaTableOrder.txt"),
        DBType.Warehouse: os.path.join(current_dir, "Setup", "Files", "Warehouse", "DWTableOrder.txt")
    }
    # read_table_order returns [] when the file is missing.
    table_order = read_table_order(table_order_files[db_type])

    workspaces_dwa_path = os.path.join(current_dir, "Workspaces", "DWA")

    try:
        # Script paths below are relative to Workspaces/DWA.
        if os.path.exists(workspaces_dwa_path):
            os.chdir(workspaces_dwa_path)

        objects_created = set()
        base_path = "Meta.SQLDatabase" if db_type == DBType.SQLDatabase else "DW.Warehouse"

        for table in table_order:
            schema, separator, table_name = table.partition(".")
            if not separator:
                # Entries must look like [schema].[table]; skip anything else
                # instead of failing with IndexError.
                print(f"Skipping malformed table order entry: {table}")
                continue

            schema = schema.strip("[]")
            table_name = table_name.strip("[]")
            script_path = f"{base_path}/{schema}/{obj_type}/{table_name}.sql"

            if os.path.exists(script_path) and script_path not in objects_created:
                process_sql_script(db_type, script_path)
                objects_created.add(script_path)

        iterate_sql_objects(db_type, obj_type, objects_created)
    finally:
        # Restore the directory even on failure so a re-run resolves paths correctly.
        os.chdir(original_dir)

# Create metadata database objects. Schemas (Security) come first, followed by
# tables, views and stored procedures.
process_sql_objects(DBType.SQLDatabase, "Security")
process_sql_objects(DBType.SQLDatabase, "Tables")
process_sql_objects(DBType.SQLDatabase, "Views")
process_sql_objects(DBType.SQLDatabase, "StoredProcedures") 

# Create warehouse objects: schemas and stored procedures only. Warehouse tables
# and views are deployed in section 7.
process_sql_objects(DBType.Warehouse, "Schema")
process_sql_objects(DBType.Warehouse, "StoredProcedures")

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 19, Finished, Available, Finished)

The object Security.audit already exists in Meta SQLDatabase. No changes made.
The object Security.config already exists in Meta SQLDatabase. No changes made.
The object Security.devops already exists in Meta SQLDatabase. No changes made.
The object dbo.dict_artefacts already exists in Meta SQLDatabase. No changes made.
The object config.Configurations already exists in Meta SQLDatabase. No changes made.
The object config.Datasets already exists in Meta SQLDatabase. No changes made.
The object config.edwTables already exists in Meta SQLDatabase. No changes made.
The object config.edwTableJoins already exists in Meta SQLDatabase. No changes made.
The object config.Templates already exists in Meta SQLDatabase. No changes made.
The object config.PackageGroups already exists in Meta SQLDatabase. No changes made.
The object config.PackageGroupLinks already exists in Meta SQLDatabase. No changes made.
The object config.PackageGroupTables already exists in Meta SQLDatabase. No changes made.
T

### 6. Upload Data Pipelines

In [18]:
# Stores new references for updating old data pipelines connections 
# Ids and endpoints of the items created above. They replace the references
# stored in the repository's pipeline definitions before upload.
lh_artifact_id = fabric.resolve_item_id(lakehouse_name, "Lakehouse", workspace_id)  # Lakehouse ID

meta_artifact_id = fabric.resolve_item_id(metadata_db_name, "SqlDatabase", workspace_id)  # SQL DB ID

sqldb_url = f'{BASE_URL}/workspaces/{workspace_id}/SQLDatabases/{meta_artifact_id}'
response = requests.get(sqldb_url, headers=headers)
response.raise_for_status()  # fail here instead of silently reading None from an error body

meta_properties = response.json().get("properties", {})
meta_endpoint = meta_properties.get("serverFqdn")
meta_databasename = meta_properties.get("databaseName")
if not meta_endpoint:
    raise RuntimeError(f"SQL DB {metadata_db_name} does not report a serverFqdn yet; verify that it finished provisioning.")

dw_artifact_id = fabric.resolve_item_id(warehouse_name, "Warehouse", workspace_id)  # Warehouse ID

dw_url = f'{BASE_URL}/workspaces/{workspace_id}/warehouses/{dw_artifact_id}'
response = requests.get(dw_url, headers=headers)
response.raise_for_status()

dw_endpoint = response.json().get("properties", {}).get("connectionString")
if not dw_endpoint:
    raise RuntimeError(f"Warehouse {warehouse_name} does not report a connectionString yet; verify that it finished provisioning.")

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 20, Finished, Available, Finished)

In [19]:
# Request a new access token and rebuild the shared request headers with it.
access_token = mssparkutils.credentials.getToken('pbi') #Rebuild access token 

headers = {
    "Authorization": f"Bearer {access_token}",
    "Content-Type": "application/json"
}

class DataPipeline:
    """A data pipeline definition read from the repository, plus its upload state.

    Identity (equality and hashing) is based on ``git_id`` only. The class
    relies on these module-level names being defined before its methods are
    called: ``pipelines_created``, ``BASE_URL``, ``workspace_id``, ``headers``
    and the artifact ids/endpoints resolved in the previous cell.
    """
    name: str            # display name used when the pipeline is uploaded
    git_id: str          # logical id under which other definitions reference this pipeline
    real_id: str         # id of the pipeline in the workspace, set once it is known
    raw_definition: str  # definition as JSON text; rewritten before upload
    definition: object   # parsed form of the definition as read from the repository
    pipeline_created: bool = False

    def __init__(self, name, git_id, raw_definition, definition):
        self.name = name
        self.git_id = git_id
        self.definition = definition
        self.raw_definition = raw_definition

    def __hash__(self) -> int:
        return hash(self.git_id)

    def __eq__(self, other):
        # Comparing with a foreign type must not raise AttributeError.
        if not isinstance(other, DataPipeline):
            return NotImplemented
        return self.git_id == other.git_id

    def __str__(self):
        return f"DataPipeline: {self.name}"

    def __repr__(self):
        return f"DataPipeline: {self.name}"

    def is_created(self) -> bool:
        """Return True if this pipeline is already present in the workspace."""
        return self in pipelines_created or self.pipeline_created

    def create(self, pipelines):
        """Upload this pipeline, first creating any child pipelines it invokes.

        Children go first because their workspace ids have to be substituted
        into this pipeline's definition before it is uploaded.
        """
        if self.is_created():
            print(f"{self.name} already created")
            return

        print(f"Creating pipeline {self.name}")

        for child_pipeline in self.get_child_pipelines(pipelines):
            if child_pipeline.is_created():
                continue
            child_pipeline.create(pipelines)

        self.update_activity_references()
        self.upload_pipeline()

        print(f"{self.name} created and uploaded.")

    def get_child_pipelines(self, pipelines: "list[DataPipeline]") -> "set[DataPipeline]":
        """Return the pipelines from ``pipelines`` that this definition invokes.

        ExecutePipeline and InvokePipeline activities are matched by ``git_id``;
        activities nested under ``activities``, ``ifTrueActivities`` and
        ``ifFalseActivities`` are searched recursively. The result is also
        stored on ``self.child_pipelines``.
        """
        self.child_pipelines = set()

        def find_pipelines(activities):
            for activity in activities:
                activity_type = activity.get("type")
                type_properties = activity.get("typeProperties", {})

                if activity_type == "ExecutePipeline":
                    referenced_id = type_properties.get("pipeline", {}).get("referenceName")
                elif activity_type == "InvokePipeline":
                    referenced_id = type_properties.get("pipelineId")
                else:
                    # Container activity: descend into its nested activity lists.
                    for key in ["activities", "ifTrueActivities", "ifFalseActivities"]:
                        if key in type_properties:
                            find_pipelines(type_properties[key])
                    continue

                matched_pipeline = next(
                    (x for x in pipelines if x.git_id == referenced_id), None
                )
                if matched_pipeline:
                    self.child_pipelines.add(matched_pipeline)

        find_pipelines(self.definition.get("properties", {}).get("activities", []))
        return self.child_pipelines

    def update_activity_references(self):
        """Rewrite ``raw_definition`` so it points at this workspace's items.

        Replaces the git ids of already-created pipelines with their workspace
        ids, then swaps lakehouse / warehouse / SQL database artifact ids,
        endpoints, notebook ids and workspace ids. Values that start with
        ``@``, and non-string values, are left untouched.
        """
        notebooks_url = f"{BASE_URL}/workspaces/{workspace_id}/notebooks"
        response = requests.get(notebooks_url, headers=headers)
        response.raise_for_status()
        trident_notebooks = {
            notebook["displayName"]: notebook["id"]
            for notebook in response.json().get("value", [])
        }

        new_definition = self.raw_definition
        for pipeline in pipelines_created:
            new_definition = new_definition.replace(pipeline.git_id, pipeline.real_id)

        definition_json = json.loads(new_definition)

        def is_literal(value):
            # Values starting with "@" and non-string values must be preserved as-is.
            return isinstance(value, str) and not value.startswith("@")

        def set_artifact_id(obj, artifact_id):
            type_properties = obj.get("typeProperties")
            if isinstance(type_properties, dict) and is_literal(type_properties.get("artifactId")):
                type_properties["artifactId"] = artifact_id

        def set_endpoint(obj, endpoint):
            if is_literal(obj.get("endpoint")):
                obj["endpoint"] = endpoint

        def replace_values(obj):
            if isinstance(obj, dict):
                # Only values of existing keys are reassigned, so iterating items() is safe.
                for key, value in obj.items():
                    if key == "type" and isinstance(value, str):
                        if "Lakehouse" in value:
                            set_artifact_id(obj, lh_artifact_id)

                        elif "DataWarehouse" in value:
                            set_artifact_id(obj, dw_artifact_id)
                            set_endpoint(obj, dw_endpoint)

                        elif "FabricSqlDatabase" in value:
                            set_artifact_id(obj, meta_artifact_id)
                            set_endpoint(obj, meta_endpoint)

                        elif "TridentNotebook" in value:
                            # The activity name is matched against notebook display names.
                            notebook_name = obj.get("name")
                            if notebook_name in trident_notebooks and isinstance(obj.get("typeProperties"), dict):
                                obj["typeProperties"]["notebookId"] = trident_notebooks[notebook_name]

                    elif key == "workspaceId" and is_literal(value):
                        obj[key] = workspace_id

                    replace_values(value)

            elif isinstance(obj, list):
                for item in obj:
                    replace_values(item)

        replace_values(definition_json)

        self.raw_definition = json.dumps(definition_json, indent=4)

    def upload_pipeline(self):
        """Create the pipeline in the workspace and record its id.

        Raises:
            RuntimeError: If the request fails or the response carries no id.
        """
        pipelines_url = f"{BASE_URL}/workspaces/{workspace_id}/dataPipelines"
        pipeline_b64 = base64.b64encode(self.raw_definition.encode()).decode()

        payload = {
            "displayName": self.name,
            "definition": {
                "parts": [
                    {
                        "path": "pipeline.content.json",
                        "payload": pipeline_b64,
                        "payloadType": "InlineBase64",
                    }
                ]
            },
        }

        create_pipeline_request = requests.post(pipelines_url, headers=headers, data=json.dumps(payload))
        if not create_pipeline_request.ok:
            # The response text is included as-is; the body may not be JSON.
            raise RuntimeError(f"Failed to create pipeline {self.name}. ({create_pipeline_request.text})")

        try:
            created_item = create_pipeline_request.json()
        except ValueError:
            created_item = None

        real_id = created_item.get("id") if isinstance(created_item, dict) else None
        if not real_id:
            # Without an id, parent pipelines cannot be re-pointed at this one.
            raise RuntimeError(
                f"Pipeline {self.name} was accepted (HTTP {create_pipeline_request.status_code}) "
                "but no id was returned; verify it in the workspace and re-run this cell."
            )

        self.real_id = real_id
        self.pipeline_created = True
        pipelines_created.append(self)
    
pipelines: list[DataPipeline] = []

pipeline_folder_suffix = ".DataPipeline"

# Collect every "<name>.DataPipeline" folder below the current directory.
for path, dirs, files in os.walk("."):
    if not path.endswith(pipeline_folder_suffix):
        continue

    # Take the name from the folder itself so dots elsewhere in the path (or
    # in the pipeline name) cannot truncate it.
    pipeline_name = os.path.basename(path)[:-len(pipeline_folder_suffix)]

    # Context managers close the files; utf-8-sig tolerates a byte-order mark.
    with open(os.path.join(path, ".platform"), encoding="utf-8-sig") as platform_file:
        platform = json.load(platform_file)
    with open(os.path.join(path, "pipeline-content.json"), encoding="utf-8-sig") as definition_file:
        raw_definition = definition_file.read()

    definition = json.loads(raw_definition)
    pipeline_id = platform["config"]["logicalId"]

    data_pipeline = DataPipeline(pipeline_name, pipeline_id, raw_definition, definition)
    pipelines.append(data_pipeline)

pipelines_url = f"{BASE_URL}/workspaces/{workspace_id}/dataPipelines"
response = requests.get(pipelines_url, headers=headers)
response.raise_for_status()

pipelines_created: list[DataPipeline] = []

pipelines_existing: list[tuple[str, str]] = [
    (pipeline["displayName"], pipeline["id"])
    for pipeline in response.json().get("value", [])
]

# Pipelines that already exist in the workspace are matched by display name and
# only get their workspace id recorded; they are not uploaded again.
for pipeline_name, pipeline_id in pipelines_existing:
    pipeline = next((x for x in pipelines if x.name == pipeline_name), None)
    if not pipeline:
        continue

    pipeline.real_id = pipeline_id
    pipeline.pipeline_created = True
    pipelines_created.append(pipeline)

# Create the rest; create() uploads child pipelines before their parents.
for pipeline in pipelines:
    if pipeline in pipelines_created or pipeline.pipeline_created:
        continue
    pipeline.create(pipelines)

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 21, Finished, Available, Finished)

Creating pipeline TR_ConfigBackup
Creating pipeline Backup_ConfigSchema
Creating pipeline Environment-Variables
Environment-Variables created and uploaded.
Backup_ConfigSchema created and uploaded.
TR_ConfigBackup created and uploaded.
Creating pipeline Copy-Meta
Copy-Meta created and uploaded.
Creating pipeline Copy-SFTP-Prodata
Copy-SFTP-Prodata created and uploaded.
Creating pipeline TR_Ops
TR_Ops created and uploaded.
Creating pipeline Pipeline-Worker
Pipeline-Worker created and uploaded.
Creating pipeline Pipeline-Controller
Pipeline-Controller created and uploaded.
Creating pipeline Restore_ConfigSchema
Restore_ConfigSchema created and uploaded.
Creating pipeline TR_AdventureWorks
TR_AdventureWorks created and uploaded.


### 7. Create objects for Adventure Works when deploy_aw is set to True

In [21]:
# Stop the notebook here (exit value 1) when the AdventureWorks sample is not wanted.
if deploy_aw == False:
    mssparkutils.notebook.exit(1)

class CustomTokenCredential:
    """Credential object for the Data Lake client, backed by the notebook's own token."""

    def get_token(self, *scopes, **kwargs):
        """Return a storage access token; ``scopes`` and ``kwargs`` are ignored.

        A new token is requested on every call, so the far-future ``expires_on``
        is only a placeholder.
        """
        # Use the mssparkutils name this notebook imports, as the other cells
        # do, instead of a notebookutils name that is never imported here.
        return AccessToken(mssparkutils.credentials.getToken('storage'), expires_on=9999999999)

# Data Lake client for OneLake; the file system client is opened under the workspace name.
credential = CustomTokenCredential()
service_client = DataLakeServiceClient(account_url=f"https://onelake.dfs.fabric.microsoft.com", credential=credential)
fs = service_client.get_file_system_client(workspace_name)

# Local folder (relative to the working directory) -> destination path inside the lakehouse.
lh_paths = {
    "tmp/landing/": f"{lakehouse_name}.Lakehouse/Files/landing",
    "tmp/Tables/": f"{lakehouse_name}.Lakehouse/Tables"
}

def unpack_files(Setup_dir, archives):
    """Extract the zip archives found in ``Setup_dir`` into their target folders.

    Args:
        Setup_dir: Directory that contains the archives.
        archives: Mapping of archive file name -> extraction directory.

    The repository is cloned only when ``Setup_dir`` is not available; when the
    notebook already runs inside the clone, cloning again would just create a
    nested copy.
    """
    if not os.path.isdir(Setup_dir):
        clone_git_repo(repo_url, repo_dir)
    for archive, target in archives.items():
        shutil.unpack_archive(os.path.join(Setup_dir, archive), target, "zip")

def upload_files(local_path, azure_path, file_system=None):
    """Upload every file under ``local_path`` to ``azure_path``, keeping the folder layout.

    Args:
        local_path: Local directory to walk recursively.
        azure_path: Destination folder inside the file system.
        file_system: File system client to upload with; defaults to the
            module-level ``fs``. It must provide ``get_file_client(path)``.

    Existing remote files are overwritten.
    """
    target_fs = fs if file_system is None else file_system

    for root, _, files in os.walk(local_path):
        relative_path = os.path.relpath(root, local_path)
        for file in files:
            file_path_on_local = os.path.join(root, file)
            # normpath removes the "." segment that relpath yields for files
            # directly under local_path, which would otherwise produce ".../landing/./name".
            file_path_on_azure = os.path.normpath(
                os.path.join(azure_path, relative_path, file)
            ).replace("\\", "/")
            file_client = target_fs.get_file_client(file_path_on_azure)
            with open(file_path_on_local, "rb") as data:
                file_client.upload_data(data, overwrite=True)

# Folder with the AdventureWorks archives (relative to the working directory)
# and the local folder each archive is extracted into.
git_lh_directory = "Setup/Files/Lakehouse"
archives = {
    'AW_landing.zip': "tmp/landing",
    'AW_tables.zip': "tmp/Tables"
}

# Extract the archives locally, then upload each extracted folder to its lakehouse path.
unpack_files(git_lh_directory, archives)
for local, azure in lh_paths.items():
    upload_files(local, azure)

#Deploy AW Warehouse Objects 
process_sql_objects(DBType.Warehouse, "Tables")
process_sql_objects(DBType.Warehouse, "Views")

StatementMeta(, 0b18a74d-7148-4609-86a1-cdda146cc5b1, 23, Finished, Available, Finished)

The object aw.DimCurrency already exists in DW Warehouse. No changes made.
The object aw.DimDepartmentGroup already exists in DW Warehouse. No changes made.
The object aw.DimScenario already exists in DW Warehouse. No changes made.
The object aw.DimDate already exists in DW Warehouse. No changes made.
The object aw.ReportAccountMap already exists in DW Warehouse. No changes made.
The object aw.FactFinance already exists in DW Warehouse. No changes made.
The object aw.DimAccount already exists in DW Warehouse. No changes made.
The object aw.DimOrganization already exists in DW Warehouse. No changes made.
