#### Enter desired Lakehouse Name and Workspace Name to Optimize

In [None]:
# User-editable settings: the lakehouse to optimize and the workspace that hosts it.
lakehouse_name = "TEST00005"
workspace_name = "FeatureOnyxToolsII"

# Echo both settings so the run output shows which target was selected.
print(
    f"Lakehouse to mount: {lakehouse_name}",
    f"Workspace : {workspace_name}",
    sep="\n",
)

In [None]:
%run NB - Load Configuration

In [None]:
import notebookutils
from notebookutils import mssparkutils
from pyspark.sql import SparkSession

In [None]:
%run Fabric_utils

In [None]:
# Resolve the configured names to identifiers; both are used below to build the OneLake URL.
# NOTE(review): get_workspace_id / get_lakehouse_id are not defined in this notebook —
# presumably they are brought in by the `%run Fabric_utils` cell; confirm.
workspace_id = get_workspace_id(workspace_name)
lakehouse_id = get_lakehouse_id(workspace_name, lakehouse_name)


In [None]:

# Mount the lakehouse's OneLake location under a mount point named after the lakehouse.
mount_point = f"/{lakehouse_name}"
storage_url = f"abfss://{workspace_id}@onelake.dfs.fabric.microsoft.com/{lakehouse_id}"

notebookutils.fs.mount(storage_url, mount_point)

# Find the entry for our mount point among the current mounts (first match wins).
mount_info = None
for mounted in notebookutils.fs.mounts():
    if mounted.mountPoint == mount_point:
        mount_info = mounted
        break

# Fail fast when the mount is not listed; every later cell depends on localPath.
if not mount_info:
    raise ValueError(f"Mount point {mount_point} not found!")

localPath = mount_info.localPath
print(f"localPath = {localPath}")


#### TableSize Analysis

In [None]:
# Run the size-analysis notebook in "before" mode and keep the value it exits with;
# later cells use that value as a folder prefix and pass it to the "after" run.
# Second argument (500): timeout in seconds per the NotebookUtils reference.
distinct_folder = mssparkutils.notebook.run(
    "NB - TableSizeAnalysis",
    500,
    {
        "beforeAfter": "before",
        "lakehouse_name": lakehouse_name,
        "lakehouse_path": localPath,
    },
)

In [None]:
# Run the performance-baseline notebook against the "before" sub-folder of the
# folder returned by the size analysis.
# Second argument (500): timeout in seconds per the NotebookUtils reference.
mssparkutils.notebook.run(
    "NB - performance baseline",
    500,
    {
        "specificpath": f"{distinct_folder}/before",
        "lakehouse_name": lakehouse_name,
        "lakehouse_id": lakehouse_id,
        "workspace_id": workspace_id,
    },
)

#### Exclude shortcuts from maintenance

In [None]:
# NOTE(review): nothing in this notebook reads this flag — presumably it is consumed by the
# "NB - Shortcuts Identification" notebook that is %run in the next cell, to signal that it is
# being invoked from this parent notebook; confirm against that notebook.
is_parent_call = True

In [None]:
%run "NB - Shortcuts Identification"

In [None]:
# Look up the shortcuts of this lakehouse so the maintenance cell can skip them.
# NOTE(review): fetch_shortcut_names is not defined in this notebook — presumably it comes from
# the `%run "NB - Shortcuts Identification"` cell. The maintenance cell reads
# shortcut_names["name"].tolist(), so the result must support that lookup; confirm.
shortcut_names = fetch_shortcut_names(workspace_id, lakehouse_id)
display(shortcut_names)

#### Maintenance

In [None]:
from delta.tables import *
from datetime import datetime
import pandas as pd

# Turn off Delta's retention-duration safety check; the 48-hour VACUUM retention used by
# cleanTables is shorter than Delta's default threshold and would otherwise be rejected.
spark.conf.set("spark.databricks.delta.retentionDurationCheck.enabled", "false")

# Tables are reached through the local mount, hence the file: scheme.
tables_base_path = f"file:{localPath}/Tables/"

tables_info = mssparkutils.fs.ls(tables_base_path)

# One row per entry under Tables/. The columns are declared explicitly so the frame
# still has "table_name"/"table_path" when Tables/ is empty; without this,
# pd.DataFrame([]) has no columns and the lookups below raise KeyError.
table_names_df = pd.DataFrame(
    [
        {
            "table_name": t.name.strip('/'),
            "table_path": tables_base_path + t.name.strip('/'),
        }
        for t in tables_info
    ],
    columns=["table_name", "table_path"],
)

all_tables = table_names_df["table_name"].tolist()

# Names of shortcuts, which must not be optimized or vacuumed from this lakehouse.
shortcuts_list = shortcut_names["name"].tolist()

# Keep only the entries whose name is not a shortcut.
is_shortcut = table_names_df["table_name"].isin(shortcuts_list)
real_tables_df = table_names_df[~is_shortcut].reset_index(drop=True)

display(real_tables_df)

# 🧹 
def cleanTables(table_name, table_path, retention_hours=48):
    """Run OPTIMIZE (with VORDER) and then VACUUM on one Delta table.

    Uses the notebook-provided ``spark`` session; nothing is returned.

    Args:
        table_name: Name of the table; used only in the progress messages.
        table_path: Path of the Delta table, interpolated into the
            path-based Delta identifier of both SQL statements.
        retention_hours: Value placed in ``VACUUM ... RETAIN <n> HOURS``.
            Defaults to 48, the retention this notebook previously
            hard-coded, so existing two-argument calls behave as before.
    """
    print(f"\n🔄 Optimizing and vacuuming: {table_name}")
    # Compact first, then vacuum, so files made obsolete by the rewrite are
    # candidates for removal once they fall outside the retention window.
    spark.sql(f"OPTIMIZE delta.`{table_path}` VORDER")
    spark.sql(f"VACUUM delta.`{table_path}` RETAIN {retention_hours} HOURS")
    print(f"✅ Completed: {table_name}")

# 🔁 
# Walk the name and path columns in lockstep so every non-shortcut table is
# cleaned once, in listing order.
for tbl_name, tbl_path in zip(real_tables_df["table_name"], real_tables_df["table_path"]):
    cleanTables(tbl_name, tbl_path)


In [None]:
# Re-run the size-analysis notebook in "after" mode, handing it the folder value
# returned by the "before" run together with the same lakehouse name and path.
# Second argument (500): timeout in seconds per the NotebookUtils reference.
mssparkutils.notebook.run(
    "NB - TableSizeAnalysis",
    500,
    {
        "beforeAfter": "after",
        "distinct_folder": distinct_folder,
        "lakehouse_name": lakehouse_name,
        "lakehouse_path": localPath,
    },
)