In [None]:
# %pip install snowflake-connector-python==3.14.0 --quiet
# %pip install msfabricpysdkcore --quiet

In [None]:
import json
import os
import struct
from time import sleep

import pandas as pd
import pyodbc
import requests
import snowflake.connector
from azure.identity import ClientSecretCredential
from azure.storage.filedatalake import DataLakeServiceClient, FileSystemClient, DataLakeDirectoryClient
from msfabricpysdkcore import FabricClientCore

try:
    import sempy.fabric as fabric
    local = False
except:
    local = True
    from azure.identity import DefaultAzureCredential, AzureCliCredential
    local_credentials = AzureCliCredential()

In [None]:
# Workspace that contains the metadata SQL database, and workspace that
# contains the mirrored database.
#
# If you are running this notebook locally, please set the workspace IDs to the
# ones you are using in your Fabric workspace.
# If you are running this notebook in Fabric, you may leave them as None; the
# code will then use the workspace this notebook is running in.
sql_database_workspace_id = None #"dafsd84f7"
mirrored_db_workspace_id = None #"dafsd84f7"

# ID of the mirrored database whose landing zone receives the data.
mirrored_db_id = None #"dafsd84f7"

if sql_database_workspace_id is None or mirrored_db_workspace_id is None:
    if local:
        # `fabric` (sempy) is only imported when running inside Fabric, so the
        # workspace cannot be auto-detected here; fail with a clear message
        # instead of a NameError.
        raise ValueError(
            "Running locally: set sql_database_workspace_id and "
            "mirrored_db_workspace_id explicitly; they can only be "
            "auto-detected inside a Fabric notebook."
        )
    current_workspace_id = fabric.get_workspace_id()
    if sql_database_workspace_id is None:
        sql_database_workspace_id = current_workspace_id
    if mirrored_db_workspace_id is None:
        mirrored_db_workspace_id = current_workspace_id

# One dict per Snowflake view to register for mirroring. An optional
# "identifiers" key may be added; its stored value is later parsed with
# json.loads and joined into a Snowflake CONCAT(...), so it should hold a JSON
# array of column names.
new_entries = [{"workspace_id": mirrored_db_workspace_id,
                "mirrored_db_id": mirrored_db_id,
                "snowflake_db": "STREAMTEST",
                "snowflake_schema": "STREAMTESTSCHEMA",
                "view_name": "CUSTOMERS_PER_STOCKITEMKEY2",
                "keyvault": "openmirroring"}
             ]

# Name of the Fabric SQL database that holds the [dbo].[Metadata] table.
sql_database_name="metadatamirroring"

In [None]:
# Fabric REST client; reused further down to query and start mirroring.
fcc = FabricClientCore()

# Look up the SQL database item that stores the mirroring metadata.
sql_db = fcc.get_sql_database(
    sql_database_name=sql_database_name,
    workspace_id=sql_database_workspace_id,
)

In [None]:
# NOTE(review): the last five characters of the FQDN are dropped - presumably a
# trailing ",1433" port suffix; confirm against the API response.
server_fqdn = sql_db.properties["serverFqdn"]
server = server_fqdn[:-5]

databasename = sql_db.properties["databaseName"]
# The name is wrapped in braces before being placed in the ODBC connection string.
database = f"{{{databasename}}}"

In [None]:
# Acquire an access token for the SQL endpoint: from the Azure CLI credential
# when running locally, otherwise from the Fabric notebook runtime.
if not local:
    access_token = notebookutils.credentials.getToken("https://database.windows.net/.default")
else:
    access_token = local_credentials.get_token("https://database.windows.net/.default").token
token_bytes = access_token.encode("UTF-16-LE")

# Pack the token as a little-endian 32-bit length followed by the raw bytes,
# the layout handed to the driver through the access-token connection attribute.
token_length = len(token_bytes)
token_struct = struct.pack(f'<I{token_length}s', token_length, token_bytes)
SQL_COPT_SS_ACCESS_TOKEN = 1256  # This connection option is defined by microsoft in msodbcsql.h

# Connection parameters (authentication is supplied separately via the token).
connection_string = f"Driver={{ODBC Driver 18 for SQL Server}};Server={server};Database={database};"

# Register every configured view in [dbo].[Metadata].
#
# Values are bound as query parameters instead of being interpolated into the
# SQL text, so quotes in a value can neither break nor alter the statement.
INSERT_METADATA_SQL = """INSERT INTO [dbo].[Metadata] (
        [workspace_id],
        [mirrored_db_id],
        [snowflake_db],
        [snowflake_schema],
        [view_name],
        [identifiers],
        [keyvault]
    ) VALUES (?, ?, ?, ?, ?, ?, ?);"""

REQUIRED_ENTRY_KEYS = (
    "workspace_id",
    "mirrored_db_id",
    "snowflake_db",
    "snowflake_schema",
    "view_name",
    "keyvault",
)

# Validate and normalise all entries first so nothing is written if one of
# them is incomplete.
insert_params = []
for new_entry in new_entries:
    missing_keys = [key for key in REQUIRED_ENTRY_KEYS if new_entry.get(key) is None]
    if missing_keys:
        # Previously a None value was stored as the literal string 'None'.
        raise ValueError(
            f"new_entries item {new_entry!r} has no value for: {', '.join(missing_keys)}"
        )

    identifiers = new_entry.get("identifiers", None)
    if identifiers is not None and not isinstance(identifiers, str):
        # The column is read back with json.loads, so store real JSON rather
        # than Python's repr of a list.
        identifiers = json.dumps(list(identifiers))

    insert_params.append((
        new_entry["workspace_id"],
        new_entry["mirrored_db_id"],
        new_entry["snowflake_db"],
        new_entry["snowflake_schema"],
        new_entry["view_name"],
        identifiers,  # None is bound as SQL NULL
        new_entry["keyvault"],
    ))

if insert_params:
    # One connection and one transaction for all inserts; the connection is
    # closed even if an insert fails (uncommitted work is then discarded).
    conn = pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct})
    try:
        cursor = conn.cursor()
        for params in insert_params:
            cursor.execute(INSERT_METADATA_SQL, params)
        conn.commit()
        cursor.close()
    finally:
        conn.close()

# Read back every metadata row that still has to be initialised.
# Connect with the Entra ID (Azure AD) token; the connection is closed even if
# the query fails.
conn = pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct})
try:
    cursor = conn.cursor()
    cursor.execute("SELECT * FROM Metadata WHERE STATUS = 'not initialized' ORDER BY ID DESC;")
    rows = cursor.fetchall()

    # cursor.description holds one tuple per result column; element 0 is its name.
    column_names = [column[0] for column in cursor.description]
    cursor.close()
finally:
    conn.close()

df = pd.DataFrame.from_records(rows, columns=column_names)

# Decide on the number of rows rather than on the truthiness of the ID values,
# so a pending row whose ID is 0 is not reported as "nothing to do".
if df.empty:
    print("Nothing to initialize")
else:
    count = len(df)
    print(f"Initializing {count} views")


In [None]:
_SECRET_NAMES = (
    "mirroringspntenantid",
    "mirroringspnclientid",
    "mirroringspnclientsecret",
    "snowflakeaccount",
    "snowflakeuser",
    "snowflakepassword",
)


def _get_keyvault_secrets(keyvault, secret_names):
    """Return ``{secret_name: value}`` for the given secrets of Key Vault ``keyvault``.

    Inside Fabric the notebook runtime resolves the secrets; locally they are
    fetched from the Key Vault REST API with a token from ``local_credentials``.

    Raises:
        Exception: if a local REST call does not return HTTP 200.
    """
    vault_base_url = f"https://{keyvault}.vault.azure.net"

    if not local:
        return {
            name: notebookutils.credentials.getSecret(f"{vault_base_url}/", name)
            for name in secret_names
        }

    # NOTE(review): the vault URL itself is passed as the token scope; confirm
    # this is accepted rather than "https://vault.azure.net/.default".
    keyvault_token = local_credentials.get_token(vault_base_url).token
    request_headers = {
        "Authorization": f"Bearer {keyvault_token}",
        "Content-Type": "application/json"
    }

    secrets = {}
    for name in secret_names:
        keyvault_url = f"{vault_base_url}/secrets/{name}?api-version=7.4"
        # A timeout keeps an unreachable vault from hanging the notebook forever.
        response = requests.get(keyvault_url, headers=request_headers, timeout=30)
        if response.status_code != 200:
            raise Exception(f"Failed to retrieve secret: {response.status_code} - {response.text}")
        secrets[name] = response.json()["value"]
    return secrets


def _create_snowflake_mirror_objects(sf_conn, fq_view, identifiers):
    """Create the helper objects for ``fq_view`` (``db.schema.view``) in Snowflake.

    Creates ``<view>_MIRROR`` (the view plus a ``hash_`` column),
    ``<view>_MIRROR_HASHES`` (table of hashes with an auto-increment ``id``),
    ``<view>_MIRROR_OLD`` (hashes no longer present in the mirror view) and
    ``<view>_MIRROR_NEW`` (mirror rows whose hash is not in the hashes table).
    """
    if identifiers:
        # Prefix the row hash with the identifier columns.
        hash_expression = f"CONCAT({','.join(identifiers)},HASH(*))"
    else:
        hash_expression = "TO_VARCHAR(HASH(*))"

    statements = [
        f"""CREATE or REPLACE VIEW {fq_view}_MIRROR AS SELECT *, {hash_expression} as hash_ FROM {fq_view}""",
        f"""CREATE or REPLACE TABLE {fq_view}_MIRROR_HASHES (HASH_ VARCHAR);""",
        f"""ALTER TABLE {fq_view}_MIRROR_HASHES ADD COLUMN id INT AUTOINCREMENT;""",
        """COMMIT;""",
        f"""
    CREATE OR REPLACE VIEW {fq_view}_MIRROR_OLD as (

    select
    *
    from
    {fq_view}_MIRROR_HASHES t1
    where
    not exists (select
                    1
                from
                    {fq_view}_MIRROR t2
                where
                    t2.hash_ = t1.hash_
                ))

    """,
        f"""

    CREATE OR REPLACE VIEW {fq_view}_MIRROR_NEW as (
    select
    *
    from
    {fq_view}_MIRROR t1
    where
    not exists (select
                    1
                from
                    {fq_view}_MIRROR_HASHES t2
                where
                    t2.hash_ = t1.hash_
                ))
    """,
    ]
    for statement in statements:
        with sf_conn.cursor() as cur:
            cur.execute(statement).fetchall()


def _prepare_landing_zone(credential, workspace_id, mirrored_db_id, view_name):
    """Ensure the landing-zone folder and ``_metadata.json`` exist for ``view_name``."""
    service_client = DataLakeServiceClient("https://onelake.dfs.fabric.microsoft.com/", credential=credential)
    fsc = service_client.get_file_system_client(workspace_id)

    mirrored_db_path = f"{mirrored_db_id}/Files/LandingZone"
    fsc.create_directory(mirrored_db_path)
    print(f"Directory {mirrored_db_path} created")

    # A single listing is enough: creating the table directory below cannot
    # make a previously missing _metadata.json appear.
    content_names = {content["name"] for content in fsc.get_paths(path=mirrored_db_path)}

    table_path = mirrored_db_path + f"/{view_name}"
    if table_path not in content_names:
        fsc.create_directory(table_path)
        print(f"Directory {table_path} created")

    metadata_path = mirrored_db_path + f"/{view_name}/_metadata.json"
    if metadata_path not in content_names:
        json_content = {
            "keyColumns": [
                "METADATAROW_ID"
            ],
            "fileFormat": "parquet"
        }
        os.makedirs("upload", exist_ok=True)
        local_file = os.path.join("upload", "_metadata.json")
        # Close (and thereby flush) the file before it is read back for upload.
        with open(local_file, "w") as handle:
            json.dump(json_content, handle, indent=4)

        file_client = fsc.get_directory_client(table_path).get_file_client("_metadata.json")
        with open(file=local_file, mode="rb") as data:
            file_client.upload_data(data, overwrite=True)
        print(f"File {metadata_path} uploaded")


def _ensure_mirroring_running(workspace_id, mirrored_db_id, attempts=20, delay_seconds=5):
    """Start mirroring if it is not running and poll until it is.

    Returns True once the reported status is RUNNING, False if it still is not
    after ``attempts`` polls spaced ``delay_seconds`` apart.
    """
    def is_running(status):
        return 'status' in status and status['status'].upper() == 'RUNNING'

    status = fcc.get_mirroring_status(workspace_id=workspace_id, mirrored_database_id=mirrored_db_id)
    if is_running(status):
        return True

    fcc.start_mirroring(workspace_id=workspace_id, mirrored_database_id=mirrored_db_id)
    for _ in range(attempts):
        sleep(delay_seconds)
        status = fcc.get_mirroring_status(workspace_id=workspace_id, mirrored_database_id=mirrored_db_id)
        if is_running(status):
            return True
    return False


def _mark_view_active(record_id, streamable):
    """Set ``status`` to 'active' and store ``streamable`` for metadata row ``record_id``."""
    # Fetch a fresh token: the steps before this one can take a while.
    if local:
        token_bytes = local_credentials.get_token("https://database.windows.net/.default").token.encode("UTF-16-LE")
    else:
        token_bytes = notebookutils.credentials.getToken("https://database.windows.net/.default").encode("UTF-16-LE")
    token_struct = struct.pack(f'<I{len(token_bytes)}s', len(token_bytes), token_bytes)

    sql_conn = pyodbc.connect(connection_string, attrs_before={SQL_COPT_SS_ACCESS_TOKEN: token_struct})
    try:
        cursor = sql_conn.cursor()
        # The flag is sent as the text 'True'/'False', exactly what the
        # previously interpolated statement contained.
        cursor.execute(
            "UPDATE Metadata SET [status] = 'active', [streamable] = ? WHERE ID = ?;",
            (str(streamable), record_id),
        )
        sql_conn.commit()
        cursor.close()
    finally:
        sql_conn.close()


def run_for_one_view(df):
    """Initialise mirroring for one view described by a single metadata row.

    Steps: read the service-principal and Snowflake secrets from the row's Key
    Vault, create the ``_MIRROR*`` helper objects in Snowflake, prepare the
    landing-zone folder of the mirrored database, make sure mirroring is
    running, and finally mark the metadata row as 'active'.

    Args:
        df: despite the name, ONE row of the metadata table (e.g. a pandas
            Series from ``DataFrame.iterrows``) with the keys ``ID``,
            ``keyvault``, ``workspace_id``, ``mirrored_db_id``,
            ``snowflake_db``, ``snowflake_schema``, ``view_name`` and
            ``identifiers`` (JSON text or NULL).

    Raises:
        Exception: if a secret cannot be retrieved when running locally.
    """
    row = df
    streamable = False

    secrets = _get_keyvault_secrets(row["keyvault"], _SECRET_NAMES)
    credential = ClientSecretCredential(tenant_id=str(secrets["mirroringspntenantid"]),
                                        client_id=secrets["mirroringspnclientid"],
                                        client_secret=secrets["mirroringspnclientsecret"])

    mirrored_db_workspace_id = row["workspace_id"]
    mirrored_db_id = row["mirrored_db_id"]
    view_name = row["view_name"]
    fq_view = f"{row['snowflake_db']}.{row['snowflake_schema']}.{view_name}"

    # NULL may arrive as None or NaN; only non-empty text is parsed as JSON.
    # `json` is the module-level import: a function-local `import json` further
    # down used to make this line fail with UnboundLocalError.
    raw_identifiers = row["identifiers"]
    if isinstance(raw_identifiers, str) and raw_identifiers.strip():
        identifiers = json.loads(raw_identifiers)
    else:
        identifiers = None

    sf_conn = snowflake.connector.connect(account=secrets["snowflakeaccount"],
                                          user=secrets["snowflakeuser"],
                                          password=secrets["snowflakepassword"])
    try:
        _create_snowflake_mirror_objects(sf_conn, fq_view, identifiers)
    finally:
        # The Snowflake session is only needed for the DDL above.
        sf_conn.close()

    _prepare_landing_zone(credential, mirrored_db_workspace_id, mirrored_db_id, view_name)

    if not _ensure_mirroring_running(mirrored_db_workspace_id, mirrored_db_id):
        print(f"Warning: mirroring for {mirrored_db_id} is not reported as RUNNING yet")

    record_id = row["ID"]
    if hasattr(record_id, "item"):
        # Convert numpy scalars to plain Python values for parameter binding.
        record_id = record_id.item()
    _mark_view_active(record_id, streamable)


In [None]:
# Initialise every pending view, one metadata row at a time.
for _row_label, pending_view in df.iterrows():
    run_for_one_view(pending_view)