In [4]:
import re
from datetime import datetime
from typing import Optional

from pyspark.sql.functions import col, when

def get_latest_file(src_table: str) -> Optional[str]:
    """
    Return the path of the most recently dated file in a source table's folder.

    Every file under ``Files/Sybase/<src_table>`` is listed, and only paths whose
    last segment has the form ``<YYYY-MM-DD><name>`` are considered, where
    ``<name>`` is made of letters, digits and underscores. The file with the
    greatest date wins; when several files share that date, the first one in
    listing order is returned.

    A file whose name has the right shape but an impossible date (for example
    ``2024-13-45``) is skipped with a message instead of aborting the lookup.

    :param src_table: Name of the table (subfolder in Lakehouse).
    :return: Path of the latest file, or None if the folder is empty, no file
        name matches the pattern, or listing the folder fails.
    """

    # Base path for the table's folder
    lakehouse_path = f"abfss://b2c899fb-e571-4496-aebf-c7a23083635a@onelake.dfs.fabric.microsoft.com/a00cf91a-f92e-498a-9f14-ba10221fb05a/Files/Sybase/{src_table}"

    # Group 1 is the date prefix of the last path segment, group 2 the rest of it.
    filename_pattern = re.compile(r".*/(\d{4}-\d{2}-\d{2})([A-Za-z0-9_]+)$")

    try:
        # List all files in the specified directory
        files_df = spark.read.format("binaryFile").load(lakehouse_path)
        file_paths = [row.path for row in files_df.select("path").collect()]

        if not file_paths:
            print(f"No files found in {lakehouse_path}")
            return None

        # Collect (date, path) pairs for every file with a usable date prefix.
        dated_files = []
        for file_path in file_paths:
            match = filename_pattern.search(file_path)
            if match is None:
                continue
            try:
                file_date = datetime.strptime(match.group(1), "%Y-%m-%d")
            except ValueError:
                # Right shape, not a real calendar date: ignore this file only.
                print(f"Skipping {file_path}: invalid date in file name")
                continue
            dated_files.append((file_date, file_path))

        if not dated_files:
            print(f"No valid files matching pattern found in {lakehouse_path}")
            return None

        # Compare on the date alone so ties keep listing order (max returns the
        # first maximal element).
        _, latest_file_path = max(dated_files, key=lambda item: item[0])
        return latest_file_path

    except Exception as e:
        # Best-effort lookup: any listing/Spark failure is reported and turned
        # into None so the caller can decide how to proceed.
        print(f"Error processing {src_table}: {e}")
        return None

StatementMeta(, 2d8801c8-bf15-4e64-87c3-46b5adcd3c29, 6, Finished, Available, Finished)

In [5]:
# Resolve the newest 'ftiendas_ext' extract first: get_latest_file returns None
# when nothing usable is found, and passing None on to spark.read.parquet would
# only produce an opaque error.
ftiendas_ext_path = get_latest_file('ftiendas_ext')
if ftiendas_ext_path is None:
    raise FileNotFoundError(
        "No dated extract found for 'ftiendas_ext'; see the messages printed by get_latest_file."
    )

ftiendas_ext_df = spark.read.parquet(ftiendas_ext_path)

StatementMeta(, 2d8801c8-bf15-4e64-87c3-46b5adcd3c29, 7, Finished, Available, Finished)

In [6]:
# OneLake location where the Delta table will be stored
delta_table_path = "abfss://b2c899fb-e571-4496-aebf-c7a23083635a@onelake.dfs.fabric.microsoft.com/a00cf91a-f92e-498a-9f14-ba10221fb05a/Tables/ftiendas_ext"

# Persist the DataFrame as a Delta table. "errorifexists" is the writer's default
# save mode; it is spelled out so it is obvious that this cell fails when data
# already exists at the target path.
(
    ftiendas_ext_df.write
    .format("delta")
    .mode("errorifexists")
    .save(delta_table_path)
)

print(f"Tabla Delta guardada en: {delta_table_path}")

StatementMeta(, 2d8801c8-bf15-4e64-87c3-46b5adcd3c29, 8, Finished, Available, Finished)

Tabla Delta guardada en: abfss://b2c899fb-e571-4496-aebf-c7a23083635a@onelake.dfs.fabric.microsoft.com/a00cf91a-f92e-498a-9f14-ba10221fb05a/Tables/ftiendas_ext


In [4]:
# Read up to 1000 rows of LZ_LH_Main.source_lookup and show them with the
# notebook's display() helper for a quick visual check of the table's contents.
df = spark.sql("SELECT * FROM LZ_LH_Main.source_lookup LIMIT 1000")
display(df)

StatementMeta(, fa4c1625-1db4-4f16-88bc-6fe442d114f4, 6, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, ec64fe1f-e5e2-43a5-b9c7-09f70ab8872f)

In [3]:
%%sql
-- Set the `load` column to 'no' on the lookup row(s) whose table is 'fmovarti'.
-- NOTE(review): presumably this excludes fmovarti from subsequent loads; confirm
-- against whatever process reads source_lookup.
update LZ_LH_Main.source_lookup
set load = 'no'
where table = 'fmovarti'

StatementMeta(, fa4c1625-1db4-4f16-88bc-6fe442d114f4, 5, Finished, Available, Finished)

<Spark SQL result set with 1 rows and 1 fields>

In [1]:
# Add a lookup row for 'ftiendas_ext' with is_replication_mmp, permisson and load
# all set to 'yes' and a match-all where clause ('1=1').
# NOTE(review): this is a plain INSERT, so re-running the cell presumably adds a
# second identical row; confirm before executing it again.
# NOTE(review): 'permisson' looks like a misspelling of 'permission'; it is left
# as-is because it has to match the table's actual column name.
spark.sql("""
INSERT INTO LZ_LH_Main.source_lookup (table, is_replication_mmp, permisson, load, where_clause)
VALUES ('ftiendas_ext', 'yes', 'yes', 'yes', '1=1')
""")

StatementMeta(, 2d8801c8-bf15-4e64-87c3-46b5adcd3c29, 3, Finished, Available, Finished)

DataFrame[]

In [4]:
%%sql
-- Set the where_clause value on the lookup row(s) for 'fmovarti' to a filter that
-- compares UDDATE with the date two days before GETDATE().
-- NOTE(review): the filter looks like source-database SQL stored as text rather
-- than something evaluated by this statement; confirm how where_clause is consumed.
-- NOTE(review): the table is referenced without the LZ_LH_Main prefix used in the
-- other cells, so this presumably relies on the session's default lakehouse; verify.
update source_lookup
set where_clause = "UDDATE >= DATEADD(DAY, -2, GETDATE())"
where table = 'fmovarti'
     

StatementMeta(, cd9efb9c-8c12-4ef3-acc6-c4b5b9d84aa0, 5, Finished, Available, Finished)

<Spark SQL result set with 1 rows and 1 fields>