# Copy CSV Files from a GitHub Repository into OneLake

In [9]:
import csv
import requests

# Base URL of the raw CSV files in the GitHub repository; file names are
# appended directly, so it must end with "/".
CSV_BASE_URL = "https://github.com/PowerBiDevCamp/Python-In-Fabric-Notebooks/raw/main/CSV/"

# Names of the CSV files to download. This is a set, so iteration order
# (and therefore the order of the printed messages) is not guaranteed.
CSV_FILES = { "Categories.csv", "Countries.csv", "Customers.csv", "InvoiceDetails.csv", "Invoices.csv", "Products.csv" }

# Target folder for the downloaded files; file names are appended directly,
# so it must end with "/".
ONELAKE_FOLDER_PATH = "Files/product_sales/"

# Upper bound (seconds) on how long a single download may wait on the server,
# so a stalled connection cannot hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# delete folder to remove any existing files in target folder
mssparkutils.fs.rm(ONELAKE_FOLDER_PATH, recurse=True)

for CSV_FILE in CSV_FILES:
    CSV_FILE_PATH = CSV_BASE_URL + CSV_FILE
    with requests.get(CSV_FILE_PATH, timeout=REQUEST_TIMEOUT_SECONDS) as response:
        # Fail fast on HTTP errors (404, 5xx, ...). Without this check the
        # error page body would be stored as if it were CSV data.
        response.raise_for_status()
        # 'utf-8-sig' decodes UTF-8 and drops a leading byte-order mark if
        # one is present, so it does not end up in the first header name.
        csv_content = response.content.decode('utf-8-sig')
    # Third argument True: overwrite an existing file (per the
    # mssparkutils.fs.put signature - confirm against your runtime version).
    mssparkutils.fs.put(ONELAKE_FOLDER_PATH + CSV_FILE, csv_content, True)
    print(CSV_FILE + " copy into One Lake")

StatementMeta(, aad638bf-5741-47c2-b995-390abd554f7f, 11, Finished, Available)

Customers.csv copy into One Lake
InvoiceDetails.csv copy into One Lake
Invoices.csv copy into One Lake
Categories.csv copy into One Lake
Products.csv copy into One Lake
Countries.csv copy into One Lake


# Convert CSV Files into Lakehouse Tables

In [11]:
# Folder holding the CSV files to convert; file names are appended directly,
# so it must end with "/".
FOLDER_PATH = "Files/product_sales/"

# Names of the CSV files to convert. This is a set, so iteration order
# (and therefore the order of the printed messages) is not guaranteed.
CSV_FILES = { "Categories.csv", "Countries.csv", "Customers.csv", "InvoiceDetails.csv", "Invoices.csv", "Products.csv" }

# The key must be spelled "spark.sql..." - spark.conf.set() accepts unknown
# keys without complaint, so a misspelled key silently leaves V-Order off.
# NOTE(review): newer Fabric runtimes may use a different V-Order property
# name - confirm against the runtime in use.
spark.conf.set("spark.sql.parquet.vorder.enabled", "true") # Enable VOrder write
spark.conf.set("spark.microsoft.delta.optimizeWrite.enabled", "true") # Enable automatic delta optimized write

for CSV_FILE in CSV_FILES:
    # The first row is treated as the header. No schema is supplied and schema
    # inference is not requested, so column types are whatever the Spark CSV
    # reader defaults to (presumably strings - verify if typed columns matter).
    df = spark.read.format("csv").option("header","true").load(FOLDER_PATH + CSV_FILE)
    # Table name is the lower-cased file name with ".csv" removed,
    # e.g. "InvoiceDetails.csv" -> "invoicedetails".
    table_name =  CSV_FILE.lower().replace(".csv", "")
    # "overwrite" replaces any existing table data, so the cell can be re-run.
    df.write.mode("overwrite").format("delta").save(f"Tables/{table_name}")
    print(f"Spark dataframe saved to delta table: {table_name}")

StatementMeta(, aad638bf-5741-47c2-b995-390abd554f7f, 13, Finished, Available)

Spark dataframe saved to delta table: customers
Spark dataframe saved to delta table: invoicedetails
Spark dataframe saved to delta table: invoices
Spark dataframe saved to delta table: categories
Spark dataframe saved to delta table: products
Spark dataframe saved to delta table: countries
