### Install dependencies

In [None]:

!pip install pandas
!pip install pyspark

### Import dependencies

In [None]:
import os

import pandas as pd
import pyspark
from pyspark.sql import SparkSession

### Establish connection with Azure

In [None]:


# Azure Blob Storage location that the notebook reads from and writes to.
storage_account = "aditestacc"
container_name = "testcontainer"
folder = "dir"

# The account key is a credential and must not be stored in the notebook.
# Export AZURE_STORAGE_ACCOUNT_KEY before starting the kernel.
account_key = os.environ.get("AZURE_STORAGE_ACCOUNT_KEY")
if not account_key:
    raise RuntimeError(
        "AZURE_STORAGE_ACCOUNT_KEY is not set; export the storage account key "
        "in the environment before running this cell."
    )

# Start (or reuse) a Spark session with the Azure storage connector jars on the classpath.
spark = SparkSession.builder \
    .appName("MyDockerSparkApp") \
    .config("spark.jars.packages", "org.apache.hadoop:hadoop-azure:3.3.0,com.microsoft.azure:azure-storage:8.6.6") \
    .getOrCreate()

spark.conf.set("fs.azure.impl", "org.apache.hadoop.fs.azure.NativeAzureFileSystem")

# Report the versions in use; the connector jars above need to match the Hadoop build.
print("PySpark version:", pyspark.__version__)
# NOTE: `_jvm` is a private PySpark attribute, used here only for diagnostics.
hadoop_version = spark.sparkContext._jvm.org.apache.hadoop.util.VersionInfo.getVersion()
print("Hadoop version:", hadoop_version)

# Base wasbs:// URL under which tables are written, and the key used to authenticate to it.
blob_container_url = f"wasbs://{container_name}@{storage_account}.blob.core.windows.net/{folder}"
spark.conf.set(f"fs.azure.account.key.{storage_account}.blob.core.windows.net", account_key)

### Define writer function to add to Unity Catalog

In [None]:
def write_df_to_unity_catalog(
    pandas_df: pd.DataFrame,
    blob_container_url: str,
    database_name: str,
    table_name: str,
    file_format: str = 'parquet',
) -> None:
    """
    Write a Pandas DataFrame to Azure Blob Storage and register a table over it.

    The DataFrame is converted to a Spark DataFrame, written to
    ``{blob_container_url}/{table_name}`` (replacing any data already there),
    and a table ``{database_name}.{table_name}`` pointing at that location is
    created if it does not already exist. The function is safe to re-run.

    Uses the notebook-level ``spark`` session, which must already exist.

    Args:
        pandas_df: Data to write.
        blob_container_url: Base storage URL; the table name is appended to it.
        database_name: Database (schema) in which the table is registered.
        table_name: Table name; also used as the storage sub-folder.
        file_format: Spark data source format for both the files and the table.

    Note:
        ``database_name``, ``table_name`` and ``file_format`` are interpolated
        directly into the SQL statement, so pass only trusted values.
        Whether the table lands in Unity Catalog depends on how the active
        Spark session's catalog is configured.
    """
    spark_df = spark.createDataFrame(pandas_df)

    storage_path = f"{blob_container_url}/{table_name}"

    # Overwrite so that re-running replaces the previous contents of the location.
    spark_df.write.mode("overwrite").format(file_format).save(storage_path)

    # IF NOT EXISTS keeps the DDL consistent with the overwrite above: a second
    # run refreshes the files instead of failing because the table exists.
    # An existing table with a different LOCATION is left untouched.
    create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS {database_name}.{table_name}
    USING {file_format}
    LOCATION '{storage_path}'
    """
    spark.sql(create_table_sql)

    print(f"External table {database_name}.{table_name} created successfully with data at {storage_path}.")


### Example use case

In [None]:
# Small sample dataset: one list of values per column.
data = dict(
    calories=[420, 380, 390],
    duration=[50, 40, 45],
)
df = pd.DataFrame(data)

# Target database and table for the demo write.
db_name = "adi_test_catalog"
tb_name = "untiy_tb"

# Write the sample frame to blob storage and register the table over it.
write_df_to_unity_catalog(
    pandas_df=df,
    blob_container_url=blob_container_url,
    database_name=db_name,
    table_name=tb_name,
)