### Package installations

In [None]:
%pip install clarifai-pyspark
# The magic above installs the clarifai-pyspark package (presumably the source of the
# `clarifaipyspark` module imported in the next cell) for use by this notebook.
# NOTE(review): the version is unpinned, so a re-run may pick up a different release;
# consider pinning a known-good version for reproducibility.

### Setting up ClarifaiPySpark Objects

In [None]:
from clarifaipyspark.client import ClarifaiPySpark

# Identifiers of the Clarifai user, app and dataset this notebook operates on.
# The values below look like placeholders; replace them with your own ids.
user_id = "user_id"
app_id = "app_id"
dataset_id = "dataset_id"

# Personal access token, read from a secret scope via dbutils instead of being
# hard-coded in the notebook.
CLARIFAI_PAT = dbutils.secrets.get(scope="clarifai", key="clarifai-pat")

# Client bound to the user/app above, and a handle on the target dataset that the
# following cells use to export annotations.
cspark_obj = ClarifaiPySpark(
    user_id=user_id,
    app_id=app_id,
    pat=CLARIFAI_PAT,
)
dataset_obj = cspark_obj.dataset(dataset_id=dataset_id)

### Fetching annotations from App to dataframe

In [None]:
# Export the dataset's annotations from the Clarifai App into a DataFrame
# (presumably a Spark DataFrame, given the .show() call and the Delta write below).
annot_df = dataset_obj.export_annotations_to_dataframe()
# Print a preview of the rows as a text table.
annot_df.show()

+--------------------+---------------------+--------------------+----------------------+------------------+----------------+
|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|        input_id|
+--------------------+---------------------+--------------------+----------------------+------------------+----------------+
|text {\n  url: "h...|  10/30/% 15:04:   %5|f602439fd7b14aa6b...|   10/30/% 15:04:   %5|           mansi_k|XFmGD0xHlNXgGIXF|
|concepts {\n  id:...|  10/30/% 13:02:   %5|    TVFLGTHd8NryFWsY|   10/30/% 13:02:   %5|           mansi_k|Ak1n8DZ1l1RWKATv|
|text {\n  url: "h...|  10/30/% 12:46:   %5|125b80fb08604e36a...|   10/30/% 12:46:   %5|           mansi_k|Ak1n8DZ1l1RWKATv|
|concepts {\n  id:...|  10/26/% 12:45:   %5|190e2387564c416f8...|   10/26/% 12:45:   %5|           mansi_k|PKaXcNjJ5fJ7wZqR|
|concepts {\n  id:...|  10/26/% 12:45:   %5|22757a0e73fa4cac8...|   10/26/% 12:45:   %5|           mansi_k|IYMxwJq0jjwJguLE|


### Creating a Delta Live Table from annotations dataframe

In [None]:

# NOTE(review): wildcard import. Nothing in this cell uses a name from
# pyspark.sql.functions, and `import *` pollutes the notebook namespace (the module
# defines names such as sum/min/max that shadow Python builtins). Consider importing
# only what is needed.
from pyspark.sql.functions import *
from pyspark.sql import SparkSession

# Get the active Spark session, or create one named "Delta Live Table Demo" if none exists.
spark = SparkSession.builder.appName("Delta Live Table Demo").getOrCreate()
# Sets a session-level config value; presumably tags the session as coming from
# clarifaipyspark -- TODO confirm what consumes this key.
spark.conf.set("spark.databricks.agent.id", "clarifaipyspark")
# Names shared with the later cells: target database, table name and storage path.
database_name = "db_test"
table_name = "dlt_anns2"
delta_path = "/mnt/delta_anns2"
# Write the annotations in Delta format at delta_path. "overwrite" replaces whatever is
# already stored there, so re-running this cell discards rows upserted later on.
annot_df.write.format("delta").mode("overwrite").save(delta_path)

# Create the database if needed, switch to it, and register a table backed by the
# Delta files written above. The write must happen first: the table definition
# carries no schema of its own and points at delta_path.
# NOTE(review): this registers a Delta table via CREATE TABLE ... USING delta; confirm
# that "Delta Live Table" is the intended term for it.
spark.sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
spark.sql(f"USE {database_name}")
spark.sql(f"""
    CREATE TABLE IF NOT EXISTS {table_name}
    USING delta
    LOCATION '{delta_path}'
""")

DataFrame[]

### Reading the annotations Delta Live Table

In [None]:
# Load the Delta data directly from its storage path (not through the registered
# table name) and print a preview to confirm the write succeeded.
df_delta = spark.read.format("delta").load(delta_path)
df_delta.show()

+--------------------+---------------------+--------------------+----------------------+------------------+----------------+
|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|        input_id|
+--------------------+---------------------+--------------------+----------------------+------------------+----------------+
|concepts {\n  id:...|  10/26/% 12:45:   %5|22757a0e73fa4cac8...|   10/26/% 12:45:   %5|           mansi_k|IYMxwJq0jjwJguLE|
|text {\n  url: "h...|  10/30/% 12:46:   %5|125b80fb08604e36a...|   10/30/% 12:46:   %5|           mansi_k|Ak1n8DZ1l1RWKATv|
|text {\n  url: "h...|  10/30/% 15:04:   %5|f602439fd7b14aa6b...|   10/30/% 15:04:   %5|           mansi_k|XFmGD0xHlNXgGIXF|
|concepts {\n  id:...|  10/26/% 12:43:   %5|    8sH9gt43eqx3rBYF|   10/26/% 12:43:   %5|           mansi_k|             t21|
|concepts {\n  id:...|  10/26/% 12:45:   %5|190e2387564c416f8...|   10/26/% 12:45:   %5|           mansi_k|PKaXcNjJ5fJ7wZqR|


### Add new inputs and annotations to the App

In [None]:
# Manual step: the new inputs and annotations are added directly in the Clarifai App;
# there is no code to run in this cell.

### Fetch new annotations from App to dataframe

In [None]:
# Export the annotations again so the DataFrame includes those added in the App since
# the first export. This rebinds annot_df: the earlier snapshot is no longer reachable
# under this name, and the upsert cell below reads this refreshed DataFrame.
annot_df = dataset_obj.export_annotations_to_dataframe()
# Print a preview of the refreshed rows.
annot_df.show()

+--------------------+---------------------+--------------------+----------------------+------------------+--------------------+
|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|            input_id|
+--------------------+---------------------+--------------------+----------------------+------------------+--------------------+
|concepts {\n  id:...|  11/17/% 13:51:   %5|d0bb5fea2d8c4f4e8...|   11/17/% 13:51:   %5|           mansi_k|fe3f42364969fe544...|
|concepts {\n  id:...|  11/17/% 13:43:   %5|1e6224252c324b04b...|   11/17/% 13:43:   %5|           mansi_k|5a1d54db53403352f...|
|text {\n  url: "h...|  10/30/% 15:04:   %5|f602439fd7b14aa6b...|   10/30/% 15:04:   %5|           mansi_k|    XFmGD0xHlNXgGIXF|
|concepts {\n  id:...|  10/30/% 13:02:   %5|    TVFLGTHd8NryFWsY|   10/30/% 13:02:   %5|           mansi_k|    Ak1n8DZ1l1RWKATv|
|text {\n  url: "h...|  10/30/% 12:46:   %5|125b80fb08604e36a...|   10/30/% 12:46:   %5|         

### Upsert new annotations into the Delta Live Table

In [None]:
from delta.tables import DeltaTable

# Open the existing Delta table stored at delta_path.
delta_table = DeltaTable.forPath(spark, delta_path)

# Upsert keyed on annotation_id: a row of annot_df ("s") whose id already exists in
# the table ("t") replaces the stored row; a row with an unseen id is inserted.
(
    delta_table.alias("t")
    .merge(annot_df.alias("s"), "s.annotation_id = t.annotation_id")
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
    .execute()
)

### Display annotations Delta Live Table

In [None]:
# Re-read the Delta data from its storage path after the merge and print a preview,
# so the upserted rows can be checked against the refreshed export.
df_delta = spark.read.format("delta").load(delta_path)
df_delta.show()

+--------------------+---------------------+--------------------+----------------------+------------------+--------------------+
|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|            input_id|
+--------------------+---------------------+--------------------+----------------------+------------------+--------------------+
|concepts {\n  id:...|  11/17/% 13:43:   %5|1e6224252c324b04b...|   11/17/% 13:43:   %5|           mansi_k|5a1d54db53403352f...|
|concepts {\n  id:...|  10/26/% 12:45:   %5|190e2387564c416f8...|   10/26/% 12:45:   %5|           mansi_k|    PKaXcNjJ5fJ7wZqR|
|text {\n  url: "h...|  10/30/% 15:04:   %5|f602439fd7b14aa6b...|   10/30/% 15:04:   %5|           mansi_k|    XFmGD0xHlNXgGIXF|
|concepts {\n  id:...|  10/26/% 12:43:   %5|    NrOaWbbfH6QEVFBW|   10/26/% 12:43:   %5|           mansi_k|                 t11|
|concepts {\n  id:...|  11/17/% 13:51:   %5|d0bb5fea2d8c4f4e8...|   11/17/% 13:51:   %5|         