### Package installations

In [None]:
!pip install clarifaipyspark
!pip install protobuf==4.24.2

### Setting Environment Variables & ClarifaiPySpark Objects

In [None]:
from clarifai.client.input import Inputs
# ClarifaiPySpark is used below; without this import the cell fails with a
# NameError on a fresh kernel.
from clarifaipyspark.client import ClarifaiPySpark
from google.protobuf.json_format import MessageToJson
import os
import json

# Clarifai Personal Access Token, exposed through the environment
# (presumably read by the Clarifai client; confirm against its docs).
# Fill in your own value before running and never commit a real token.
os.environ['CLARIFAI_PAT'] = ''

# Identifiers of the Clarifai user, app and dataset this notebook works against.
user_id = 'mansi_k'
app_id = 'databricks_tester_txt'
dataset_id = 'dataset1'

# cspark_obj is bound to the user/app above; dataset_obj is the handle for
# dataset_id that the later cells export annotations from.
cspark_obj = ClarifaiPySpark(user_id=user_id, app_id=app_id)
dataset_obj = cspark_obj.dataset(dataset_id=dataset_id)

### Fetching annotations from App to dataframe

In [None]:
# Export the dataset's annotations into a dataframe.
# NOTE(review): presumably a Spark DataFrame, given the .show() call and the
# later .write.format("delta") usage; confirm against the ClarifaiPySpark docs.
annot_df = dataset_obj.export_annotations_to_dataframe()
# Print a text preview of the exported rows.
annot_df.show()

+--------------------+-------------------+----------------+--------+-------------------+-------+
|          annotation|         created_at|              id|input_id|        modified_at|user_id|
+--------------------+-------------------+----------------+--------+-------------------+-------+
|{concepts -> [{na...|10/26/% 12:43:   %5|NrOaWbbfH6QEVFBW|     t11|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|8sH9gt43eqx3rBYF|     t21|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|fc9VQUXheCJaZu28|     t31|10/26/% 12:43:   %5|mansi_k|
+--------------------+-------------------+----------------+--------+-------------------+-------+



### Creating a Delta Live Table from annotations dataframe

In [None]:

from pyspark.sql.functions import *
from pyspark.sql import SparkSession

# Database, table name and storage path for the Delta table holding the annotations.
database_name = "mansi_test"
table_name = "dlt_anns2"
delta_path = "/mnt/delta_anns2"

# Get the active Spark session, or create one if none exists yet.
spark = SparkSession.builder.appName("Delta Live Table Demo").getOrCreate()

# Write the exported annotations to delta_path in Delta format; "overwrite"
# replaces whatever was stored at that path before.
delta_writer = annot_df.write.format("delta").mode("overwrite")
delta_writer.save(delta_path)

# Make sure the database exists and make it the current one for this session.
spark.sql(f"CREATE DATABASE IF NOT EXISTS {database_name}")
spark.sql(f"USE {database_name}")

# Register the data written above as a table; this is a no-op when the table
# already exists.
create_table_sql = f"""
    CREATE TABLE IF NOT EXISTS {table_name} 
    USING delta 
    LOCATION '{delta_path}'
"""
spark.sql(create_table_sql)

DataFrame[]

### Reading the annotations Delta Live table

In [None]:
# Load the Delta table directly from its storage path (not through the
# registered table name) and print a text preview of its rows.
df_delta = spark.read.format("delta").load(delta_path)
df_delta.show()

+--------------------+-------------------+----------------+--------+-------------------+-------+
|          annotation|         created_at|              id|input_id|        modified_at|user_id|
+--------------------+-------------------+----------------+--------+-------------------+-------+
|{concepts -> [{na...|10/26/% 12:43:   %5|8sH9gt43eqx3rBYF|     t21|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|fc9VQUXheCJaZu28|     t31|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|NrOaWbbfH6QEVFBW|     t11|10/26/% 12:43:   %5|mansi_k|
+--------------------+-------------------+----------------+--------+-------------------+-------+



### Add new inputs and annotations to the App

In [None]:
# Nothing to run here: the new inputs and annotations were added directly in the Clarifai App.

### Fetch new annotations from app to dataframe

In [None]:
# Export the dataset's annotations again. This reassigns annot_df, replacing
# the dataframe exported earlier in the notebook.
annot_df = dataset_obj.export_annotations_to_dataframe()
# Print a text preview of the exported rows.
annot_df.show()

+--------------------+-------------------+--------------------+----------------+-------------------+-------+
|          annotation|         created_at|                  id|        input_id|        modified_at|user_id|
+--------------------+-------------------+--------------------+----------------+-------------------+-------+
|{concepts -> [{na...|10/26/% 12:45:   %5|190e2387564c416f8...|PKaXcNjJ5fJ7wZqR|10/26/% 12:45:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:45:   %5|22757a0e73fa4cac8...|IYMxwJq0jjwJguLE|10/26/% 12:45:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|    NrOaWbbfH6QEVFBW|             t11|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|    8sH9gt43eqx3rBYF|             t21|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|    fc9VQUXheCJaZu28|             t31|10/26/% 12:43:   %5|mansi_k|
+--------------------+-------------------+--------------------+----------------+-------------------+-------+



### Upsert new annotations into the Delta live table

In [None]:
from delta.tables import DeltaTable
# Upsert annot_df into the Delta table stored at delta_path, matching rows on id:
# a source row whose id already exists in the table updates every column of that
# row, and a source row with an unseen id is inserted.
delta_table = DeltaTable.forPath(spark, delta_path)
upsert = (
    delta_table.alias("t")
    .merge(annot_df.alias("s"), "s.id = t.id")
    .whenMatchedUpdateAll()
    .whenNotMatchedInsertAll()
)
upsert.execute()

### Display annotations Delta live table

In [None]:
# Reload the Delta table from delta_path (reassigning df_delta) and print a
# text preview, so the rows currently stored in the table can be inspected.
df_delta = spark.read.format("delta").load(delta_path)
df_delta.show()

+--------------------+-------------------+--------------------+----------------+-------------------+-------+
|          annotation|         created_at|                  id|        input_id|        modified_at|user_id|
+--------------------+-------------------+--------------------+----------------+-------------------+-------+
|{concepts -> [{na...|10/26/% 12:45:   %5|22757a0e73fa4cac8...|IYMxwJq0jjwJguLE|10/26/% 12:45:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|    8sH9gt43eqx3rBYF|             t21|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:45:   %5|190e2387564c416f8...|PKaXcNjJ5fJ7wZqR|10/26/% 12:45:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|    NrOaWbbfH6QEVFBW|             t11|10/26/% 12:43:   %5|mansi_k|
|{concepts -> [{na...|10/26/% 12:43:   %5|    fc9VQUXheCJaZu28|             t31|10/26/% 12:43:   %5|mansi_k|
+--------------------+-------------------+--------------------+----------------+-------------------+-------+

