## Installation

In [None]:
%pip install clarifai-pyspark

## Getting Started

In [None]:
from clarifaipyspark.client import ClarifaiPySpark

# Load the Clarifai PAT from the "clarifai" secret scope (key "clarifai-pat")
# instead of hardcoding it in the notebook.
# NOTE(review): `dbutils` is not imported here; it is presumably injected by the
# notebook runtime (Databricks) — confirm before running elsewhere.
CLARIFAI_PAT = dbutils.secrets.get(scope="clarifai", key="clarifai-pat")

## ClarifaiPySpark object for a Clarifai app with an image dataset

In [None]:
# Placeholder identifiers: replace with your own Clarifai user, app and dataset IDs.
user_id, app_id, dataset_id = 'user_id', 'app_id', 'dataset_id'

# Build the client for that user/app, authenticated with the PAT loaded earlier,
# then obtain a handle on the dataset the following cells export from.
cspark_obj = ClarifaiPySpark(
    user_id=user_id,
    app_id=app_id,
    pat=CLARIFAI_PAT,
)
dataset_obj = cspark_obj.dataset(dataset_id=dataset_id)

## Get details for images from Clarifai dataset

In [None]:
# Export the dataset's image inputs into a DataFrame; the trailing expression
# displays its row count as the cell output.
inputs_df = dataset_obj.export_inputs_to_dataframe(input_type='image')
inputs_df.count()

26

In [None]:
# Print a text-table preview of the exported image inputs (default row limit).
inputs_df.show()

+--------------------+--------------------+-------------------+--------------------+-------------------+
|          image_info|           image_url|   input_created_at|            input_id|  input_modified_at|
+--------------------+--------------------+-------------------+--------------------+-------------------+
|width: 872\nheigh...|https://cdni.auto...|10/27/% 12:48:   %5|              img211|10/27/% 12:48:   %5|
|width: 872\nheigh...|https://cdni.auto...|10/27/% 12:48:   %5|              img111|10/27/% 12:48:   %5|
|width: 900\nheigh...|https://samples.c...|10/27/% 12:02:   %5|             image01|10/27/% 12:02:   %5|
|width: 900\nheigh...|https://samples.c...|10/27/% 12:02:   %5|             image03|10/27/% 12:02:   %5|
|width: 512\nheigh...|https://samples.c...|10/27/% 12:02:   %5|             image02|10/27/% 12:02:   %5|
|width: 900\nheigh...|https://data.clar...|10/26/% 08:28:   %5|    SjICZ3CwpUUVb411|10/26/% 08:28:   %5|
|width: 300\nheigh...|https://data.clar...|10/25/% 16:1

## Get image annotations from Clarifai app

In [None]:
# Export the dataset's annotations into a DataFrame; the trailing expression
# displays its row count as the cell output.
anns_df = dataset_obj.export_annotations_to_dataframe()
anns_df.count()

25

In [None]:
# Print a text-table preview of the annotations, up to 30 rows.
anns_df.show(30)

+--------------------+---------------------+--------------------+----------------------+------------------+--------------------+
|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|            input_id|
+--------------------+---------------------+--------------------+----------------------+------------------+--------------------+
|concepts {\n  id:...|  10/27/% 12:48:   %5|448267668cd74cb9b...|   10/27/% 12:48:   %5|           mansi_k|              img111|
|concepts {\n  id:...|  10/27/% 12:48:   %5|4708d433b17e4a6a9...|   10/27/% 12:48:   %5|           mansi_k|              img211|
|concepts {\n  id:...|  10/27/% 12:02:   %5|5ec8c323c73b4e9ba...|   10/27/% 12:02:   %5|           mansi_k|             image02|
|concepts {\n  id:...|  10/27/% 12:02:   %5|b361c36483d74e63b...|   10/27/% 12:02:   %5|           mansi_k|             image03|
|concepts {\n  id:...|  10/27/% 12:02:   %5|c284d4b5158041139...|   10/27/% 12:02:   %5|         

## Get both (image details + annotations) from Clarifai app

In [None]:
# Export image inputs together with their annotations into a single DataFrame;
# the trailing expression displays its row count as the cell output.
all_df = dataset_obj.export_dataset_to_dataframe(input_type='image')
all_df.count()

26

In [None]:
# Print a text-table preview of the combined inputs + annotations, up to 30 rows.
all_df.show(30)

+--------------------+--------------------+-------------------+--------------------+-------------------+--------------------+---------------------+--------------------+----------------------+------------------+
|          image_info|           image_url|   input_created_at|            input_id|  input_modified_at|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|
+--------------------+--------------------+-------------------+--------------------+-------------------+--------------------+---------------------+--------------------+----------------------+------------------+
|width: 872\nheigh...|https://cdni.auto...|10/27/% 12:48:   %5|              img211|10/27/% 12:48:   %5|concepts {\n  id:...|  10/27/% 12:48:   %5|4708d433b17e4a6a9...|   10/27/% 12:48:   %5|           mansi_k|
|width: 872\nheigh...|https://cdni.auto...|10/27/% 12:48:   %5|              img111|10/27/% 12:48:   %5|concepts {\n  id:...|  10/27/% 12:48:   %5|448267668

## ClarifaiPySpark object for a Clarifai app with a text dataset

In [None]:
# Placeholder identifiers: replace with the user, app and dataset IDs of the text dataset.
# NOTE: this rebinds user_id / app_id / dataset_id / cspark_obj / dataset_obj,
# replacing the image-dataset objects created earlier in the notebook.
user_id, app_id, dataset_id = 'user_id', 'app_id', 'dataset_id'

# Build the client for that user/app, authenticated with the PAT loaded earlier,
# then obtain a handle on the dataset the following cells export from.
cspark_obj = ClarifaiPySpark(
    user_id=user_id,
    app_id=app_id,
    pat=CLARIFAI_PAT,
)
dataset_obj = cspark_obj.dataset(dataset_id=dataset_id)

## Get text details from Clarifai dataset

In [None]:
# Export the dataset's text inputs into a DataFrame (rebinds inputs_df from the
# image section); the trailing expression displays its row count.
inputs_df = dataset_obj.export_inputs_to_dataframe(input_type='text')
inputs_df.count()

7

In [None]:
# Print a text-table preview of the exported text inputs (default row limit).
inputs_df.show()

+-------------------+----------------+-------------------+--------------------+--------------------+
|   input_created_at|        input_id|  input_modified_at|           text_info|            text_url|
+-------------------+----------------+-------------------+--------------------+--------------------+
|10/30/% 15:04:   %5|XFmGD0xHlNXgGIXF|10/30/% 15:04:   %5|char_count: 3\nen...|https://data.clar...|
|10/30/% 12:46:   %5|Ak1n8DZ1l1RWKATv|10/30/% 12:46:   %5|char_count: 6\nen...|https://data.clar...|
|10/26/% 12:45:   %5|PKaXcNjJ5fJ7wZqR|10/26/% 12:46:   %5|char_count: 18\ne...|https://data.clar...|
|10/26/% 12:45:   %5|IYMxwJq0jjwJguLE|10/26/% 12:46:   %5|char_count: 22\ne...|https://data.clar...|
|10/23/% 08:38:   %5|             t11|10/23/% 08:38:   %5|char_count: 17\ne...|https://data.clar...|
|10/23/% 08:38:   %5|             t31|10/23/% 08:38:   %5|char_count: 28\ne...|https://data.clar...|
|10/23/% 08:38:   %5|             t21|10/23/% 08:38:   %5|char_count: 20\ne...|https://data

## Get text annotations from Clarifai app

In [None]:
# Export the text dataset's annotations into a DataFrame (rebinds anns_df from
# the image section); the trailing expression displays its row count.
anns_df = dataset_obj.export_annotations_to_dataframe()
anns_df.count()

8

In [None]:
# Print a text-table preview of the annotations, up to 30 rows.
anns_df.show(30)

+--------------------+---------------------+--------------------+----------------------+------------------+----------------+
|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|        input_id|
+--------------------+---------------------+--------------------+----------------------+------------------+----------------+
|text {\n  url: "h...|  10/30/% 15:04:   %5|f602439fd7b14aa6b...|   10/30/% 15:04:   %5|           mansi_k|XFmGD0xHlNXgGIXF|
|concepts {\n  id:...|  10/30/% 13:02:   %5|    TVFLGTHd8NryFWsY|   10/30/% 13:02:   %5|           mansi_k|Ak1n8DZ1l1RWKATv|
|text {\n  url: "h...|  10/30/% 12:46:   %5|125b80fb08604e36a...|   10/30/% 12:46:   %5|           mansi_k|Ak1n8DZ1l1RWKATv|
|concepts {\n  id:...|  10/26/% 12:45:   %5|190e2387564c416f8...|   10/26/% 12:45:   %5|           mansi_k|PKaXcNjJ5fJ7wZqR|
|concepts {\n  id:...|  10/26/% 12:45:   %5|22757a0e73fa4cac8...|   10/26/% 12:45:   %5|           mansi_k|IYMxwJq0jjwJguLE|


## Get both (text details + annotations) from Clarifai app

In [None]:
# Export text inputs together with their annotations into a single DataFrame
# (rebinds all_df from the image section); the trailing expression displays its row count.
all_df = dataset_obj.export_dataset_to_dataframe(input_type='text')
all_df.count()

8

In [None]:
# Print a text-table preview of the combined inputs + annotations, up to 30 rows.
all_df.show(30)

+-------------------+----------------+-------------------+--------------------+--------------------+--------------------+---------------------+--------------------+----------------------+------------------+
|   input_created_at|        input_id|  input_modified_at|           text_info|            text_url|          annotation|annotation_created_at|       annotation_id|annotation_modified_at|annotation_user_id|
+-------------------+----------------+-------------------+--------------------+--------------------+--------------------+---------------------+--------------------+----------------------+------------------+
|10/30/% 15:04:   %5|XFmGD0xHlNXgGIXF|10/30/% 15:04:   %5|char_count: 3\nen...|https://data.clar...|text {\n  url: "h...|  10/30/% 15:04:   %5|f602439fd7b14aa6b...|   10/30/% 15:04:   %5|           mansi_k|
|10/30/% 12:46:   %5|Ak1n8DZ1l1RWKATv|10/30/% 12:46:   %5|char_count: 6\nen...|https://data.clar...|text {\n  url: "h...|  10/30/% 12:46:   %5|125b80fb08604e36a...|   10/30