## Intro to Programmatic Actions with Label Studio -- SDK

In [None]:
!pip install label-studio-sdk

In [205]:
# Credentials and setup.
# Both values are read from the environment when present, so a real API key
# never needs to be saved inside the notebook. The original placeholder values
# are kept as fallbacks when the variables are not set.
import os

label_studio_url = os.environ.get("LABEL_STUDIO_URL", "http://app.heartex.com")
label_studio_api_key = os.environ.get("LABEL_STUDIO_API_KEY", "Your Label Studio API Key")

## Step 0: Download and Prep Your Data

In [206]:
# Five task records, one per hosted sample JPEG (rag_0.jpg .. rag_4.jpg).
# Each record exposes the URL under the "image" key, which is the field the
# OCR labeling config reads via value="$image".
_IMAGE_URL_TEMPLATE = "https://htx-pub.s3.us-east-1.amazonaws.com/demo_pdf/rag_{}.jpg"
data = [{"image": _IMAGE_URL_TEMPLATE.format(index)} for index in range(5)]

## Step 1: Create a Label Studio Client

In [207]:
from label_studio_sdk.client import LabelStudio

# Build the SDK client from the URL and API key configured above.
# Every later API call in this notebook (projects, tasks, predictions) goes through `ls`.
ls = LabelStudio(base_url=label_studio_url, api_key=label_studio_api_key)

## Step 2: Creating and Initializing a Project

In [None]:
from label_studio_sdk.label_interface import LabelInterface
from label_studio_sdk.label_interface.create import labels, choices

# The labeling config can be defined in one of two ways.
# 1. As an XML string. This config:
#    - displays the task's `image` field (value="$image"),
#    - lets annotators draw rectangles labeled "Heading" or "Paragraph" on it,
#    - attaches a per-region text area named "transcription" to the image,
#      which is where the recognized text for each rectangle is entered.
label_config = """
<View>    
   <Image name="image" value="$image" zoom="true" zoomControl="false"
         rotateControl="true" width="100%" height="100%"
         maxHeight="auto" maxWidth="auto"/>
   
   <RectangleLabels name="bbox" toName="image" strokeWidth="1" smart="true">
      <Label value="Heading" background="green"/>
      <Label value="Paragraph" background="blue"/>
   </RectangleLabels>

   <TextArea name="transcription" toName="image" 
   editable="true" perRegion="true" required="false" 
   maxSubmissions="1" rows="5" placeholder="Recognized Text" 
   displayMode="region-list"/>
</View>
"""

# 2. Programmatically, with LabelInterface.create: pass a mapping of
#    tag name -> tag type and print the config it produces.
#    This second config is only printed for comparison; the project below is
#    created from the XML string `label_config`.
interface_tags = {
    "image": "Image",
    "transcription": "TextArea",
}
label_config2 = LabelInterface.create(interface_tags)

print(label_config2)

# A title and a labeling config are the minimum a project needs; a workspace
# and a color are set here in addition.
# NOTE(review): 93723 is a hard-coded workspace id, presumably specific to the
# original author's account. Confirm or replace it before running.
project_settings = {
    "title": "OCR for RAG PDF",
    "label_config": label_config,
    "workspace": 93723,
    "color": "#00FFFF",
}
proj = ls.projects.create(**project_settings)

# Keep the new project's id; later cells use it to import and list tasks.
pid = proj.id
print(f"Created project {pid} in workspace {proj.workspace}")

## Step 3: Add your Data

In [None]:
# Label Studio offers several ways to upload data.
#
# 1. From a CSV file (example left commented out):
# task_ids = project.import_tasks('/full/path/to/your_tasks.csv')
# print('Imported task IDs:', task_ids)
#
# 2. From JSON-style records: send the in-memory `data` list to the project
#    created above, identified by `pid`.
ls.projects.import_tasks(id=pid, request=data)


### Now, you're ready to label! 

## Step 4: Chaining Projects Together

In [167]:
# First, we create a new project as we did before.

# Labeling config as an XML string: it displays the task's `text` field
# (value="$text") and offers two labels, "PER" and "ORG", through the control
# named "NER" that targets the "text" object.
ner_label_config = """
<View>
  	<Text name="text" value="$text" />
    <Labels name="NER" toName="text">
        <Label value="PER" />
        <Label value="ORG" />
  </Labels>
</View>
"""

# Create the second (NER) project. A title and a labeling config are the
# minimum; description, workspace and color are set in addition.
# NOTE(review): 93723 is a hard-coded workspace id, presumably specific to the
# original author's account. Confirm or replace it before running.
ner_project_settings = {
    "title": "NER on OCR",
    "description": "",
    "label_config": ner_label_config,
    "workspace": 93723,
    "color": "#800080",
}
proj2 = ls.projects.create(**ner_project_settings)

# Keep the id of the second project; later cells create tasks and predictions in it.
p2id = proj2.id

In [None]:
# Now, we can export data from our first project and upload it to our second project.
# For every task in the OCR project, the transcribed text of its annotations is
# concatenated and sent to the NER project as a new task.
import json  # imported in this cell so it also runs on a fresh kernel

tasks = ls.tasks.list(project=proj.id)

for i, task in enumerate(tasks):
    # A task that has not been labeled yet may carry no annotations at all.
    annotations = task.annotations or []

    if i == 0 and annotations:
        # Show the first annotation of the first task (Label Studio JSON
        # format) so the structure walked below can be inspected.
        print(f"TASK {i}")
        print(json.dumps(annotations[0], indent=4))

    # Concatenate the first text entry of every result region that has one.
    # Regions without text (e.g. the bare rectangles) are skipped, as are
    # regions whose text list is empty.
    text = ""
    for annotation in annotations:
        for region in annotation["result"]:
            region_text = region["value"].get("text")
            if region_text:
                text += region_text[0]

    # One NER task is created per OCR task. A task with no transcriptions
    # produces an NER task whose text is the empty string.
    ls.tasks.create(project=p2id, data={"text": text})

## Step 5: Adding Preannotations

In [None]:
import json

# Fetch the NER project and ask its label interface for a sample prediction,
# then pretty-print it as JSON to see the structure a prediction should have.
print(p2id)
project = ls.projects.get(id=p2id)
li = project.get_label_interface()

sample_pred = json.dumps(li.generate_sample_prediction(), indent=4)
print(sample_pred)


In [175]:
import random
from label_studio_sdk.label_interface.objects import PredictionValue

# Attach the same placeholder prediction to every task in the NER project.
#
# The region matches the NER labeling config: control "NER" labels the object
# "text". The offsets 10-20 and the "PER" label are fixed demo values; they
# are not checked against the length or content of each task's text.
predicted_label = {
    "from_name": "NER",
    "to_name": "text",
    "type": "labels",
    "value": {
        "start": 10,
        "end": 20,
        "labels": ["PER"],
    },
}

# The prediction is identical for all tasks, so it is built once outside the loop.
prediction = PredictionValue(
    model_version='random-ai-prediction',
    score=0.99,
    result=[predicted_label],
)

tasks = ls.tasks.list(project=p2id)
for task in tasks:
    ls.predictions.create(task=task.id, **prediction.model_dump())
