# How to use the Kimi-VL-A3B Model

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone.utils import huggingface as fouh

# Pull a small slice of the ScreenSpot dataset from the Hugging Face Hub.
# Only five samples are requested to keep the walkthrough quick, and
# overwrite=True is passed so re-running the cell does not fail on an
# already-loaded copy.
dataset = fouh.load_from_hub(
    "Voxel51/ScreenSpot",
    max_samples=5,
    overwrite=True,
)

In [None]:
# Echo the dataset as the cell's output to inspect what was loaded.
dataset

In [None]:
from PIL import Image

# Open the first sample's image file with PIL; as the last expression of the
# cell, the image is rendered in the notebook output.
Image.open(dataset.first().filepath)

# Setup Zoo Model

In [None]:
# Register the GitHub repository as a remote zoo model source.
# overwrite=True is passed so the cell can be re-run against an
# already-registered source.
foz.register_zoo_model_source(
    "https://github.com/harpreetsahota204/Kimi_VL_A3B",
    overwrite=True,
)

In [None]:
# Download the Instruct checkpoint from the remote source.
# "moonshotai/Kimi-VL-A3B-Thinking" can be chosen instead.
foz.download_zoo_model(
    "https://github.com/harpreetsahota204/Kimi_VL_A3B",
    model_name="moonshotai/Kimi-VL-A3B-Instruct",
)

In [None]:
# Load the Instruct checkpoint by name; `model` is reused by every cell below,
# which reconfigure it via `model.operation` and `model.prompt`.
model = foz.load_zoo_model("moonshotai/Kimi-VL-A3B-Instruct")

# VQA

In [None]:
# Put the model into its "vqa" operation and give it the question to answer.
model.operation = "vqa"
model.prompt = (
    "Describe this screenshot and what the user might be doing in it. "
    "Answer in a single sentence."
)

# Run the model over the dataset, storing the output in the "q_vqa" field.
dataset.apply_model(model, label_field="q_vqa")

# Inspect the result on the first sample.
dataset.first().q_vqa

# Detection

In [None]:
# Put the model into its "detect" operation.
model.operation = "detect"

# Print the system prompt as it stands after switching the operation.
print(model.system_prompt)

model.prompt = "Locate the elements of this UI that a user can interact with."

# Run the model over the dataset, storing the output in the "qdets" field.
dataset.apply_model(model, label_field="qdets")

# Inspect the result on the first sample.
dataset.first().qdets

# OCR with detection

In [None]:
# Put the model into its "ocr" operation and ask it to read the text.
model.operation = "ocr"
model.prompt = "Read all the text in the image."

# Run the model over the dataset, storing the output in the "q_ocr" field.
dataset.apply_model(model, label_field="q_ocr")

# Inspect the result on the first sample.
dataset.first().q_ocr

# Pointing

In [None]:
# Put the model into its "point" operation and set the instruction.
model.operation = "point"
model.prompt = "Point to all the interactive elements in UI."

# Print the system prompt as it stands after switching the operation.
print(model.system_prompt)

# Run the model over the dataset, storing the output in the "qpts" field.
dataset.apply_model(model, label_field="qpts")

In [None]:
# Inspect the "qpts" field written by the pointing run on the first sample.
dataset.first().qpts

# Classification

In [None]:
# Put the model into its "classify" operation and describe what to label.
model.operation = "classify"
model.prompt = (
    "List the type of operating system, open application, "
    "and what the user is working on as depicted in the UI."
)

# Print the system prompt as it stands after switching the operation.
print(model.system_prompt)

# Run the model over the dataset, storing the output in the "q_cls" field.
dataset.apply_model(model, label_field="q_cls")

In [None]:
# Inspect the "q_cls" field written by the classification run on the first sample.
dataset.first().q_cls

# Agentic

In [None]:
# Put the model into its "agentic" operation and give it the task to perform.
model.operation = "agentic"
model.prompt = (
    "Write code to close application windows and quit the application."
)

# Run the model over the dataset, storing the output in the "q_agentic" field.
dataset.apply_model(model, label_field="q_agentic")

In [None]:
# Inspect the "q_agentic" field written by the agentic run on the first sample.
dataset.first().q_agentic

# Using a sample field as the prompt

In [None]:
# Instead of relying on the single `model.prompt`, pass the name of a sample
# field ("instruction") via `prompt_field`. `model.operation` is not changed
# here, so the model runs in whatever operation was set last.
dataset.apply_model(
    model,
    label_field="instruction_field_agent",
    prompt_field="instruction",
)

In [None]:
# Inspect the "instruction_field_agent" field written by the run above on the
# first sample.
dataset.first().instruction_field_agent

In [None]:
# Open the FiftyOne App on the dataset to browse the fields added above.
fo.launch_app(dataset)