In [None]:
import taskingai
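# NOTE: no API key is set in this notebook; the TaskingAI client is expected to pick it up
# from an environment variable (presumably TASKINGAI_API_KEY — confirm against the client docs).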

# TaskingAI Retrieval Module CRUD Example

In [None]:
from taskingai.retrieval import Collection, Record, Chunk, TokenTextSplitter

# Choose an available text_embedding model from your project and paste its ID here.
# create_collection() further down reads this notebook-level value.
embedding_model_id: str = "YOUR_EMBEDDING_MODEL_ID"

## Collection Object

In [None]:
# list the collections that exist before this example creates any
collections = taskingai.retrieval.list_collections()
print(collections)

In [None]:
# create a collection
def create_collection() -> Collection:
    """Create a new collection for this example.

    The collection uses the notebook-level ``embedding_model_id`` and a fixed
    capacity of 1000.

    Returns:
        The ``Collection`` returned by ``taskingai.retrieval.create_collection``.
    """
    return taskingai.retrieval.create_collection(
        capacity=1000,  # maximum number of text chunks that can be stored
        embedding_model_id=embedding_model_id,
    )

# create the collection used by the get / update / delete cells that follow
collection: Collection = create_collection()
print(f"created collection: {collection}")

In [None]:
# get collection
# The ID is kept in its own variable because the update / delete / list cells
# below keep referring to it, even after the collection has been deleted.
collection_id: str = collection.collection_id
collection: Collection = taskingai.retrieval.get_collection(
    collection_id=collection_id
)

print(f"collection: {collection}\n")

In [None]:
# update collection: attach metadata and rebind `collection` to the returned object
collection: Collection = taskingai.retrieval.update_collection(
    collection_id=collection_id,
    metadata={"foo": "bar"}
)

print(f"updated collection: {collection}\n")


In [None]:
# delete collection
# `collection_id` stays defined afterwards so the next cell can check that it is gone.
taskingai.retrieval.delete_collection(collection_id=collection_id)
print(f"deleted collection: {collection_id}\n")

In [None]:
# list collections
collections = taskingai.retrieval.list_collections()
collection_ids = [c.collection_id for c in collections]
# ensure the collection we deleted is not in the list (the check should print False)
# Fix: the message previously contained a stray literal "f" before the ID.
print(f"{collection_id} in collection_ids: {collection_id in collection_ids}\n")

## Record Object

In [None]:
# create a new collection (the previous one was deleted above);
# the record and chunk cells below all operate on this one
collection: Collection = create_collection()
print(collection)

### Text Record

In [None]:
# create a new text record
# The content is passed inline together with a token text splitter configuration
# (chunk_size=200, chunk_overlap=20).
record: Record = taskingai.retrieval.create_record(
    collection_id=collection.collection_id,
    type="text",
    title="Machine learning",
    content="Machine learning is a subfield of artificial intelligence (AI) that involves the development of algorithms that allow computers to learn from and make decisions or predictions based on data. The term \"machine learning\" was coined by Arthur Samuel in 1959. In other words, machine learning enables a system to automatically learn and improve from experience without being explicitly programmed. This is achieved by feeding the system massive amounts of data, which it uses to learn patterns and make inferences. There are three main types of machine learning: 1. Supervised Learning: This is where the model is given labeled training data and the goal of learning is to generalize from the training data to unseen situations in a principled way. 2. Unsupervised Learning: This involves training on a dataset without explicit labels. The goal might be to discover inherent groupings or patterns within the data. 3. Reinforcement Learning: In this type, an agent learns to perform actions based on reward/penalty feedback to achieve a goal. It's commonly used in robotics, gaming, and navigation. Deep learning, a subset of machine learning, uses neural networks with many layers (\"deep\" structures) and has been responsible for many recent breakthroughs in AI, including speech recognition, image recognition, and natural language processing. It's important to note that machine learning is a rapidly developing field, with new techniques and applications emerging regularly.",
    text_splitter={"type": "token", "chunk_size": 200, "chunk_overlap": 20}
)
print(f"created record: {record.record_id} for collection: {collection.collection_id}\n")

In [None]:
# update record - content
# Replaces the title and content of the text record created above and passes a
# different splitter configuration (chunk_size=100 instead of 200).
record = taskingai.retrieval.update_record(
    record_id=record.record_id,
    collection_id=collection.collection_id,
    type="text",
    title="New title",
    content="New content",
    text_splitter={"type": "token", "chunk_size": 100, "chunk_overlap": 20},
)
print(f"updated record: {record}")

### Web Record

In [None]:
# create a new web record
# `record` is rebound here, so later cells operate on this web record.
record: Record = taskingai.retrieval.create_record(
    collection_id=collection.collection_id,
    type="web",
    title="Machine learning",
    url="https://www.tasking.ai", # the URL must use https
    text_splitter={"type": "token", "chunk_size": 200, "chunk_overlap": 20},
)
print(f"created record: {record.record_id} for collection: {collection.collection_id}\n")

In [None]:
# update record - url
# Points the web record created above at a different https URL.
record = taskingai.retrieval.update_record(
    collection_id=collection.collection_id,
    record_id=record.record_id,
    type="web",
    url="https://docs.tasking.ai",
    text_splitter={"type": "token", "chunk_size": 200, "chunk_overlap": 20},
)
print(f"updated record: {record}")

### File Record

In [None]:
# upload a file first
from taskingai.file import upload_file

# replace "your file path" with the path of a real local file before running
file = upload_file(file="your file path", purpose="record_file")
# or
# file = upload_file(file=open("your file path", "rb"), purpose="record_file")
print(f"uploaded file id: {file.file_id}")

# create a new file record that references the uploaded file by its ID
record: Record = taskingai.retrieval.create_record(
    collection_id=collection.collection_id,
    type="file",
    title="Machine learning",
    file_id=file.file_id,
    text_splitter={"type": "token", "chunk_size": 200, "chunk_overlap": 20},
)
print(f"created record: {record.record_id} for collection: {collection.collection_id}\n")

In [None]:
# upload a second file (upload_file is imported in the previous cell);
# replace "new_file_path" with the path of a real local file before running
new_file = upload_file(file="new_file_path", purpose="record_file")
print(f"new uploaded file id: {new_file.file_id}")

# update record - file
# Points the file record created above at the newly uploaded file.
record = taskingai.retrieval.update_record(
    collection_id=collection.collection_id,
    record_id=record.record_id,
    type="file",
    file_id=new_file.file_id,
    text_splitter={"type": "token", "chunk_size": 200, "chunk_overlap": 20},
)
print(f"updated record: {record}")

### Record Other Cases

In [None]:
# update record - metadata
# Only metadata is passed here; no type, content or text_splitter is supplied.
record = taskingai.retrieval.update_record(
    collection_id=collection.collection_id,
    record_id=record.record_id,
    metadata={"foo": "bar"},
)
print(f"updated record: {record}")

In [None]:
# get record
# Fetches whichever record `record` currently refers to
# (at this point, the file record updated in the cells above).
record = taskingai.retrieval.get_record(
    collection_id=collection.collection_id,
    record_id=record.record_id
)
print(f"got record: {record}\n")

In [None]:
# list records
records = taskingai.retrieval.list_records(collection_id=collection.collection_id)
record_ids = [r.record_id for r in records]
# check that the record we have been working with appears in the list
# Fix: the message previously contained a stray literal "f" before the ID, and the
# comment was copy-pasted from the "deleted collection" check.
print(f"{record.record_id} in record_ids: {record.record_id in record_ids}\n")

In [None]:
# delete record
# Deletes the record currently held in `record`; the variable itself is left
# untouched so its ID can still be printed below.
taskingai.retrieval.delete_record(
    collection_id=collection.collection_id,
    record_id=record.record_id,
)
print(f"deleted record {record.record_id} from collection {collection.collection_id}\n")

## Chunk Object

In [None]:
# create a new chunk directly in the collection (no record or text splitter involved)
chunk: Chunk = taskingai.retrieval.create_chunk(
    collection_id=collection.collection_id,
    content="The dog is a domesticated descendant of the wolf. Also called the domestic dog, it is derived from extinct gray wolves, and the gray wolf is the dog's closest living relative. The dog was the first species to be domesticated by humans.",
)
print(f"created chunk: {chunk.chunk_id} for collection: {collection.collection_id}\n")

In [None]:
# update chunk metadata
# Only metadata is passed; `chunk` is rebound to the object returned by the call.
chunk = taskingai.retrieval.update_chunk(
    collection_id=collection.collection_id,
    chunk_id=chunk.chunk_id,
    metadata={"k": "v"},
)
print(f"updated chunk: {chunk}")

In [None]:
# update chunk content
# Only content is passed; `chunk` is rebound to the object returned by the call.
chunk = taskingai.retrieval.update_chunk(
    collection_id=collection.collection_id,
    chunk_id=chunk.chunk_id,
    content="New content",
)
print(f"updated chunk: {chunk}")

In [None]:
# get chunk
# Fetches the chunk again by collection ID and chunk ID.
chunk = taskingai.retrieval.get_chunk(
    collection_id=collection.collection_id,
    chunk_id=chunk.chunk_id
)
print(f"got chunk: {chunk}\n")

In [None]:
# delete chunk
# The `chunk` variable is left untouched so its ID can still be printed below.
taskingai.retrieval.delete_chunk(
    collection_id=collection.collection_id,
    chunk_id=chunk.chunk_id,
)
print(f"deleted chunk {chunk.chunk_id} from collection {collection.collection_id}\n")

In [None]:
# create a new text record and a new chunk so the list-chunks cell below has data to show
# Fix: `type="text"` is now passed explicitly, matching every other create_record call
# in this notebook. The splitter is given as a TokenTextSplitter object here instead of
# the dict form used in the earlier cells.
taskingai.retrieval.create_record(
    collection_id=collection.collection_id,
    type="text",
    content="Machine learning is a subfield of artificial intelligence (AI) that involves the development of algorithms that allow computers to learn from and make decisions or predictions based on data. The term \"machine learning\" was coined by Arthur Samuel in 1959. In other words, machine learning enables a system to automatically learn and improve from experience without being explicitly programmed. This is achieved by feeding the system massive amounts of data, which it uses to learn patterns and make inferences. There are three main types of machine learning: 1. Supervised Learning: This is where the model is given labeled training data and the goal of learning is to generalize from the training data to unseen situations in a principled way. 2. Unsupervised Learning: This involves training on a dataset without explicit labels. The goal might be to discover inherent groupings or patterns within the data. 3. Reinforcement Learning: In this type, an agent learns to perform actions based on reward/penalty feedback to achieve a goal. It's commonly used in robotics, gaming, and navigation. Deep learning, a subset of machine learning, uses neural networks with many layers (\"deep\" structures) and has been responsible for many recent breakthroughs in AI, including speech recognition, image recognition, and natural language processing. It's important to note that machine learning is a rapidly developing field, with new techniques and applications emerging regularly.",
    text_splitter=TokenTextSplitter(chunk_size=200, chunk_overlap=20)
)

# a standalone chunk, created directly without going through a record
taskingai.retrieval.create_chunk(
    collection_id=collection.collection_id,
    content="The dog is a domesticated descendant of the wolf. Also called the domestic dog, it is derived from extinct gray wolves, and the gray wolf is the dog's closest living relative. The dog was the first species to be domesticated by humans.",
)

In [None]:
# list chunks in the collection and print each one followed by a separator line
chunks = taskingai.retrieval.list_chunks(collection_id=collection.collection_id)
for chunk in chunks:
    # single print call per chunk: the chunk itself, then a line of 50 dashes
    print(chunk, "-" * 50, sep="\n")

In [None]:
# delete collection
# Final cleanup: removes the collection that the record and chunk cells above used.
taskingai.retrieval.delete_collection(collection_id=collection.collection_id)