# **CRUD Operation**

Each section below defines a function for one CRUD operation (create, read, update, or delete) on the documents of a Discovery Engine data store.

In [None]:
#Installation
!pip install google-cloud-discoveryengine
!pip install --upgrade --user --quiet google-cloud-aiplatform
!pip install jsonlines

Collecting google-cloud-discoveryengine
  Downloading google_cloud_discoveryengine-0.13.5-py3-none-any.whl.metadata (5.3 kB)
Downloading google_cloud_discoveryengine-0.13.5-py3-none-any.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-cloud-discoveryengine
Successfully installed google-cloud-discoveryengine-0.13.5


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/6.9 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.5/6.9 MB[0m [31m15.8 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━[0m [32m4.1/6.9 MB[0m [31m58.1 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m6.9/6.9 MB[0m [31m76.8 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m58.9 MB/s[0m eta [36m0:00:00[0m
[0mCollecting jsonlines
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Downloading jsonlines-4.0.0-py3-none-any.whl (8.7 kB)
Installing collected packages: jsonlines
Successfully installed jsonlines-4.0.0


In [None]:
import sys

# Run the interactive Google login only when this notebook is executing inside
# Colab (detected by the google.colab module already being loaded).
if "google.colab" in sys.modules:
    from google.colab import auth

    # NOTE(review): presumably this is what lets the later
    # `gcloud auth print-access-token` calls succeed in Colab — confirm.
    auth.authenticate_user()



**1. Create One Record**

To create a single record in the data store:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development" # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358" # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch an access token from the gcloud CLI.
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
# Never print the token: cell outputs are saved with the notebook and would
# leak the credential to anyone who receives the file.
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def create_one_record(record):
    """Create one document in the data store from a record dict.

    Args:
        record (dict): Fields to store. ``record["id"]`` is used as the
            document ID and the whole dict is sent as ``structData``.

    Returns:
        None. The outcome is reported with ``print``.

    Raises:
        KeyError: If ``record`` has no ``"id"`` key.
    """
    document_id = record["id"]
    payload = {"structData": record}
    try:
        response = requests.post(
            BASE_URL,
            headers=HEADERS,
            # Let requests URL-encode the ID instead of splicing it into the URL.
            params={"documentId": document_id},
            json=payload,
            # Without a timeout a stalled connection would hang the cell forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        print(f"Error while creating record {document_id}: {e}")
        return

    if response.status_code == 200:
        print(f"Record {document_id} created successfully!")
    else:
        print(f"Failed to create record {document_id}: {response.text}")

# Define record data inputs
# The "id" value doubles as the document ID of the created document.
record = {
    "id": "doc6",
    "question": "What is AI?",
    "answer": "AI is artificial intelligence.",
    "category": "Tech"
}
# NOTE: re-running this with an ID that already exists fails with
# 409 ALREADY_EXISTS, as the recorded output of this cell shows.
create_one_record(record)




[access token redacted]
Failed to create record doc6: {
  "error": {
    "code": 409,
    "message": "Active document with the same name \"projects/954273464710/locations/global/collections/default_collection/dataStores/contoh1_1735396977358/branches/0/documents/doc6\" exists.",
    "status": "ALREADY_EXISTS"
  }
}



**2. Create Multiple Records**

To create several records at once:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def create_one_record(record):
    """Create a single record in the data store.

    Args:
        record (dict): Fields to store. ``record["id"]`` is used as the
            document ID and the whole dict is sent as ``structData``.

    Returns:
        None. The outcome is reported with ``print``.

    Raises:
        KeyError: If ``record`` has no ``"id"`` key.
    """
    document_id = record["id"]
    payload = {"structData": record}

    try:
        response = requests.post(
            BASE_URL,
            headers=HEADERS,
            # Let requests URL-encode the ID instead of splicing it into the URL.
            params={"documentId": document_id},
            json=payload,
            # Without a timeout a stalled connection would hang the cell forever.
            timeout=30,
        )
    except requests.exceptions.RequestException as e:
        print(f"Error while creating record {document_id}: {e}")
        return

    if response.status_code == 200:
        print(f"Record {document_id} created successfully!")
    else:
        print(f"Failed to create record {document_id}: {response.status_code} - {response.text}")

def create_multiple_records(records):
    """Issue one create request per entry of ``records``, in iteration order."""
    for entry in records:
        # Each entry is handed to create_one_record unchanged.
        create_one_record(entry)

# Example records
# Every dict needs an "id" key: create_one_record uses it as the document ID.
records = [
    {"id": "doc4", "question": "What is AI?", "answer": "AI is artificial intelligence.", "category": "Tech"},
    {"id": "doc2", "question": "What is ML?", "answer": "ML is machine learning.", "category": "Tech"}
]

# Call the function to create multiple records (one request per record)
create_multiple_records(records)


Record doc4 created successfully!
Record doc2 created successfully!


**3. Delete One Record**

To delete a specific record:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def delete_one_record(document_id):
    """Delete one document from the data store by ID.

    Args:
        document_id (str): ID of the document to delete.

    Returns:
        None. The outcome is reported with ``print``.
    """
    from urllib.parse import quote

    if not document_id:
        # An empty ID would produce a URL with no document segment.
        print("Error: Document ID cannot be empty!")
        return

    # Percent-encode the ID so characters such as "/" or "?" cannot alter the path.
    url = f"{BASE_URL}/{quote(str(document_id), safe='')}"
    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.delete(url, headers=HEADERS, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Error while deleting record {document_id}: {e}")
        return

    if response.status_code == 200:
        print(f"Record {document_id} deleted successfully!")
    else:
        print(f"Failed to delete record {document_id}: {response.text}")

# Example: delete the document whose ID is "doc1".
delete_one_record("doc1")


Record doc1 deleted successfully!


**4. Delete Multiple Records**

To delete several records:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def delete_one_record(document_id):
    """Delete a single record by ID.

    Args:
        document_id (str): ID of the document to delete.

    Returns:
        None. The outcome is reported with ``print``.
    """
    from urllib.parse import quote

    if not document_id:
        # An empty ID would produce a URL with no document segment.
        print("Error: Document ID cannot be empty!")
        return

    # Percent-encode the ID so characters such as "/" or "?" cannot alter the path.
    url = f"{BASE_URL}/{quote(str(document_id), safe='')}"
    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.delete(url, headers=HEADERS, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Error while deleting record {document_id}: {e}")
        return

    if response.status_code == 200:
        print(f"Record {document_id} deleted successfully!")
    else:
        print(f"Failed to delete record {document_id}: {response.status_code} - {response.text}")

def delete_multiple_records(document_ids):
    """Issue one delete request per ID in ``document_ids``, in iteration order."""
    for doc_id in document_ids:
        # Each ID is handed to delete_one_record unchanged.
        delete_one_record(doc_id)

# Example: delete documents "doc6" and "doc4", one request per ID.
delete_multiple_records(["doc6", "doc4"])


Record doc6 deleted successfully!
Record doc4 deleted successfully!


**5. Update a Record**

To update a record:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def update_record(document_id, updated_data):
    """Update one document by sending a PATCH with new ``structData``.

    Args:
        document_id (str): ID of the document to update.
        updated_data (dict): Field values sent as the document's ``structData``.

    Returns:
        None. The outcome is reported with ``print``.
    """
    from urllib.parse import quote

    if not document_id:
        # An empty ID would produce a URL with no document segment.
        print("Error: Document ID cannot be empty!")
        return

    # Percent-encode the ID so characters such as "/" or "?" cannot alter the path.
    url = f"{BASE_URL}/{quote(str(document_id), safe='')}"
    payload = {"structData": updated_data}
    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.patch(url, headers=HEADERS, json=payload, timeout=30)
    except requests.exceptions.RequestException as e:
        print(f"Error while updating record {document_id}: {e}")
        return

    if response.status_code == 200:
        print(f"Record {document_id} updated successfully!")
    else:
        print(f"Failed to update record {document_id}: {response.text}")


# New field values for "doc2"; update_record sends this dict as structData.
updated_record = {
    "id": "doc2",
    "question": "Updated thexxxx question?",
    "answer": "Updated the answer.",
    "category": "Updated the category"
}
# The first argument selects the document; it matches the "id" field above.
update_record("doc2", updated_record)


Record doc2 updated successfully!


**6. Read One Record**

To retrieve a single record:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def read_one_record(document_id):
    """
    Fetch a single document from the Discovery Engine and print it.

    Args:
        document_id (str): The ID of the document to read.

    Returns:
        None. The document JSON (or an error message) is printed.
    """
    from urllib.parse import quote

    if not document_id:
        print("Error: Document ID cannot be empty!")
        return

    # Percent-encode the ID so characters such as "/" or "?" cannot alter the path.
    url = f"{BASE_URL}/{quote(str(document_id), safe='')}"
    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.get(url, headers=HEADERS, timeout=30)
        if response.status_code == 200:
            print(f"Record {document_id} retrieved successfully!")
            # ensure_ascii=False keeps non-ASCII text readable instead of
            # printing it as \uXXXX escape sequences.
            print(json.dumps(response.json(), indent=2, ensure_ascii=False))
        elif response.status_code == 404:
            print(f"Record {document_id} not found.")
        else:
            print(f"Failed to retrieve record {document_id}: {response.status_code} - {response.text}")
    except requests.exceptions.RequestException as e:
        print(f"Error while trying to fetch record {document_id}: {e}")

# Example: fetch the document with ID "doc2" and pretty-print its JSON.
read_one_record("doc2")



Record doc2 retrieved successfully!
{
  "name": "projects/954273464710/locations/global/collections/default_collection/dataStores/contoh1_1735396977358/branches/0/documents/doc2",
  "id": "doc2",
  "schemaId": "default_schema",
  "structData": {
    "id": "doc2",
    "question": "Updated thexxxx question?",
    "category": "Updated the category",
    "answer": "Updated the answer."
  },
  "parentDocumentId": "doc2",
  "indexTime": "2024-12-31T02:59:58.531615Z"
}


**7. Read Multiple Records**

To retrieve several records:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def read_one_record(document_id):
    """
    Fetch a single document from the Discovery Engine and print it.

    Args:
        document_id (str): The ID of the document to read.

    Returns:
        None. The document JSON (or an error message) is printed.
    """
    from urllib.parse import quote

    if not document_id:
        print("Error: Document ID cannot be empty!")
        return

    # Percent-encode the ID so characters such as "/" or "?" cannot alter the path.
    url = f"{BASE_URL}/{quote(str(document_id), safe='')}"
    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.get(url, headers=HEADERS, timeout=30)
        if response.status_code == 200:
            print(f"Record {document_id} retrieved successfully!")
            # ensure_ascii=False keeps non-ASCII text readable instead of
            # printing it as \uXXXX escape sequences.
            print(json.dumps(response.json(), indent=2, ensure_ascii=False))
        elif response.status_code == 404:
            print(f"Record {document_id} not found.")
        else:
            print(f"Failed to retrieve record {document_id}: {response.status_code} - {response.text}")
    except requests.exceptions.RequestException as e:
        print(f"Error while trying to fetch record {document_id}: {e}")


def read_multiple_records(document_ids):
    """Fetch and print each document named in ``document_ids``, in iteration order."""
    for doc_id in document_ids:
        # Each ID is handed to read_one_record unchanged.
        read_one_record(doc_id)


# Example: fetch and print documents "doc34" and "doc2", one request per ID.
read_multiple_records(["doc34", "doc2"])


Record doc34 retrieved successfully!
{
  "name": "projects/954273464710/locations/global/collections/default_collection/dataStores/contoh1_1735396977358/branches/0/documents/doc34",
  "id": "doc34",
  "schemaId": "default_schema",
  "structData": {
    "id": "doc34",
    "category": "Tech",
    "answer": "AI is artificial intelligence.",
    "question": "What is AI?"
  },
  "parentDocumentId": "doc34",
  "indexTime": "2024-12-30T16:31:23.630975Z"
}
Record doc2 retrieved successfully!
{
  "name": "projects/954273464710/locations/global/collections/default_collection/dataStores/contoh1_1735396977358/branches/0/documents/doc2",
  "id": "doc2",
  "schemaId": "default_schema",
  "structData": {
    "category": "Updated the category",
    "question": "Updated thexxxx question?",
    "answer": "Updated the answer.",
    "id": "doc2"
  },
  "parentDocumentId": "doc2",
  "indexTime": "2024-12-31T02:59:58.531615Z"
}


**8. Read All Records**

To retrieve every record in the data store (following pagination):

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh1_1735396977358"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def read_all_records():
    """
    Fetch all documents from the Discovery Engine and print them.

    Pages through the list endpoint using ``nextPageToken`` until no token is
    returned. If a page request fails, the failure is printed and whatever was
    collected before it is still printed.

    Returns:
        None. Results and errors are reported with ``print``.
    """
    records = []
    next_page_token = None
    request_failed = False

    try:
        while True:
            params = {"pageToken": next_page_token} if next_page_token else {}

            # Without a timeout a stalled connection would hang the cell forever.
            response = requests.get(BASE_URL, headers=HEADERS, params=params, timeout=30)
            if response.status_code != 200:
                print(f"Failed to fetch records: {response.status_code} - {response.text}")
                request_failed = True
                break

            data = response.json()
            records.extend(data.get("documents", []))  # Append retrieved documents
            next_page_token = data.get("nextPageToken")
            if not next_page_token:  # No more pages
                break

        # Print all retrieved records
        if records:
            print(f"Retrieved {len(records)} records:")
            # ensure_ascii=False keeps non-ASCII text readable instead of
            # printing it as \uXXXX escape sequences.
            print(json.dumps(records, indent=2, ensure_ascii=False))
        elif not request_failed:
            # Only claim the store is empty when the listing actually succeeded.
            print("No records found.")
    except requests.exceptions.RequestException as e:
        print(f"Error while trying to fetch records: {e}")

# Fetch every document in the data store and pretty-print the combined list.
# NOTE: the output contains every record in full, so it can be very long.
read_all_records()


Retrieved 799 records:
[
  {
    "name": "projects/954273464710/locations/global/collections/default_collection/dataStores/contoh1_1735396977358/branches/0/documents/016b17c388d22ff07c337b69e91d7263",
    "id": "016b17c388d22ff07c337b69e91d7263",
    "schemaId": "default_schema",
    "structData": {
      "question": "\u3054\u56de\u7b54\u3001\u6dfb\u524a\u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3057\u305f\u3002",
      "id": "qa_107",
      "category": "## \u8981\u7d04: \u304a\u5ba2\u69d8\u306f\u56de\u7b54\u3068\u6dfb\u524a\u306b\u611f\u8b1d\u306e\u610f\u3092\u8868\u660e\u3057\u3066\u3044\u307e\u3059\u3002",
      "answer": "\u677e\u4e38\u3055\u3093\u672c\u65e5\u306f\u304a\u5fd9\u3057\u3044\u4e2d\u304a\u6642\u9593\u3044\u305f\u3060\u304d\u307e\u3057\u3066\u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3057\u305f\u3002\u4eca\u5f8c\u30ab\u30ea\u30ad\u30e5\u30e9\u30e0\u3092\u9032\u3081\u3064\u3064\u3001\u52c9\u5f37\u4f1a\u3082\u6d3b\u7528\u3057\u306a\u304c\u3089\u5b9

**9. Delete All Records**

To delete every record in the data store:

In [None]:
import requests
import json
import subprocess

# Configuration
PROJECT_ID = "monobrain-development"  # @param {type:"string"}
DATA_STORE_ID = "contoh2_1735444434810"  # @param {type:"string"}
BASE_URL = f"https://discoveryengine.googleapis.com/v1beta/projects/{PROJECT_ID}/locations/global/collections/default_collection/dataStores/{DATA_STORE_ID}/branches/0/documents"

# Fetch access token
# check=True makes a failed gcloud call raise here instead of silently leaving
# an empty token that every later request would send as "Bearer ".
ACCESS_TOKEN = subprocess.run(
    ["gcloud", "auth", "print-access-token"],
    capture_output=True,
    text=True,
    check=True,
).stdout.strip()
HEADERS = {"Authorization": f"Bearer {ACCESS_TOKEN}", "Content-Type": "application/json"}

def list_all_records():
    """
    Collect the ID of every document in the data store, page by page.

    HTTP failures and request exceptions are printed rather than raised, so
    the result may hold only the IDs gathered before the problem occurred.

    Returns:
        list: A list of document IDs.
    """
    endpoint = f"{BASE_URL}"
    collected_ids = []
    page_token = None

    try:
        more_pages = True
        while more_pages:
            # Only send pageToken once the previous page has supplied one.
            query = {"pageToken": page_token} if page_token else {}
            reply = requests.get(endpoint, headers=HEADERS, params=query)
            if reply.status_code != 200:
                print(f"Failed to list records: {reply.status_code} - {reply.text}")
                break

            body = reply.json()
            collected_ids.extend(entry["id"] for entry in body.get("documents", []))
            page_token = body.get("nextPageToken")
            more_pages = bool(page_token)  # stop when no further page is advertised
    except requests.exceptions.RequestException as e:
        print(f"Error while listing records: {e}")

    return collected_ids

def delete_one_record(document_id):
    """
    Delete a single document by its ID.

    Args:
        document_id (str): The ID of the document to delete.

    Returns:
        None. The outcome is reported with ``print``.
    """
    from urllib.parse import quote

    if not document_id:
        # An empty ID would produce a URL with no document segment.
        print("Error: Document ID cannot be empty!")
        return

    # Percent-encode the ID so characters such as "/" or "?" cannot alter the path.
    url = f"{BASE_URL}/{quote(str(document_id), safe='')}"
    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.delete(url, headers=HEADERS, timeout=30)
        if response.status_code == 200:
            print(f"Record {document_id} deleted successfully!")
        elif response.status_code == 404:
            print(f"Record {document_id} not found.")
        else:
            print(f"Failed to delete record {document_id}: {response.status_code} - {response.text}")
    except requests.exceptions.RequestException as e:
        print(f"Error while deleting record {document_id}: {e}")

def delete_all_records():
    """
    Delete every document that list_all_records() reports, one request per ID.

    Prints a notice and does nothing else when the listing comes back empty.
    """
    ids_to_remove = list_all_records()  # IDs of everything currently listed
    if ids_to_remove:
        print(f"Found {len(ids_to_remove)} records. Proceeding to delete them...")
        for doc_id in ids_to_remove:
            delete_one_record(doc_id)
    else:
        print("No records found to delete.")

# WARNING: destructive. This issues a DELETE for every document ID that
# list_all_records() returns for the DATA_STORE_ID configured in this cell.
delete_all_records()


Found 796 records. Proceeding to delete them...
Record 016b17c388d22ff07c337b69e91d7263 deleted successfully!
Record 01c316201b7631c399839c9a060ff013 deleted successfully!
Record 01d9e1a21782d65e21a45f0e045c8cbc deleted successfully!
Record 02b3484e6bfbe1e4a0aacac01303fafe deleted successfully!
Record 02e2806f6bc7baaea1fe15696ae1eb5b deleted successfully!
Record 03913a96f9ea8fec113a7dbce14652ca deleted successfully!
Record 040f2a0faaf962a6e8aeb879fbd2f1a0 deleted successfully!
Record 043dfda6e10fd09dce3ba0694dd65b2a deleted successfully!
Record 0487f3acac9de51159067db80d808e0c deleted successfully!
Record 050281b792cfa6785165ae5d28e1c87a deleted successfully!
Record 057930274402e5aecb36d9b8ab39aa20 deleted successfully!
Record 057abdd599f143dbe1279c7832cb6466 deleted successfully!
Record 06096196c6a614e08195038499152b6b deleted successfully!
Record 0642c1eb5cd8c13323ca507c4922827a deleted successfully!
Record 06c508bcde429f4f857eb8d98de96556 deleted successfully!
Record 06cb558881bd0a2