In [1]:
import os

# Environment variable setup
GROCLAKE_API_KEY = 'ea5d2f1c4608232e07d3aa3d998e5135'
GROCLAKE_ACCOUNT_ID = 'b3920ccf5094424a74ca0aec2b66dfcb'

os.environ['GROCLAKE_API_KEY'] = GROCLAKE_API_KEY
os.environ['GROCLAKE_ACCOUNT_ID'] = GROCLAKE_ACCOUNT_ID


In [2]:
from groclake.vectorlake import VectorLake
from groclake.datalake import DataLake
from groclake.modellake import ModelLake

try:
    # Initialize VectorLake
    vectorlake = VectorLake()
    vector_create = vectorlake.create()
    vectorlake_id = vector_create["vectorlake_id"]
    print(f"VectorLake created with ID: {vectorlake_id}")

    # Initialize DataLake
    datalake = DataLake()
    datalake_create = datalake.create()
    datalake_id = datalake_create["datalake_id"]
    print(f"DataLake created with ID: {datalake_id}")

except Exception as e:
    print("Error during VectorLake or DataLake creation:", str(e))


VectorLake created with ID: nbxpgn3qiuvju058
DataLake created with ID: a6n31j7q6ufe6yc1


In [3]:

try:
    # Prepare payload for pushing the document
    payload_push = {
        "datalake_id": datalake_id,
        "document_type": "url",
        "document_data": "https://drive.google.com/uc?export=download&id=1lVln94gqJKUfHXh07iAw6PSnz85s9vUf"   #example document of greenfield City
    }

    # Push the document
    data_push = datalake.push(payload_push)
    document_id = data_push.get("document_id")

    if not document_id:
        raise ValueError("Document ID not found in the push response.")

    print(f"Document pushed successfully. Document ID: {document_id}")

except Exception as e:
    print("Error while pushing document:", str(e))


Document pushed successfully. Document ID: 72294276ee2a4c5b


In [4]:
try:
    # Prepare payload for fetching the document
    payload_fetch = {
        "document_id": document_id,
        "datalake_id": datalake_id,
        "fetch_format": "chunk",
        "chunk_size": "500"
    }

    # Fetch the document
    data_fetch = datalake.fetch(payload_fetch)
    document_chunks = data_fetch.get("document_data", [])

    if not document_chunks:
        raise ValueError("No document data found.")

    print(f"Document fetched successfully. Total chunks: {len(document_chunks)}")

    # Print each chunk and its index
    for index, chunk in enumerate(document_chunks):
        print(f"Chunk {index + 1}: {chunk}")

except Exception as e:
    print("Error while fetching document:", str(e))

Document fetched successfully. Total chunks: 29
Chunk 1: :: Result No.Sample Reported On MR No. Sample Received On :Referred By:: Age / GenderName YASH SAMEER SAWANT 19 Year(s) /Male Dr. DIPANJAN  HALDAR - Reg No.  2015/08/449605:06:23 PM 26/02/2024 723733 LB-2024-0087054HAEMATOLOGY / IP No. 231677Whole Blood Specimen : Bed No. 0309303 Ward No. :DAY CARESample Registered On : :26/02/2024 03:34:40 PM : /26/02/2024 03:33:40 PM : : Investigations Result Methods Unit Biological Ref. Interval Complete Blood Count Hemoglobin SLS 4.7 g/dl 13.000 - 18.000 Red
Chunk 2:  Cell Count DC Detection 3.53 10^6/ul 4.500 - 6.500 Hematocrit Pulse Detection 20.6 % 40.000 - 54.000 MCV Calculated 58.4 fl 76.000 - 96.000 MCH Calculated 13.3 pg 27.000 - 32.000 MCHC Calculated 22.8 g/dl 32.000 - 35.000 RDW Calculated 22.5 % 11.500 - 14.500 WBC Count Flowcytometry 8.19 10^3/ul 4.000 - 11.000 Platelet Count Impedence 414 10^3/ul 150.000 - 450.000 MPV Impedence 9.6 fl 4.000 - 11.000 Diff. WBC Count Neutrophils Fl

In [5]:
try:
    for idx, chunk in enumerate(document_chunks):
        print(f"Processing chunk {idx + 1}: {chunk}")

        # Generate vector for the chunk
        vector_doc = vectorlake.generate(chunk)
        vector_chunk = vector_doc.get("vector")

        if not vector_chunk:
            raise ValueError(f"Vector generation failed for chunk {idx + 1}.")

        # Prepare payload for pushing the vector
        vectorlake_push_request = {
            "vector": vector_chunk,
            "vectorlake_id": vectorlake_id,
            "document_text": chunk,
            "vector_type": "text",
            "metadata": {}
        }

        # Push vector to VectorLake
        push_response = vectorlake.push(vectorlake_push_request)
        print(f"Push response for chunk {idx + 1}: {push_response}")

except Exception as e:
    print("Error while processing and pushing chunks:", str(e))

Processing chunk 1: :: Result No.Sample Reported On MR No. Sample Received On :Referred By:: Age / GenderName YASH SAMEER SAWANT 19 Year(s) /Male Dr. DIPANJAN  HALDAR - Reg No.  2015/08/449605:06:23 PM 26/02/2024 723733 LB-2024-0087054HAEMATOLOGY / IP No. 231677Whole Blood Specimen : Bed No. 0309303 Ward No. :DAY CARESample Registered On : :26/02/2024 03:34:40 PM : /26/02/2024 03:33:40 PM : : Investigations Result Methods Unit Biological Ref. Interval Complete Blood Count Hemoglobin SLS 4.7 g/dl 13.000 - 18.000 Red
Push response for chunk 1: {'vector_id': 1117}
Processing chunk 2:  Cell Count DC Detection 3.53 10^6/ul 4.500 - 6.500 Hematocrit Pulse Detection 20.6 % 40.000 - 54.000 MCV Calculated 58.4 fl 76.000 - 96.000 MCH Calculated 13.3 pg 27.000 - 32.000 MCHC Calculated 22.8 g/dl 32.000 - 35.000 RDW Calculated 22.5 % 11.500 - 14.500 WBC Count Flowcytometry 8.19 10^3/ul 4.000 - 11.000 Platelet Count Impedence 414 10^3/ul 150.000 - 450.000 MPV Impedence 9.6 fl 4.000 - 11.000 Diff. WBC

In [6]:
try:
    # Generate vector for the search query
    search_query = "Recylcing Rate"
    vector_search_data = vectorlake.generate(search_query)
    search_vector = vector_search_data.get("vector")

    if not search_vector:
        raise ValueError("Search vector generation failed.")

    # Prepare payload for the search
    search_payload = {
        "vector": search_vector,
        "vectorlake_id": vectorlake_id,
        "vector_type": "text",
    }

    # Perform the search
    search_response = vectorlake.search(search_payload)
    print("Search results:", search_response)

except Exception as e:
    print("Error while performing vector search:", str(e))


Search results: {'results': [{'vector_document': ' Cell Count DC Detection 3.53 10^6/ul 4.500 - 6.500 Hematocrit Pulse Detection 20.6 % 40.000 - 54.000 MCV Calculated 58.4 fl 76.000 - 96.000 MCH Calculated 13.3 pg 27.000 - 32.000 MCHC Calculated 22.8 g/dl 32.000 - 35.000 RDW Calculated 22.5 % 11.500 - 14.500 WBC Count Flowcytometry 8.19 10^3/ul 4.000 - 11.000 Platelet Count Impedence 414 10^3/ul 150.000 - 450.000 MPV Impedence 9.6 fl 4.000 - 11.000 Diff. WBC Count Neutrophils Flowcyto/Manual 62 % 40.000 - 70.000 Eosinophils Flowcyto/Manual 02 %', 'metadata': {}}, {'vector_document': ':DAY CARESample Registered On : :26/02/2024 03:34:40 PM : /26/02/2024 03:33:40 PM : : Investigations Result Methods Unit Biological Ref. Interval Reticulocytes* Supravital Stain(Methylene  Blue)1.6 % Reticulocytes : Reference Range:-   Adults :- 0.2 - 2.0 %   Infants :- 2.5 - 6.5 %  Cord Blood :- 1.0 - 2.0 % SMITA JADHAVChecked By  DR. SUPRIYA DUTTA M.B.B.S, M.D.(PATH)(MMC NO:  2001/02/720) Partial Reprodu

In [7]:
try:
    # Generate vector for the search query
    search_query = "Tell me about the patient's condition."
    vector_search_data = vectorlake.generate(search_query)
    search_vector = vector_search_data.get("vector")

    if not search_vector:
        raise ValueError("Search vector generation failed.")

    # Prepare the vector search request with metadata
    vectorlake_search_request = {
        "vector": search_vector,
        "vector_type": "text",
        "vector_document": search_query,
        "metadata": {
            "key": "value"  # Include custom metadata as needed
        }
    }

    print("VectorLake Search Request:", vectorlake_search_request)

    # Perform vector search in VectorLake
    search_response = vectorlake.search(vectorlake_search_request)
    print("Search Response:", search_response)

    # Extract search results from the response
    search_results = search_response.get("results", [])
    if not search_results:
        raise ValueError("No relevant search results found.")

    # Combine relevant vector documents into enriched context
    enriched_context = []
    token_count = 0

    for result in search_results:
        doc_content = result.get("vector_document", "")
        doc_tokens = len(doc_content.split())

        if token_count + doc_tokens <= 1000:  # Adjust limit dynamically
            enriched_context.append(doc_content)
            token_count += doc_tokens
        else:
            break  # Stop when the token limit is reached

    enriched_context = " ".join(enriched_context)
    print("Enriched Context:", enriched_context)

    # Construct the ModelLake query with enriched context
    payload = {
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {
                "role": "user",
                "content": f"Using the following context from retrieved documents: {enriched_context}, "
                           f"please provide a detailed explanation."
            }
        ],
        "token_size": 3000
    }

    # Query ModelLake for a response
    try:
        chat_response = ModelLake().chat_complete(payload)
        # Extract the assistant's answer
        answer = chat_response.get("answer", "No answer received from ModelLake.")
        print("Chat Answer:", answer)
    except Exception as e:
        print("An error occurred with ModelLake:", str(e))

except Exception as e:
    print("Error during vector search or processing:", str(e))


VectorLake Search Request: {'vector': [-0.009455756284296513, 0.053259097039699554, -0.05022736266255379, -0.038807064294815063, 0.03770303726196289, 0.05498025566339493, 0.011235513724386692, -0.013021600432693958, -0.026190418750047684, 0.03654191270470619, 0.03309577330946922, 0.047452956438064575, 0.014454667456448078, 0.00890774093568325, 0.029007866978645325, -0.02002837508916855, 0.004240773152559996, -0.043437980115413666, -0.049521248787641525, -0.052060987800359726, 0.012724868021905422, -0.025905460119247437, 0.029691753908991814, -0.00654886569827795, -0.016061384230852127, -0.0167784933000803, -0.008466691710054874, -0.03607962653040886, 0.0625627189874649, -0.07193291932344437, 0.048593178391456604, -0.01156935840845108, 0.03245991840958595, -0.01564394310116768, 0.05795184150338173, 0.005809191148728132, 0.0056205675937235355, 0.0049143219366669655, 0.0135997598990798, -0.09197153896093369, -0.012290893122553825, -0.00232896045781672, -0.024971093982458115, -0.0088644921