In [1]:
# Cell 1: Imports and Configuration
import requests
import json
import threading
import uuid
import time # For potential delays or timeouts if needed

# Configuration for your Flask API.
# Base URL that the helper functions below prefix to endpoint paths such as
# "/models", "/create-session" and "/chat" (so it should not end with a slash).
API_BASE_URL = "http://localhost:5000" # Adjust if your API is on a different host/port

In [2]:
# Cell 2: Helper Function to Get Available Models
def get_available_models(api_base_url, timeout=10):
    """Fetches the list of available models from the API and prints a summary.

    Args:
        api_base_url: Base URL of the API; "/models" is appended to it.
        timeout: Timeout (seconds) handed to ``requests.get``. Without one,
            requests waits indefinitely on a server that never answers.

    Returns:
        The decoded JSON body of the response (iterated here as a sequence of
        model dicts), or an empty list when the request fails, the server
        returns an HTTP error status, or the body is not valid JSON.
    """
    try:
        response = requests.get(f"{api_base_url}/models", timeout=timeout)
        response.raise_for_status()  # Raises an exception for HTTP errors
        models = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error fetching models: {e}")
        return []

    print("Available Models:")
    for model in models:
        source_type = model.get('source_type')
        print(f"- ID: {model.get('id')}, Name: {model.get('name')}, Type: {model.get('type')}, Source: {source_type}")
        if source_type == 'local':
            print(f"  Path: {model.get('path')}")
        elif source_type == 'hub':
            # GGUF hub models are identified by repo + filename; other hub
            # models carry their identifier in 'path'.
            if model.get('type') == 'gguf':
                print(f"  Repo ID: {model.get('repo_id')}, Filename: {model.get('filename')}")
            else:
                print(f"  Hub ID/Path: {model.get('path')}")
        # print(f"  Default Params: {model.get('params')}") # Uncomment for more detail
    return models

# Fetch and display available models when this cell is run.
# get_available_models prints the error and returns [] when the request
# fails, so AVAILABLE_MODELS is defined (possibly empty) after this cell.
AVAILABLE_MODELS = get_available_models(API_BASE_URL)

Error fetching models: HTTPConnectionPool(host='localhost', port=5000): Max retries exceeded with url: /models (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x00000173BE95F100>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))


In [3]:
# Cell 3: Helper Function to Create a New Session
def create_new_session(api_base_url, timeout=10):
    """Creates a new session via the API and returns the session_id.

    Args:
        api_base_url: Base URL of the API; "/create-session" is appended to it.
        timeout: Timeout (seconds) handed to ``requests.post``. Without one,
            requests waits indefinitely on a server that never answers.

    Returns:
        The session id from the response when its 'status' is 'success' and a
        non-empty 'session_id' is present; otherwise None (request failure,
        HTTP error status, non-JSON body, or an unsuccessful status).
    """
    try:
        response = requests.post(f"{api_base_url}/create-session", timeout=timeout)
        response.raise_for_status()
        session_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error creating session: {e}")
        return None

    session_id = session_data.get('session_id')
    if session_data.get('status') == 'success' and session_id:
        print(f"Created new session: {session_id}")
        return session_id

    print(f"Failed to create session: {session_data.get('message', 'Unknown error')}")
    return None

In [4]:
# Cell 4: Helper Function to Ask a Question on a Session (Handles SSE)
def ask_question_on_session(api_base_url, model_id, session_id, user_prompt, system_prompt,
                            generation_params=None, model_load_params=None, temperature=0.7,
                            timeout=300):
    """
    Sends a question to the chat API for a given session and streams the response.

    The response body is read line by line; lines starting with 'data: ' are
    parsed as JSON events. 'text_chunk' values of non-final events are
    concatenated; an event with a truthy 'is_final' ends the stream and, if it
    carries 'full_response', that value replaces the concatenated text.

    Args:
        api_base_url: Base URL of the API; "/chat" is appended to it.
        model_id: Sent as 'model_id' in the request payload.
        session_id: Sent as 'session_id' in the request payload.
        user_prompt: Sent as 'prompt' in the request payload.
        system_prompt: Sent as 'system_prompt' in the request payload.
        generation_params: Sent as 'model_specific_params' ({} when falsy).
        model_load_params: Sent as 'model_load_params' ({} when falsy).
        temperature: Sent as 'temperature' in the request payload.
        timeout: Timeout (seconds) handed to ``requests.post``.

    Returns:
        The complete assistant response with surrounding whitespace stripped.
        On an API-reported error, a timeout, or any other request failure, the
        bracketed error message string is returned instead of raising.
    """
    payload = {
        "session_id": session_id,
        "prompt": user_prompt,
        "system_prompt": system_prompt,
        "model_id": model_id,
        "temperature": temperature,  # Base temperature
        "model_specific_params": generation_params if generation_params else {},
        "model_load_params": model_load_params if model_load_params else {}
    }

    full_response_text = ""
    print(f"\n[Session: {session_id}, Model: {model_id}] Asking: {user_prompt[:100]}...")
    if system_prompt:
        print(f"System Prompt: {system_prompt[:100]}...")

    try:
        # The context manager closes the streamed response on every exit path
        # (normal end, early break on the final event, early return on an API
        # error), so the underlying connection is not left open.
        with requests.post(f"{api_base_url}/chat", json=payload, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            for line in response.iter_lines():
                if not line:
                    continue  # blank separator lines between events
                decoded_line = line.decode('utf-8')
                if not decoded_line.startswith('data: '):
                    continue

                try:
                    data_json = json.loads(decoded_line[len('data: '):])
                except json.JSONDecodeError:
                    print(f"\nWarning: Could not decode JSON from stream: {decoded_line}")
                    continue
                if not isinstance(data_json, dict):
                    # Valid JSON but not an event object (e.g. a bare number);
                    # treat it like any other undecodable event.
                    print(f"\nWarning: Could not decode JSON from stream: {decoded_line}")
                    continue

                if data_json.get('error'):
                    error_msg = f"[API Error for session {session_id}]: {data_json['error']}"
                    print(error_msg)
                    return error_msg # Return error message as response

                is_final = data_json.get('is_final')
                if 'text_chunk' in data_json and not is_final:
                    full_response_text += data_json['text_chunk']
                    # print(data_json['text_chunk'], end='', flush=True) # For live streaming in notebook

                if is_final:
                    if 'full_response' in data_json: # Use server's full response if available
                        full_response_text = data_json['full_response']
                    # print("\n--- End of Stream ---")
                    break

        print(f"[Session: {session_id}] Full Response: {full_response_text[:100]}...")
        return full_response_text.strip()

    except requests.exceptions.Timeout:
        error_msg = f"[API Timeout for session {session_id} while asking: {user_prompt[:50]}...]"
        print(error_msg)
        return error_msg
    except requests.exceptions.RequestException as e:
        error_msg = f"[API Request Error for session {session_id}]: {e}"
        print(error_msg)
        return error_msg

In [5]:
# Cell 5: Worker Function to Process a Query Group
def worker_process_query_group(query_group_config, model_id, api_base_url, all_results_list, results_lock):
    """
    Thread entry point: run every question of one query group in a fresh session.

    A session is created first. If that fails, a 'failed_session_creation'
    record is appended and the function returns. Otherwise each entry of
    query_group_config["questions_and_keys"] is asked in order on that session
    and its answer is stored under the entry's "key". One summary record is
    appended to all_results_list while holding results_lock.
    """
    def _publish(record):
        # The results list is shared by all workers; only append under the lock.
        with results_lock:
            all_results_list.append(record)

    group_id = query_group_config["group_id"]
    print(f"Thread started for Query Group: {group_id}")

    session_id = create_new_session(api_base_url)
    if not session_id:
        print(f"Failed to create session for group {group_id}. Aborting this group.")
        _publish({
            "group_id": group_id,
            "session_id": None,
            "status": "failed_session_creation",
            "results": {}
        })
        return

    # Arguments that are the same for every question of this group. The system
    # prompt is sent with each call; history is keyed by session_id on the
    # backend side (per the original author's note about app.py).
    per_call_args = (
        query_group_config.get("system_prompt", ""),
        query_group_config.get("generation_params", {}),
        query_group_config.get("model_load_params", {}),
    )
    group_temperature = query_group_config.get("temperature", 0.7)  # Can be set per group

    answers_by_key = {}
    for entry in query_group_config["questions_and_keys"]:
        question_text, result_key = entry["question"], entry["key"]
        answers_by_key[result_key] = ask_question_on_session(
            api_base_url,
            model_id,
            session_id,
            question_text,
            *per_call_args,
            temperature=group_temperature
        )
        # time.sleep(1) # Optional: small delay between questions in the same session if needed

    _publish({
        "group_id": group_id,
        "session_id": session_id,
        "status": "completed",
        "results": answers_by_key
    })
    print(f"Thread finished for Query Group: {group_id}")

In [7]:
# Sample driver-license text used as extraction input. The irregular casing and
# misspellings ("LICENSe", "Cordhslde") are part of the sample data — it looks
# like raw OCR output — and must be kept verbatim.
# NOTE(review): the name `license` shadows the `license` helper that the
# interactive interpreter installs as a builtin, and Cell 6 defines its own
# `license_text_1` with the same content — confirm whether this variable is
# still needed before renaming or removing it.
license='''
        California
        DRIVER LICENSe
        dl 11234568
        CLASS C
        EXP 08/31/2014
        END NONE
        LNCARDHOLDER FNIMA
        2570 24TH STREET ANYTOWN, CA 95818
        doB 08/31/1977 RSTR NONE
        08311977
        VETERAN
        Cordhslde
        SEX F HGT 5'-05"
        HAIR BRN WGT 125 lb
        EYES BRN
        DD 00/00/0000NNNAN/ANFD/YY
        ISS 08/31/2009
'''

In [27]:
# Cell 6: Main Execution Block (Single Session per License, License in First Prompt, Execution Time)

# --- Configuration ---
# Model identifier sent as "model_id" with every /chat request of this cell.
CHOSEN_MODEL_ID = "gguf_local_Llama-3_2-1B-Instruct-Q8_0" # <--- !!! SET YOUR MODEL ID HERE !!!

# --- License Data ---
# Sample license text; irregular casing/misspellings are part of the sample
# (it looks like raw OCR output) and must be kept verbatim.
license_text_1 = """
        California
        DRIVER LICENSe
        dl 11234568
        CLASS C
        EXP 08/31/2014
        END NONE
        LNCARDHOLDER FNIMA
        2570 24TH STREET ANYTOWN, CA 95818
        doB 08/31/1977 RSTR NONE
        08311977
        VETERAN
        Cordhslde
        SEX F HGT 5'-05"
        HAIR BRN WGT 125 lb
        EYES BRN
        DD 00/00/0000NNNAN/ANFD/YY
        ISS 08/31/2009
"""

# --- Query Structure (Only one license document now) ---
# One dict per document. Keys read by process_license_document_sequentially:
#   document_id, license_text, system_prompt, questions_and_keys (required);
#   temperature, generation_params, model_load_params (optional).
QUERY_DOCUMENTS = [
    {
        "document_id": "california_license_1",
        "license_text": license_text_1,
        # NOTE(review): the prompt demands a single word, yet the address and
        # full-name questions expect multi-word answers — confirm the wording.
        "system_prompt": "Share answer in a single word in JSON ONLY. DON'T ADD ANY OTHER WORD/REMARK/NOTE WITH THE ANSWER.",
        # Each entry: "question" is the prompt text, "key" is the name under
        # which the answer is stored. The entry flagged "is_first_question"
        # gets the license text appended to its prompt.
        "questions_and_keys": [
            {"question": "What is the FULL address?", "key": "full_address", "is_first_question": True},
            {"question": "What's the state?", "key": "state"},
            {"question": "What's the gender (SEX)?", "key": "gender"},
            {"question": "What's the FULL NAME?", "key": "full_name"},

            # {"question": "What is the Date of Birth (doB)?", "key": "date_of_birth"},
            # {"question": "What is the License Expiry Date (EXP)?", "key": "expiry_date"}
        ],
        "temperature": 0.01,
        # Forwarded to the API as "model_specific_params".
        "generation_params": {"max_tokens": 150},
        # Forwarded to the API as "model_load_params".
        # NOTE(review): -1 presumably means "offload all layers" on the backend — confirm.
        "model_load_params": {"n_gpu_layers": -1}
    }
]

# --- Worker Function (Sequential Processing for a single document/license) ---
def process_license_document_sequentially(document_config, model_id, api_base_url):
    """Ask every configured question about one license document in a single session.

    The license text is appended to the prompt of each question flagged with
    "is_first_question". If no question carries that flag, the text is
    appended to the first question so the model always receives the document.

    Args:
        document_config: Dict with required keys "document_id", "license_text",
            "system_prompt" and "questions_and_keys" (a list of dicts with
            "question" and "key"), and optional keys "temperature"
            (default 0.1), "generation_params" and "model_load_params".
        model_id: Model identifier forwarded to ask_question_on_session.
        api_base_url: Base URL of the API.

    Returns:
        A dict with "document_id", "session_id", "status" and "results"
        (answer text per question key). On success "status" is "completed" and
        "processing_time_seconds" is included; if no session could be created
        "status" is "failed_session_creation" and "results" is empty.

    Raises:
        KeyError: if a required key is missing from the configuration.
    """
    doc_id = document_config["document_id"]
    current_license_text = document_config["license_text"]
    system_prompt = document_config["system_prompt"]
    questions = document_config["questions_and_keys"]
    temperature = document_config.get("temperature", 0.1)
    generation_params = document_config.get("generation_params", {})
    model_load_params = document_config.get("model_load_params", {})

    # perf_counter is monotonic, so measured durations are not affected by
    # system clock adjustments the way time.time() differences are.
    doc_start_time = time.perf_counter() # Start timer for this document
    print(f"\n--- Processing Document: {doc_id} ---")

    session_id = create_new_session(api_base_url)
    if not session_id:
        print(f"Failed to create session for {doc_id}. Skipping this document.")
        return {
            "document_id": doc_id,
            "session_id": None,
            "status": "failed_session_creation",
            "results": {}
        }

    # Without any explicit flag the license text would never be sent; fall
    # back to attaching it to the first question in that case.
    any_question_flagged = any(q.get("is_first_question", False) for q in questions)

    document_results_data = {}

    for index, q_item in enumerate(questions):
        question_start_time = time.perf_counter() # Start timer for this question
        user_question_text = q_item["question"]
        answer_key = q_item["key"]

        attach_license = q_item.get("is_first_question", False) or (
            not any_question_flagged and index == 0
        )
        if attach_license:
            effective_user_prompt = f"{user_question_text}\n\nLicense Text:\n{current_license_text}"
        else:
            effective_user_prompt = user_question_text

        document_results_data[answer_key] = ask_question_on_session(
            api_base_url,
            model_id,
            session_id,
            effective_user_prompt,
            system_prompt,
            generation_params,
            model_load_params,
            temperature=temperature
        )
        question_elapsed = time.perf_counter() - question_start_time
        print(f"  Question '{user_question_text[:50]}...' processed in {question_elapsed:.2f}s")
        # time.sleep(0.1) # Optional small delay

    doc_elapsed = time.perf_counter() - doc_start_time
    print(f"--- Finished processing Document: {doc_id} in {doc_elapsed:.2f}s ---")
    return {
        "document_id": doc_id,
        "session_id": session_id,
        "status": "completed",
        "results": document_results_data,
        "processing_time_seconds": doc_elapsed
    }

# --- Execution ---
if 'AVAILABLE_MODELS' not in globals():
    print("Warning: AVAILABLE_MODELS not found. Running Cell 2 to fetch models is recommended.")
    AVAILABLE_MODELS = []


def _parse_json_answer(raw_answer):
    """Try to read a model answer as JSON, tolerating a Markdown code fence.

    Returns a tuple (is_json, display_value): (True, re-serialized JSON text)
    when the answer parses as JSON after an optional leading ```json / ```
    and trailing ``` are removed, otherwise (False, raw_answer unchanged).
    """
    if not isinstance(raw_answer, str):
        return False, raw_answer

    cleaned = raw_answer.strip()
    if cleaned.startswith("```json"):
        cleaned = cleaned[len("```json"):].strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[len("```"):].strip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-len("```")].strip()

    try:
        return True, json.dumps(json.loads(cleaned))
    except (json.JSONDecodeError, TypeError):
        return False, raw_answer


# A placeholder model ID can never work, regardless of whether the model list
# was fetched. An empty AVAILABLE_MODELS only means the ID cannot be verified
# (e.g. Cell 2 ran while the server was down), so it does not block the run.
if CHOSEN_MODEL_ID == "default_model_id_placeholder":
    print("CRITICAL: CHOSEN_MODEL_ID is still the placeholder value.")
    print("Please ensure your Flask server is running, Cell 2 ran successfully, and update CHOSEN_MODEL_ID if needed.")
else:
    known_model_ids = {m.get('id') for m in AVAILABLE_MODELS if isinstance(m, dict)}
    if known_model_ids and CHOSEN_MODEL_ID not in known_model_ids:
        print(f"Warning: CHOSEN_MODEL_ID '{CHOSEN_MODEL_ID}' is not in the model list fetched by Cell 2.")
    print(f"Using Model ID: {CHOSEN_MODEL_ID}")

    all_collected_results = []
    # perf_counter is monotonic, so the elapsed time is not affected by
    # system clock adjustments.
    overall_start_time = time.perf_counter() # Start timer for the entire batch

    for document_config in QUERY_DOCUMENTS:
        result_item = process_license_document_sequentially(
            document_config,
            CHOSEN_MODEL_ID,
            API_BASE_URL
        )
        all_collected_results.append(result_item)

    overall_end_time = time.perf_counter()
    total_processing_time = overall_end_time - overall_start_time
    print(f"\n--- All documents processed sequentially in {total_processing_time:.2f} seconds ---")

    # --- Display Results ---
    print("\n--- Collected Results ---")
    for item in all_collected_results:
        if not item:
            print("\nEncountered a None result item, skipping display for it.")
            continue

        print(f"\nDocument ID: {item.get('document_id', 'N/A')}")
        print(f"Session ID: {item.get('session_id', 'N/A')}")
        print(f"Status: {item.get('status', 'N/A')}")
        if "processing_time_seconds" in item:
            print(f"Processing Time for Doc: {item['processing_time_seconds']:.2f}s")

        if not item.get("results"):
            print("  No results for this document.")
            continue

        for key, value in item["results"].items():
            # Any answer that parses as JSON (object, string, number, ...) is
            # reported as JSON; everything else is shown as the raw string.
            is_valid_json_format, final_display_value = _parse_json_answer(value)
            if is_valid_json_format:
                print(f"  '{key}': {final_display_value} (Returned as JSON)")
            else:
                print(f"  '{key}': '{final_display_value}' (Returned as Raw String, not the requested JSON format)")

    # Save to JSON file
    # results_filename = "batch_license_extraction_single_session_timed.json"
    # with open(results_filename, "w") as f:
    #     json.dump(all_collected_results, f, indent=2)
    # print(f"\nResults saved to {results_filename}")

Using Model ID: gguf_local_Llama-3_2-1B-Instruct-Q8_0

--- Processing Document: california_license_1 ---
Created new session: fd26d80a-5148-4a3e-9e9b-5aa7e93b0cff

[Session: fd26d80a-5148-4a3e-9e9b-5aa7e93b0cff, Model: gguf_local_Llama-3_2-1B-Instruct-Q8_0] Asking: What is the FULL address?

License Text:

        California
        DRIVER LICENSe
        dl 11234...
System Prompt: Share answer in a single word in JSON ONLY. DON'T ADD ANY OTHER WORD/REMARK/NOTE WITH THE ANSWER....
[Session: fd26d80a-5148-4a3e-9e9b-5aa7e93b0cff] Full Response: "2570 24TH STREET ANYTOWN, CA 95818"...
  Question 'What is the FULL address?...' processed in 3.26s

[Session: fd26d80a-5148-4a3e-9e9b-5aa7e93b0cff, Model: gguf_local_Llama-3_2-1B-Instruct-Q8_0] Asking: What's the state?...
System Prompt: Share answer in a single word in JSON ONLY. DON'T ADD ANY OTHER WORD/REMARK/NOTE WITH THE ANSWER....
[Session: fd26d80a-5148-4a3e-9e9b-5aa7e93b0cff] Full Response: California...
  Question 'What's the state?...'

In [28]:
# Cell 6: Main Execution Block (Single Session per License, License in First Prompt, Execution Time)

# --- Configuration ---
# Model identifier sent as "model_id" with every /chat request of this cell.
# NOTE(review): this cell repeats the previous Cell 6 with only this model ID
# changed; consider one parameterized cell instead of a copy per model.
CHOSEN_MODEL_ID = "gguf_local_Llama-3_2-1B-Instruct-UD-Q8_K_XL" # <--- !!! SET YOUR MODEL ID HERE !!!

# --- License Data ---
# Sample license text; irregular casing/misspellings are part of the sample
# (it looks like raw OCR output) and must be kept verbatim.
license_text_1 = """
        California
        DRIVER LICENSe
        dl 11234568
        CLASS C
        EXP 08/31/2014
        END NONE
        LNCARDHOLDER FNIMA
        2570 24TH STREET ANYTOWN, CA 95818
        doB 08/31/1977 RSTR NONE
        08311977
        VETERAN
        Cordhslde
        SEX F HGT 5'-05"
        HAIR BRN WGT 125 lb
        EYES BRN
        DD 00/00/0000NNNAN/ANFD/YY
        ISS 08/31/2009
"""

# --- Query Structure (Only one license document now) ---
# One dict per document. Keys read by process_license_document_sequentially:
#   document_id, license_text, system_prompt, questions_and_keys (required);
#   temperature, generation_params, model_load_params (optional).
QUERY_DOCUMENTS = [
    {
        "document_id": "california_license_1",
        "license_text": license_text_1,
        # NOTE(review): the prompt demands a single word, yet the address and
        # full-name questions expect multi-word answers — confirm the wording.
        "system_prompt": "Share answer in a single word in JSON ONLY. DON'T ADD ANY OTHER WORD/REMARK/NOTE WITH THE ANSWER.",
        # Each entry: "question" is the prompt text, "key" is the name under
        # which the answer is stored. The entry flagged "is_first_question"
        # gets the license text appended to its prompt.
        "questions_and_keys": [
            {"question": "What is the FULL address?", "key": "full_address", "is_first_question": True},
            {"question": "What's the state?", "key": "state"},
            {"question": "What's the gender (SEX)?", "key": "gender"},
            {"question": "What's the FULL NAME?", "key": "full_name"},

            # {"question": "What is the Date of Birth (doB)?", "key": "date_of_birth"},
            # {"question": "What is the License Expiry Date (EXP)?", "key": "expiry_date"}
        ],
        "temperature": 0.01,
        # Forwarded to the API as "model_specific_params".
        "generation_params": {"max_tokens": 150},
        # Forwarded to the API as "model_load_params".
        # NOTE(review): -1 presumably means "offload all layers" on the backend — confirm.
        "model_load_params": {"n_gpu_layers": -1}
    }
]

# --- Worker Function (Sequential Processing for a single document/license) ---
def process_license_document_sequentially(document_config, model_id, api_base_url):
    """Ask every configured question about one license document in a single session.

    The license text is appended to the prompt of each question flagged with
    "is_first_question". If no question carries that flag, the text is
    appended to the first question so the model always receives the document.

    Args:
        document_config: Dict with required keys "document_id", "license_text",
            "system_prompt" and "questions_and_keys" (a list of dicts with
            "question" and "key"), and optional keys "temperature"
            (default 0.1), "generation_params" and "model_load_params".
        model_id: Model identifier forwarded to ask_question_on_session.
        api_base_url: Base URL of the API.

    Returns:
        A dict with "document_id", "session_id", "status" and "results"
        (answer text per question key). On success "status" is "completed" and
        "processing_time_seconds" is included; if no session could be created
        "status" is "failed_session_creation" and "results" is empty.

    Raises:
        KeyError: if a required key is missing from the configuration.
    """
    doc_id = document_config["document_id"]
    current_license_text = document_config["license_text"]
    system_prompt = document_config["system_prompt"]
    questions = document_config["questions_and_keys"]
    temperature = document_config.get("temperature", 0.1)
    generation_params = document_config.get("generation_params", {})
    model_load_params = document_config.get("model_load_params", {})

    # perf_counter is monotonic, so measured durations are not affected by
    # system clock adjustments the way time.time() differences are.
    doc_start_time = time.perf_counter() # Start timer for this document
    print(f"\n--- Processing Document: {doc_id} ---")

    session_id = create_new_session(api_base_url)
    if not session_id:
        print(f"Failed to create session for {doc_id}. Skipping this document.")
        return {
            "document_id": doc_id,
            "session_id": None,
            "status": "failed_session_creation",
            "results": {}
        }

    # Without any explicit flag the license text would never be sent; fall
    # back to attaching it to the first question in that case.
    any_question_flagged = any(q.get("is_first_question", False) for q in questions)

    document_results_data = {}

    for index, q_item in enumerate(questions):
        question_start_time = time.perf_counter() # Start timer for this question
        user_question_text = q_item["question"]
        answer_key = q_item["key"]

        attach_license = q_item.get("is_first_question", False) or (
            not any_question_flagged and index == 0
        )
        if attach_license:
            effective_user_prompt = f"{user_question_text}\n\nLicense Text:\n{current_license_text}"
        else:
            effective_user_prompt = user_question_text

        document_results_data[answer_key] = ask_question_on_session(
            api_base_url,
            model_id,
            session_id,
            effective_user_prompt,
            system_prompt,
            generation_params,
            model_load_params,
            temperature=temperature
        )
        question_elapsed = time.perf_counter() - question_start_time
        print(f"  Question '{user_question_text[:50]}...' processed in {question_elapsed:.2f}s")
        # time.sleep(0.1) # Optional small delay

    doc_elapsed = time.perf_counter() - doc_start_time
    print(f"--- Finished processing Document: {doc_id} in {doc_elapsed:.2f}s ---")
    return {
        "document_id": doc_id,
        "session_id": session_id,
        "status": "completed",
        "results": document_results_data,
        "processing_time_seconds": doc_elapsed
    }

# --- Execution ---
if 'AVAILABLE_MODELS' not in globals():
    print("Warning: AVAILABLE_MODELS not found. Running Cell 2 to fetch models is recommended.")
    AVAILABLE_MODELS = []


def _parse_json_answer(raw_answer):
    """Try to read a model answer as JSON, tolerating a Markdown code fence.

    Returns a tuple (is_json, display_value): (True, re-serialized JSON text)
    when the answer parses as JSON after an optional leading ```json / ```
    and trailing ``` are removed, otherwise (False, raw_answer unchanged).
    """
    if not isinstance(raw_answer, str):
        return False, raw_answer

    cleaned = raw_answer.strip()
    if cleaned.startswith("```json"):
        cleaned = cleaned[len("```json"):].strip()
    if cleaned.startswith("```"):
        cleaned = cleaned[len("```"):].strip()
    if cleaned.endswith("```"):
        cleaned = cleaned[:-len("```")].strip()

    try:
        return True, json.dumps(json.loads(cleaned))
    except (json.JSONDecodeError, TypeError):
        return False, raw_answer


# A placeholder model ID can never work, regardless of whether the model list
# was fetched. An empty AVAILABLE_MODELS only means the ID cannot be verified
# (e.g. Cell 2 ran while the server was down), so it does not block the run.
if CHOSEN_MODEL_ID == "default_model_id_placeholder":
    print("CRITICAL: CHOSEN_MODEL_ID is still the placeholder value.")
    print("Please ensure your Flask server is running, Cell 2 ran successfully, and update CHOSEN_MODEL_ID if needed.")
else:
    known_model_ids = {m.get('id') for m in AVAILABLE_MODELS if isinstance(m, dict)}
    if known_model_ids and CHOSEN_MODEL_ID not in known_model_ids:
        print(f"Warning: CHOSEN_MODEL_ID '{CHOSEN_MODEL_ID}' is not in the model list fetched by Cell 2.")
    print(f"Using Model ID: {CHOSEN_MODEL_ID}")

    all_collected_results = []
    # perf_counter is monotonic, so the elapsed time is not affected by
    # system clock adjustments.
    overall_start_time = time.perf_counter() # Start timer for the entire batch

    for document_config in QUERY_DOCUMENTS:
        result_item = process_license_document_sequentially(
            document_config,
            CHOSEN_MODEL_ID,
            API_BASE_URL
        )
        all_collected_results.append(result_item)

    overall_end_time = time.perf_counter()
    total_processing_time = overall_end_time - overall_start_time
    print(f"\n--- All documents processed sequentially in {total_processing_time:.2f} seconds ---")

    # --- Display Results ---
    print("\n--- Collected Results ---")
    for item in all_collected_results:
        if not item:
            print("\nEncountered a None result item, skipping display for it.")
            continue

        print(f"\nDocument ID: {item.get('document_id', 'N/A')}")
        print(f"Session ID: {item.get('session_id', 'N/A')}")
        print(f"Status: {item.get('status', 'N/A')}")
        if "processing_time_seconds" in item:
            print(f"Processing Time for Doc: {item['processing_time_seconds']:.2f}s")

        if not item.get("results"):
            print("  No results for this document.")
            continue

        for key, value in item["results"].items():
            # Any answer that parses as JSON (object, string, number, ...) is
            # reported as JSON; everything else is shown as the raw string.
            is_valid_json_format, final_display_value = _parse_json_answer(value)
            if is_valid_json_format:
                print(f"  '{key}': {final_display_value} (Returned as JSON)")
            else:
                print(f"  '{key}': '{final_display_value}' (Returned as Raw String, not the requested JSON format)")

    # Save to JSON file
    # results_filename = "batch_license_extraction_single_session_timed.json"
    # with open(results_filename, "w") as f:
    #     json.dump(all_collected_results, f, indent=2)
    # print(f"\nResults saved to {results_filename}")

Using Model ID: gguf_local_Llama-3_2-1B-Instruct-UD-Q8_K_XL

--- Processing Document: california_license_1 ---
Created new session: fe34dde0-30f4-491d-b354-a5923fb7dc72

[Session: fe34dde0-30f4-491d-b354-a5923fb7dc72, Model: gguf_local_Llama-3_2-1B-Instruct-UD-Q8_K_XL] Asking: What is the FULL address?

License Text:

        California
        DRIVER LICENSe
        dl 11234...
System Prompt: Share answer in a single word in JSON ONLY. DON'T ADD ANY OTHER WORD/REMARK/NOTE WITH THE ANSWER....
[Session: fe34dde0-30f4-491d-b354-a5923fb7dc72] Full Response: "2570 24TH STREET ANYTOWN, CA 95818"...
  Question 'What is the FULL address?...' processed in 10.35s

[Session: fe34dde0-30f4-491d-b354-a5923fb7dc72, Model: gguf_local_Llama-3_2-1B-Instruct-UD-Q8_K_XL] Asking: What's the state?...
System Prompt: Share answer in a single word in JSON ONLY. DON'T ADD ANY OTHER WORD/REMARK/NOTE WITH THE ANSWER....
[Session: fe34dde0-30f4-491d-b354-a5923fb7dc72] Full Response: California...
  Question 'Wh