In [1]:
import os
import torch
import pandas as pd
import oracledb
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from huggingface_hub import login
from dotenv import load_dotenv

load_dotenv()

device = "cuda" if torch.cuda.is_available() else "cpu"

# Ensure less fragmentation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

login(token=os.getenv("HF_TOKEN"))
print(device)

  from .autonotebook import tqdm as notebook_tqdm
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


cuda


In [None]:
# Model ID
model_id = "RedHatAI/Mistral-7B-Instruct-v0.3-quantized.w4a16"

# Load tokenizer safely
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
    use_fast=False
)

# Load model with memory-efficient settings
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # Will place parts on available GPU or CPU automatically
    trust_remote_code=True
)

# Load pipeline with device fallback
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)

`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Found modules on cpu/disk. Using Exllama/Exllamav2 backend requires all the modules to be on GPU. Setting `disable_exllama=True`


RuntimeError: no device index

In [None]:
# Setup Oracle DB connection
DB_CONFIG = {
    "host": os.getenv("DB_HOST"),
    "port": os.getenv("DB_PORT"),
    "service_name": os.getenv("DB_SERVICE"),
    "user": os.getenv("DB_USER"),
    "password": os.getenv("DB_PASSWORD")
}

dsn = f"{DB_CONFIG['host']}:{DB_CONFIG['port']}/{DB_CONFIG['service_name']}"
connection = oracledb.connect(user=DB_CONFIG['user'], password=DB_CONFIG['password'], dsn=dsn)
cursor = connection.cursor()

In [None]:
# SQL runner function
def run_sql(query: str):
    try:
        cursor.execute(query)
        columns = [col[0] for col in cursor.description]
        data = cursor.fetchall()
        df = pd.DataFrame(data, columns=columns)
        return df.head(5).to_markdown(index=False)
    except Exception as e:
        return f"[SQL Error] {str(e)}"

In [None]:
# Read initial prompt from file
with open("initial_prompt.txt", "r") as f:
    history = f.read().strip()

In [None]:
# Query tracking
executed_queries = set()

# Step loop
for step in range(5):
    print(f"\n--- Step {step + 1} ---")

    response = pipe(
        history,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        pad_token_id=tokenizer.eos_token_id
    )[0]["generated_text"]

    print(f"LLM Response:\n{response}")

    # Match response types
    sql_match = re.search(r"SQL:\s*(```sql)?\s*(SELECT .*?)(```)?\s*$", response, re.DOTALL | re.IGNORECASE)
    explanation_match = re.search(r"Explanation:\s*(.*?)\s*(SQL:|$)", response, re.DOTALL | re.IGNORECASE)
    error_match = re.search(r"Error:\s*(.*?)\s*(SQL:|Explanation:|$)", response, re.DOTALL | re.IGNORECASE)

    # Explanation block
    if explanation_match:
        explanation = explanation_match.group(1).strip()
        print(f"\nExplanation:\n{explanation}")
        history += f"\n\nExplanation: {explanation}"

    # Error block
    if error_match:
        error = error_match.group(1).strip()
        print(f"\n[SQL Error]\n{error}")
        history += f"\n\nError: {error}"
        break

    # SQL block
    if sql_match:
        query = sql_match.group(2).strip().rstrip(";")
        if query in executed_queries:
            print("[Repeated query detected. Ending exploration.]")
            break
        executed_queries.add(query)

        output = run_sql(query)
        print("\nQuery Result Preview:\n", output)
        history += f"\n\nSQL: {query}\nResult Preview:\n{output}"
    else:
        print("[No valid SQL found. Ending interaction.]")
        break

    history += "\n\nNext?"

# Optionally save new history for reloading
with open("history_prompt.txt", "w") as f:
    f.write(history)




--- Step 1 ---


OutOfMemoryError: CUDA out of memory. Tried to allocate 2.94 GiB. GPU 0 has a total capacity of 7.62 GiB of which 1.60 GiB is free. Including non-PyTorch memory, this process has 5.79 GiB memory in use. Of the allocated memory 5.62 GiB is allocated by PyTorch, and 57.66 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

: 