In [1]:
import os
import sys
import torch
import re

# Add the parent directory to the path
sys.path.append(os.path.abspath(".."))

# Local imports
from tokenizer.tokenizer import get_tokenizer
from model.transformer_block import Transformer
from dataset.qa_dataset import QADataset

# 1) Hyperparameters
MAX_LEN      = 512
DEVICE       = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODEL_PATH   = r"C:\Users\Admin\Desktop\LLM_QA\training\data\processed\qa_transformer.pt"

# 2) Prepare tokenizer
tokenizer = get_tokenizer()

# 3) Load the saved model
model = Transformer(vocab_size=tokenizer.vocab_size).to(DEVICE)
model.load_state_dict(torch.load(MODEL_PATH))
model.eval()

# 4) Inference function with general and rephrased support
def predict_answer(question, context):
    q_lower = question.strip().lower()
    stopwords = {"the", "in", "of", "a", "an", "is", "and", "water"}
    keyword = ""
    rephrased_question = question  # Default: use original question

    # Handle "importance of ..." phrasing
    if "importance of" in q_lower:
        m = re.search(r"importance of ([\w\s]+?)(?: in aquaponics)?\??$", q_lower)
        if m:
            cand = m.group(1).strip()
            if cand and cand.lower() not in stopwords and len(cand.split()) == 1:
                keyword = cand
                rephrased_question = f"Why is {keyword} important in aquaponics?"
            else:
                return question, f'There is no defined “importance” for "{cand}" in aquaponics.'
        else:
            return question, "No specific keyword found to answer that question."

    # Encode the (possibly rephrased) question and context
    enc = tokenizer(
        rephrased_question,
        context,
        max_length=MAX_LEN,
        truncation=True,
        padding="max_length",
        return_tensors="pt"
    )
    input_ids = enc["input_ids"].to(DEVICE)
    attn_mask = enc["attention_mask"].to(DEVICE)

    # Predict start and end logits
    with torch.no_grad():
        start_logits, end_logits = model(input_ids, attn_mask)

    # Convert to probabilities
    start_probs = torch.softmax(start_logits.squeeze(0), dim=-1)
    end_probs = torch.softmax(end_logits.squeeze(0), dim=-1)
    scores = torch.matmul(start_probs.unsqueeze(1), end_probs.unsqueeze(0))
    
    # Apply upper-triangle mask to enforce start <= end
    seq_len = scores.size(0)
    mask = torch.triu(torch.ones((seq_len, seq_len), device=DEVICE))
    scores *= mask

    # Find best start and end index
    s_idx, e_idx = divmod(scores.argmax().item(), seq_len)
    tokens = input_ids[0, s_idx:e_idx + 1]
    answer = tokenizer.decode(tokens, skip_special_tokens=True).strip()

    # If answer is empty or stopword, and it was a rephrased importance question, return fallback
    if (not answer or answer.lower() in stopwords) and keyword:
        return question, f'There is no defined “importance” for "{keyword}" in aquaponics.'

    # Enhancement for importance questions
    if keyword and answer.lower().startswith(keyword.lower()):
        answer += (
            ". This is important because it enables sustainable agriculture, "
            "reduces environmental impact, and supports both plant and fish growth naturally."
        )

    return question, answer


# 5) Load dataset
DATASET_PATH = r"C:\Users\Admin\Desktop\LLM_QA\data\processed\processed_aquaponics_dataset.json"
dataset = QADataset(DATASET_PATH, tokenizer)

# 6) Run predictions on the dataset
for i in range(len(dataset)):
    item = dataset[i]
    question, context = item["question"], item["context"]

    # Predict
    question, predicted_answer = predict_answer(question, context)

    # Display
    print(f"Context: {context[:100]}...")  # Partial context for brevity
    print(f"Q: {question}")
    print(f"Predicted Answer: {predicted_answer}")
    print("-" * 50)


Context: Fish tanks should have a pH level between 6.8 and 7.2 for optimal fish health in aquaponics systems....
Q: What pH level should fish tanks have in aquaponics?
Predicted Answer: aquaponics? fish tanks should have a ph level between 6. 8 and 7. 2 for optimal fish health in aquaponics systems.
--------------------------------------------------
Context: Aquaponics systems can be built indoors or outdoors depending on climate and available space....
Q: What is the importance of Aquaponics in aquaponics?
Predicted Answer: aquaponics systems can be built indoors or outdoors depending on climate and available space.. This is important because it enables sustainable agriculture, reduces environmental impact, and supports both plant and fish growth naturally.
--------------------------------------------------
Context: The plants in aquaponic systems act as natural water filters, removing harmful chemicals like nitrat...
Q: What is the importance of The in aquaponics?
Predicted Answer: T