<a href="https://colab.research.google.com/github/DilkiSandunika/VGTU_Thesis_Project/blob/main/notebooks/03_end_to_end_pipeline_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ===================================================================
# CELL 1: Install All Necessary Libraries
# ===================================================================
print("Installing required libraries for the full RAG pipeline with Gemma...")
# We need transformers and accelerate for Hugging Face models, and bitsandbytes for quantization
!pip install pandas faiss-cpu sentence-transformers torch transformers accelerate bitsandbytes -q
print("Libraries installed successfully.")


# ===================================================================
# CELL 2: Import Libraries and Log in to Hugging Face
# ===================================================================
import os
import numpy as np
import pandas as pd
import faiss
from sentence_transformers import SentenceTransformer
import pickle
from google.colab import userdata
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Securely load the Hugging Face token from Colab secrets
try:
    HF_TOKEN = userdata.get('HF_TOKEN')
    print("Hugging Face token loaded successfully.")
except Exception as e:
    print("ERROR: Could not load Hugging Face token. Please add it to Colab's secrets (key icon on the left) with the name HF_TOKEN.")

# Log in to Hugging Face Hub
from huggingface_hub import login
login(token=HF_TOKEN)
print("Successfully logged in to Hugging Face.")


# ===================================================================
# CELL 3: Load All Pre-processed Data and Models
# ===================================================================
print("\nLoading all necessary components...")

# --- 1. Load the Parsed Requirements ---
df_requirements = pd.read_csv('/content/parsed_requirements.csv')
print(f"Loaded {len(df_requirements)} requirements from the CSV file.")

# --- 2. Load the Knowledge Base ---
index = faiss.read_index('/content/knowledge_base.index')
with open('/content/knowledge_base_docs.pkl', 'rb') as f:
    knowledge_base_docs = pickle.load(f)
print("Loaded FAISS index and knowledge base documents.")

# --- 3. Load the Sentence Transformer Model ---
retrieval_model = SentenceTransformer('all-MiniLM-L6-v2')
print("Sentence Transformer model loaded.")

# --- 4. Load the Gemma Model for Generation ---
print("\nLoading Google Gemma model... This will take a few minutes and use significant RAM.")
model_id = "google/gemma-2b-it"

quantization_config = BitsAndBytesConfig(load_in_4bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_id)
gemma_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto"
)
print("Google Gemma model loaded successfully!")
print("\n--- Setup is complete and all components are ready! ---")


# ===================================================================
# CELL 4: The RAG Core and Validation Functions
# ===================================================================

def retrieve_relevant_knowledge(query_text, top_k=3):
    """
    Searches the FAISS index for the most relevant knowledge base documents for a given query.
    """
    query_vector = retrieval_model.encode([query_text])
    distances, indices = index.search(query_vector.astype('float32'), top_k)
    retrieved_docs = [knowledge_base_docs[i] for i in indices[0]]
    return retrieved_docs

def generate_compliant_requirement_with_gemma(original_requirement, retrieved_docs):
    """
    Builds a prompt and calls the Gemma model to generate a refined requirement.
    """
    retrieved_knowledge = "\n- ".join(retrieved_docs)

    # --- THIS IS THE UPGRADED PROMPT ---
    chat = [
        { "role": "user", "content": f"""
You are an expert Software Requirements Analyst. Your task is to analyze an original requirement and rewrite it to be compliant with a set of standard formats.

**Compliance Rules to Follow:**
- {retrieved_knowledge}

**Original Requirement to Analyze:**
"{original_requirement}"

**Your Task:**
1.  First, analyze the intent of the original requirement.
2.  Next, select the MOST APPROPRIATE format from the list of five standard formats provided in the compliance rules.
3.  Finally, rewrite the original requirement to perfectly match your chosen format.

Your entire response MUST be only the single, rewritten requirement sentence. Do not add any other text, preambles, or explanations about which format you chose.
"""
        }
    ]
    # --- END OF UPGRADED PROMPT ---

    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to("cuda")

    outputs = gemma_model.generate(input_ids=inputs, max_new_tokens=150)

    response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response_text[len(prompt)-7:]

# def generate_compliant_requirement_with_gemma(original_requirement, retrieved_docs):
#     """
#     Builds a prompt and calls the Gemma model to generate a refined requirement.
#     """
#     retrieved_knowledge = "\n- ".join(retrieved_docs)

#     chat = [
#         { "role": "user", "content": f"""
# You are an expert Software Requirements Analyst. Your task is to refine a given software requirement to ensure it is compliant with a set of rules and well-formed according to a template.

# **Compliance Rules and Template Guide to Follow:**
# - {retrieved_knowledge}

# **Original Requirement to Refine:**
# "{original_requirement}"

# **Your Task:**
# Rewrite the original requirement to be fully compliant with the rules provided above.
# - Ensure the output strictly follows the format: "The system shall [action description] for the [user role]."
# - Your response MUST begin with "The system shall" and be a single sentence. Do not add any other text, preambles, or explanations. Your entire response must be only the single, rewritten requirement.
# """
#         }
#     ]

#     prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
#     inputs = tokenizer.encode(prompt, add_special_tokens=False, return_tensors="pt").to("cuda")

#     outputs = gemma_model.generate(input_ids=inputs, max_new_tokens=150)

#     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
#     return response_text[len(prompt)-7:]

def calculate_compliance_score(generated_text, retrieved_rules):
    """
    Calculates a compliance score based on a set of rules and multiple possible formats.
    """
    feedback = []

    # Define the valid requirement patterns
    patterns = [
        "The <system name> shall <system response>",
        "WHILE <in a specific state> the <system name> shall <system response>",
        "WHEN <trigger> <optional precondition> the <system name> shall <system response>",
        "WHERE <feature is included> the <system name> shall <system response>",
        "IF <unwanted condition or event>, THEN the <system name> shall <system response>"
    ]

    # --- Rule Check 1: Check for adherence to one of the standard formats ---
    # We create simplified keywords to check for each pattern
    pattern_keywords = [
        "The ", "WHILE ", "WHEN ", "WHERE ", "IF "
    ]

    found_pattern = False
    for keyword in pattern_keywords:
        if generated_text.startswith(keyword):
            found_pattern = True
            break

    if found_pattern:
        feedback.append("PASS: Requirement follows one of the standard formats.")
    else:
        feedback.append("FAIL: Requirement does not begin with a standard pattern keyword (The, WHILE, WHEN, WHERE, IF).")

    # --- Rule Check 2: Check for active voice ---
    if any("active voice" in rule for rule in retrieved_rules):
        if "should be" not in generated_text:
            feedback.append("PASS: Requirement is written in an active voice.")
        else:
            feedback.append("FAIL: Requirement may be in a passive voice (contains 'should be').")

    # --- Rule Check 3: Check for the core "shall" statement ---
    if "shall" in generated_text:
        feedback.append("PASS: Requirement contains the mandatory keyword 'shall'.")
    else:
        feedback.append("FAIL: Requirement is missing the mandatory keyword 'shall'.")

    # Calculate the final score
    pass_count = sum(1 for note in feedback if note.startswith("PASS"))
    total_checks = len(feedback)

    if total_checks == 0:
        return "N/A", ["No applicable rules found to score against."]

    return f"{pass_count}/{total_checks}", feedback

# def calculate_compliance_score(generated_text, retrieved_rules):
#     """
#     Calculates a simple compliance score based on the retrieved rules.
#     This function directly implements the "Compliance Score" metric from the thesis.
#     """
#     score = 0
#     total_possible_score = 0
#     feedback = []

#     # Rule Check 1: Check for the standard format
#     template_guide = next((rule for rule in retrieved_rules if "format:" in rule), None)
#     if template_guide:
#         total_possible_score += 1
#         if "The system shall" in generated_text and "for the" in generated_text:
#             score += 1
#             feedback.append("PASS: Requirement follows the standard 'The system shall...' format.")
#         else:
#             feedback.append("FAIL: Requirement does not follow the standard format.")

#     # Rule Check 2: Check for active voice
#     if any("active voice" in rule for rule in retrieved_rules):
#         total_possible_score += 1
#         if "should be" not in generated_text:
#             score += 1
#             feedback.append("PASS: Requirement is written in an active voice.")
#         else:
#             feedback.append("FAIL: Requirement may be in a passive voice (contains 'should be').")

#     # Rule Check 3: Check for specified user role
#     if any("user role" in rule for rule in retrieved_rules):
#         total_possible_score += 1
#         if "for the user" not in generated_text.lower() and "for the" in generated_text.lower():
#             score += 1
#             feedback.append("PASS: Requirement appears to specify a user role.")
#         else:
#             feedback.append("FAIL: Requirement uses a generic role ('user') or is missing one.")

#     if total_possible_score == 0:
#         return "N/A", ["No applicable rules found to score against."]

#     return f"{score}/{total_possible_score}", feedback
# ===================================================================
# CELL 5: Run on Multiple Requirements and Create (X, y) Examples
# ===================================================================

# --- Configuration ---
num_samples_to_process = 10
results_list = [] # List to store our (X, y) pairs

print("=====================================================================")
print(f"  RUNNING RAG PIPELINE to generate {num_samples_to_process} (X, y) examples  ")
print("=====================================================================")

for idx, row in df_requirements.head(num_samples_to_process).iterrows():

    original_req_text = row['text']
    print(f"\n\nProcessing Requirement #{idx + 1}...")

    # --- RAG Pipeline Steps ---
    relevant_rules = retrieve_relevant_knowledge(original_req_text)
    refined_requirement = generate_compliant_requirement_with_gemma(original_req_text, relevant_rules)
    compliance_score, feedback_notes = calculate_compliance_score(refined_requirement, relevant_rules)

    # --- Store the results ---
    results_list.append({
        'X_Input_Requirement': original_req_text,
        'y_Output_Requirement': refined_requirement,
        'Compliance_Score': compliance_score,
        'Retrieved_Context_Rules': " | ".join(relevant_rules) # Join rules into a single string
    })

    # --- Print the output for viewing ---
    print("---------------------------------------------------------------------")
    print(f"[INPUT - X] Original:\n'{original_req_text}'")
    print(f"\n[OUTPUT - y] Refined:\n'{refined_requirement}'")
    print(f"\n[VALIDATION] Compliance Score: {compliance_score}")
    print("=====================================================================")

# --- Create a final DataFrame with the results ---
df_results = pd.DataFrame(results_list)

# --- Save the results to a new CSV file in the processed data folder ---
output_examples_path = '/content/X_y_examples.csv'
df_results.to_csv(output_examples_path, index=False)

print(f"\nSuccessfully generated and saved {len(df_results)} (X, y) examples to '{output_examples_path}'")

# Trigger a download of this new file
from google.colab import files
files.download(output_examples_path)
# ===================================================================

# --- Configuration ---
num_samples_to_process = 5

print("=====================================================================")
print(f"       RUNNING RAG PIPELINE DEMO on the First {num_samples_to_process} Requirements      ")
print("=====================================================================")


# # ===================================================================
# # CELL 5: Run the Full End-to-End Demo with Validation
# # --- Loop through the first N samples of the DataFrame ---
# for idx, row in df_requirements.head(num_samples_to_process).iterrows():

#     original_req_text = row['text']

#     print(f"\n\nProcessing Requirement #{idx + 1}...")
#     print("---------------------------------------------------------------------")

#     # --- Step 1: Input ---
#     print(f"[INPUT] Original Requirement:\n'{original_req_text}'")

#     # --- Step 2: Retrieval ---
#     print("\n[STEP 1 - RETRIEVAL] Finding the most relevant rules from the knowledge base...")
#     relevant_rules = retrieve_relevant_knowledge(original_req_text)
#     print("  - Found the following rules:")
#     for rule in relevant_rules:
#         print(f"    - {rule}")

#     # --- Step 3: Generation ---
#     print("\n[STEP 2 - GENERATION] Sending the original requirement and retrieved rules to Gemma for refinement...")
#     refined_requirement = generate_compliant_requirement_with_gemma(original_req_text, relevant_rules)

#     # --- Step 4: Validation ---
#     print("\n[STEP 3 - VALIDATION] Calculating the automated compliance score...")
#     compliance_score, feedback_notes = calculate_compliance_score(refined_requirement, relevant_rules)
#     print(f"  - Compliance Score: {compliance_score}")
#     for note in feedback_notes:
#         print(f"    - {note}")

#     # --- Step 5: Final Output ---
#     print("\n---------------------------------------------------------------------")
#     print(f"[OUTPUT] Final, Compliant Requirement:\n'{refined_requirement}'")
#     print("=====================================================================")

Installing required libraries for the full RAG pipeline with Gemma...
Libraries installed successfully.
Hugging Face token loaded successfully.
Successfully logged in to Hugging Face.

Loading all necessary components...
Loaded 115 requirements from the CSV file.
Loaded FAISS index and knowledge base documents.
Sentence Transformer model loaded.

Loading Google Gemma model... This will take a few minutes and use significant RAM.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Google Gemma model loaded successfully!

--- Setup is complete and all components are ready! ---
  RUNNING RAG PIPELINE to generate 10 (X, y) examples  


Processing Requirement #1...
---------------------------------------------------------------------
[INPUT - X] Original:
'The solution should provide detailed context-sensitive help material for all the possible actions and scenarios on all user interfaces in the application.'

[OUTPUT - y] Refined:
'entence:

"The application shall provide context-sensitive help materials for each user interface action and scenario, enabling users to understand and perform actions effectively."'

[VALIDATION] Compliance Score: 1/2


Processing Requirement #2...
---------------------------------------------------------------------
[INPUT - X] Original:
'The solution should provide detailed context-sensitive help material for all the possible actions and scenarios on all user interfaces in the application.'

[OUTPUT - y] Refined:
'entence:

"The appli

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

       RUNNING RAG PIPELINE DEMO on the First 5 Requirements      
