# Cell 1 — Install required packages
Installs Hugging Face client, sentence-transformers, CodeCarbon, and other required libraries.
These are needed for embeddings, API calls, and energy tracking.


In [1]:
# Cell 1
!pip install --upgrade huggingface_hub sentence-transformers codecarbon pypdf torch scikit-learn pandas


Collecting huggingface_hub
  Using cached huggingface_hub-1.1.4-py3-none-any.whl.metadata (13 kB)
Collecting pandas
  Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl.metadata (19 kB)
Collecting hf-xet<2.0.0,>=1.2.0 (from huggingface_hub)
  Using cached hf_xet-1.2.0-cp37-abi3-win_amd64.whl.metadata (5.0 kB)
Collecting typer-slim (from huggingface_hub)
  Using cached typer_slim-0.20.0-py3-none-any.whl.metadata (16 kB)
Downloading pandas-2.3.3-cp313-cp313-win_amd64.whl (11.0 MB)
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
   ---------------------------------------- 0.0/11.0 MB ? eta -:--:--
    --------------------------------------- 0.3/11.0 MB ? eta -:--:--
   - -------------------------------------- 0.5/11.0 MB 1.3 MB/s eta 0:00:09
   -- ------------------------------------- 0.8/11.0 MB 1.0 MB/s eta 0:00:10
   -- ------------------------------------- 0.8/11.0 MB 1.0 MB/s eta 0:00:10
   --

# Cell 2 — Configuration and imports
Set HF API key, model ID (Gemma), data folder path, and import core libraries.
Also define file paths for answer and emission logging.


In [None]:
# Cell 2
import os
import time
import textwrap
import glob
import pandas as pd
import numpy as np
from huggingface_hub import InferenceClient
from sentence_transformers import SentenceTransformer
from codecarbon import OfflineEmissionsTracker

# === EDIT THIS: paste your HF API key here (read permission) ===
HF_API_KEY = "api_org_your_hf_api_key_here"

# Model to use
HF_MODEL_ID = "google/gemma-2-2b-it"

# Data folder must be ./data containing source.txt
DATA_PATH = os.path.join(os.getcwd(), "data")

# Country for CodeCarbon
YOUR_COUNTRY_ISO_CODE = "EGY"

# Files for logging answers & emissions
LOG_FILE = os.path.join(os.getcwd(), "answers_log.csv")
EMISSIONS_FILE = os.path.join(os.getcwd(), "emissions_log.csv")

# Quick checks
print("HF model:", HF_MODEL_ID)
print("Data folder:", DATA_PATH)
print("Answer log file:", LOG_FILE)
print("Emissions log file:", EMISSIONS_FILE)
print("API key loaded?", (HF_API_KEY[:10] + "...") if HF_API_KEY else "MISSING")


HF model: google/gemma-2-2b-it
Data folder: c:\Users\DELL\ELO2_GREEN_AI\google-gemma\data
Answer log file: c:\Users\DELL\ELO2_GREEN_AI\google-gemma\answers_log.csv
Emissions log file: c:\Users\DELL\ELO2_GREEN_AI\google-gemma\emissions_log.csv
API key loaded? hf_FiHWyoy...


# Cell 3 — Define prompts
Defines the system/user templates for the draft, critic, and refiner steps.


In [11]:
# Cell 3
DRAFT_SYSTEM_PROMPT = (
    "You are an expert assistant. Answer the user's question based *only* on the provided context. "
    "Do not make up facts beyond the context. If the answer is not present in the context, say so clearly."
)
DRAFT_USER_TEMPLATE = "Context:\n{context_str}\n\nQuestion:\n{query_str}"

CRITIC_SYSTEM_PROMPT = (
    "You are a 'Critic' AI. Evaluate the 'Draft Answer' using ONLY the Source Context. "
    "Check Faithfulness (is each claim supported by the context?) and Relevance (does it answer?). "
    "Provide short bullet points describing problems or write 'The draft is perfect.' if no problems."
)
CRITIC_USER_TEMPLATE = "Source Context:\n{context}\n\nOriginal Question:\n{question}\n\nDraft Answer:\n{draft}"

REFINER_SYSTEM_PROMPT = (
    "You are a 'Refiner' AI. Rewrite the Draft Answer to incorporate the Critic's Feedback. "
    "Do not add new factual information beyond what is present in the draft or context. Output only the improved answer."
)
REFINER_USER_TEMPLATE = "Original Draft:\n{draft}\n\nCritic's Feedback:\n{feedback}"

print("Prompts ready to use.")


Prompts ready to use.


# Cell 4 — Load & index documents
Loads all text files from ./data, chunks text into smaller segments,
computes embeddings with SentenceTransformer, and builds a simple
cosine-similarity retriever. Each chunk is stored as a Node object
for easy retrieval during the RAG loop.


In [12]:
# Cell 4

class Node:
    def __init__(self, content, doc_id, chunk_id, fname=None):
        self._content = content
        self.doc_id = doc_id
        self.chunk_id = chunk_id
        self.fname = fname

    def get_content(self):
        return self._content


def load_text_files(data_path):
    texts = []
    all_files = glob.glob(os.path.join(data_path, "*"))
    print("All files found in folder:", all_files)
    for p in sorted(all_files):
        if os.path.isdir(p):
            continue
        try:
            with open(p, "r", encoding="utf-8") as f:
                texts.append((os.path.basename(p), f.read()))
        except Exception as e:
            print(f"Skipping file {p} (could not read): {e}")
    return texts


def chunk_text(text, chunk_size=400, overlap=80):
    tokens = text.split()
    chunks = []
    i = 0
    while i < len(tokens):
        chunk = tokens[i : i + chunk_size]
        chunks.append(" ".join(chunk))
        i += chunk_size - overlap
    return chunks


# Load files
raw_docs = load_text_files(DATA_PATH)
if len(raw_docs) == 0:
    raise FileNotFoundError(
        f"No readable text files found in {DATA_PATH}. Put source.txt in ./data/"
    )

# Build chunks
doc_chunks = []
for doc_id, (fname, text) in enumerate(raw_docs):
    chunks = chunk_text(text, chunk_size=400, overlap=80)
    for cid, c in enumerate(chunks):
        doc_chunks.append((fname, c, doc_id, cid))

# Embedding model
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
texts_for_embedding = [c[1] for c in doc_chunks]
print(f"Computing embeddings for {len(texts_for_embedding)} chunks...")
embeddings = embed_model.encode(
    texts_for_embedding, convert_to_numpy=True, show_progress_bar=True
)
embeddings_norm = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
nodes = [Node(content=c[1], doc_id=c[2], chunk_id=c[3], fname=c[0]) for c in doc_chunks]

print(f"Built {len(nodes)} chunks and embeddings. Retriever ready ")


def retrieve_top_k(query, k=3):
    q_emb = embed_model.encode(query, convert_to_numpy=True)
    q_emb = q_emb / np.linalg.norm(q_emb)
    sims = embeddings_norm @ q_emb
    topk_idx = np.argsort(-sims)[:k]
    return [nodes[i] for i in topk_idx]
print("Retriever function ready to use.")

All files found in folder: ['c:\\Users\\DELL\\ELO2_GREEN_AI\\google-gemma\\data\\source.txt']
Computing embeddings for 5 chunks...


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Built 5 chunks and embeddings. Retriever ready 
Retriever function ready to use.


# Cell 5 — Initialize Hugging Face client and define API call
Initializes the Hugging Face InferenceClient and defines a robust `call_hf_api` function.
Also runs a tiny test to verify connectivity with the model.


In [22]:
# Cell 5
# --- Initialize HF client ---
client = InferenceClient(token=HF_API_KEY)
print("Hugging Face API Client initialized ")


# --- Helper to truncate long context ---
def truncate_context(context, max_words=900):
    words = context.split()
    if len(words) <= max_words:
        return context
    return " ".join(words[-max_words:])


# --- Updated safe API wrapper for conversational models ---
def call_hf_api(system_prompt, user_prompt, max_tokens=300, retries=3, delay=10):
    """
    Sends prompts to the HF API using chat_completion and returns the text response.
    Compatible with conversational models like Gemma.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    for attempt in range(1, retries + 1):
        try:
            response_full = client.chat_completion(
                messages=messages,
                model=HF_MODEL_ID,
                max_tokens=max_tokens,
                temperature=0.0,
            )
            return response_full.choices[0].message.content.strip()
        except Exception as e:
            print(f"  API Error (Attempt {attempt}/{retries}): {e}")
            if attempt < retries:
                time.sleep(delay * attempt)
            else:
                return "Error: Could not get response from API."


# --- Tiny API test ---
print("\nRunning a tiny API test...")
test_prompt = "Who was the first person on the Moon?"
resp = call_hf_api("You are a helpful assistant.", test_prompt, max_tokens=60)
print("\n--- Test response ---\n", resp)


Hugging Face API Client initialized 

Running a tiny API test...

--- Test response ---
 The first person to walk on the Moon was **Neil Armstrong**, an American astronaut. He took his famous "one small step" on July 20, 1969, during the Apollo 11 mission.


In [None]:
# Cell 6
# --- Main RAG loop with refinement ---
REFINEMENT_CYCLES = 3
tracker = OfflineEmissionsTracker(country_iso_code=YOUR_COUNTRY_ISO_CODE)
tracker.start()
print("CodeCarbon tracker started ✅")

print(f"\n--- Query Engine Ready ({REFINEMENT_CYCLES} refinement cycles) ---")
print("Type 'exit' to quit.\n")

try:
    while True:
        query = input("Your Question: ").strip()
        if not query:
            continue
        if query.lower() == "exit":
            break

        tracker.start_task("RAG Query")
        start_time = time.time()

        # --- Retrieve context ---
        retrieved_nodes = retrieve_top_k(query, k=3)
        context_str_full = "\n---\n".join([n.get_content() for n in retrieved_nodes])
        context_str = truncate_context(context_str_full, max_words=900)

        # --- Draft Answer ---
        user_prompt = DRAFT_USER_TEMPLATE.format(
            context_str=context_str, query_str=query
        )
        draft_text = call_hf_api(DRAFT_SYSTEM_PROMPT, user_prompt, max_tokens=400)
        print("\n Initial Draft:\n", textwrap.fill(draft_text, width=80))
        current_draft = draft_text

        # --- Refinement Loop ---
        for i in range(REFINEMENT_CYCLES):
            print(f"\n Refinement Cycle {i + 1}/{REFINEMENT_CYCLES}")

            # Critic step
            critic_prompt = CRITIC_USER_TEMPLATE.format(
                context=context_str, question=query, draft=current_draft
            )
            feedback = call_hf_api(CRITIC_SYSTEM_PROMPT, critic_prompt, max_tokens=200)
            print("\n Critic Feedback:\n", textwrap.fill(feedback, width=80))

            if "the draft is perfect" in (feedback or "").lower():
                print(" Draft approved early!")
                break

            # Refiner step
            refiner_prompt = REFINER_USER_TEMPLATE.format(
                draft=current_draft, feedback=feedback
            )
            current_draft = call_hf_api(
                REFINER_SYSTEM_PROMPT, refiner_prompt, max_tokens=400
            )
            print("\n Refined Draft:\n", textwrap.fill(current_draft, width=80))

        # --- Final Answer ---
        final_answer = current_draft
        end_time = time.time()
        emissions = tracker.stop_task()

        print("\n FINAL ANSWER:\n", textwrap.fill(final_answer, width=80))
        print(f"\n--- Metrics ---\nTime: {end_time - start_time:.2f}s")
        try:
            print(f"Emissions (local only): {emissions.emissions * 1000:.6f} gCO2eq")
        except Exception:
            print("Emissions: (couldn't read emissions data)")

except KeyboardInterrupt:
    print("\nLoop interrupted by user.")

finally:
    total_emissions = tracker.stop()
    print("\n Total Emissions Summary:")
    print(f"{total_emissions * 1000:.6f} gCO2eq")
    print("Notebook finished.")


CodeCarbon tracker started ✅

--- Query Engine Ready (3 refinement cycles) ---
Type 'exit' to quit.


 Initial Draft:
 The program alarms (1201 and 1202) occurred because the guidance computer (LGC)
could not complete all its tasks in real-time. This was due to "executive
overflows", meaning the computer was overloaded with tasks.

 Refinement Cycle 1/3

 Critic Feedback:
 The draft answer is **Faithful** and **Relevant**.   * It accurately states the
reason for the alarms: the guidance computer (LGC) was overloaded with tasks. *
It correctly identifies the cause: "executive overflows".

 Refined Draft:
 The program alarms (1201 and 1202) occurred because the guidance computer (LGC)
experienced "executive overflows", meaning it was overloaded with tasks and
could not complete all its real-time tasks.

 Refinement Cycle 2/3

 Critic Feedback:
 The draft answer is **Faithful** and **Relevant**.   * It accurately states the
reason for the alarms: "executive overflows" meaning the guidance

  df = pd.concat([df, new_df], ignore_index=True)
