In [None]:
!pip install huggingface_hub
!pip install llama-index-core
!pip install llama-index-embeddings-huggingface
!pip install sentence-transformers
!pip install pypdf
!pip install codecarbon
!pip install tf-keras

In [None]:
%pip install llama-index-llms-llama-cpp llama-index-embeddings-huggingface llama-index-core

In [None]:
import textwrap
from huggingface_hub import InferenceClient
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from codecarbon import OfflineEmissionsTracker
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# --- 1. Configuration ---
HF_API_KEY = (
    "hf_abcdefghijklmnopqrstuvwxyz"  # Replace with your actual Hugging Face API key
)
CRITIC_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"

# Local Config (Refiner)
LAMINI_MODEL_PATH = r"C:\Users\user\.cache\huggingface\hub\models--MBZUAI--LaMini-Flan-T5-248M\snapshots\4e871ba5f20216feaa3b845fc782229cd64eba47"
DATA_PATH = (
    r"C:\Users\user\ELO2_GREEN_AI\2_open_source_models\quantized_models\mistral7b\data"
)
YOUR_COUNTRY_ISO_CODE = "EGY"

# --- 2. Define Robust Prompts ---

# Critic: Forced to start with a status tag
CRITIC_SYSTEM_PROMPT = """You are a strict Editor.
Compare the 'Draft' to the 'Source Context'.

Output format:
- Start with "[OK]" if the draft is accurate and needs no changes.
- Start with "[REVISE]" if there are errors or missing key facts.
- Then provide a bulleted list of feedback.

Rules:
1. If the Draft contradicts the Context, mark it [REVISE].
2. If the Draft is missing a CRITICAL fact, mark it [REVISE].
3. Do NOT nitpick small details."""

CRITIC_USER_TEMPLATE = """--- Source Context ---
{context}
--- User Question ---
{query}
--- Draft Answer ---
{draft}

Critique:"""

REFINER_PROMPT_TEMPLATE = """You are a professional Writer.
Rewrite the 'Draft Answer' to incorporate the 'Editor's Feedback'.

Rules:
- Only fix what the Editor asked for.
- Do NOT cut off the answer; write the complete response.
- Do not add external info.

--- Draft Answer ---
{draft}

--- Editor's Feedback ---
{feedback}

--- Rewritten Answer ---
"""

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
if not HF_API_KEY:
    raise ValueError("HF_TOKEN not set.")

# --- 1. Initialize API Client (Critic) ---
client = InferenceClient(token=HF_API_KEY)
print("API Client (Critic) initialized.")

# --- 2. Initialize Local Mistral (Refiner) ---


print("Loading LaMini-Flan-T5 (Refiner)...")
lamini_tokenizer = AutoTokenizer.from_pretrained(LAMINI_MODEL_PATH)
lamini_model = AutoModelForSeq2SeqLM.from_pretrained(
    LAMINI_MODEL_PATH, torch_dtype=torch.float16, device_map="auto"
)


class LaMiniWrapper:
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def complete(self, prompt):
        inputs = self.tokenizer(
            prompt, return_tensors="pt", max_length=512, truncation=True
        ).to(self.model.device)

        outputs = self.model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.1,
            do_sample=True,
            top_p=0.95,
            repetition_penalty=1.1,
            no_repeat_ngram_size=3,
        )

        text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

        class Response:
            def __init__(self, text):
                self.text = text

        return Response(text)


llm_local = LaMiniWrapper(lamini_model, lamini_tokenizer)
print("Local Mistral 7B (Refiner) loaded.")

# --- 3. Initialize Local Retriever ---
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
documents = SimpleDirectoryReader(DATA_PATH).load_data()
index = VectorStoreIndex.from_documents(documents, embed_model=embed_model)
retriever = index.as_retriever(similarity_top_k=3)
print("Retriever ready.")

API Client (Critic) initialized.
Loading LaMini-Flan-T5 (Refiner)...


`torch_dtype` is deprecated! Use `dtype` instead!


Local Mistral 7B (Refiner) loaded.
Retriever ready.


In [5]:
# --- Helper to call API (Increased Tokens) ---
def call_critic_api(system_prompt, user_prompt):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    try:
        response = client.chat_completion(
            messages=messages,
            model=CRITIC_MODEL_ID,
            max_tokens=2048,  # INCREASED from 512 to prevent cutting
            temperature=0.1,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"API Error: {e}"


# --- Start Hybrid Loop ---
REFINEMENT_CYCLES = 2
tracker = OfflineEmissionsTracker(country_iso_code=YOUR_COUNTRY_ISO_CODE)
tracker.start()

print("\n--- Hybrid Studio (Robust Version) ---")
print("Type 'exit' to quit.")

try:
    while True:
        # 1. Inputs
        query = input("\n(1/3) Enter User Query: ")
        if query.lower() in ["exit", "quit"]:
            break

        # Retrieve Context locally
        retrieved_nodes = retriever.retrieve(query)
        context_str = "\n---\n".join([node.get_content() for node in retrieved_nodes])

        draft_text = input("(2/3) Paste Draft Text: ")

        tracker.start_task("Hybrid Refinement")
        current_draft = draft_text

        for i in range(REFINEMENT_CYCLES):
            print(f"\n--- Cycle {i + 1} ---")

            # --- A. CRITIC STEP (API) ---
            print("1. Cloud API (Critic) is evaluating...")
            critic_input = CRITIC_USER_TEMPLATE.format(
                context=context_str, query=query, draft=current_draft
            )
            critique = call_critic_api(CRITIC_SYSTEM_PROMPT, critic_input)

            print(f"\n[Editor's Feedback]:\n{critique}\n")

            # --- NEW ROBUST CHECK ---
            # Only stop if it explicitly starts with [OK]
            if critique.startswith("[OK]"):
                print(">> Critic is satisfied. Stopping early.")
                break
            elif "[OK]" in critique[:20]:  # Fallback if it has a small prefix
                print(">> Critic is satisfied. Stopping early.")
                break

            # --- B. REFINER STEP (Local) ---
            print("2. Local GPU (Refiner) is rewriting...")
            refiner_input = REFINER_PROMPT_TEMPLATE.format(
                draft=current_draft, feedback=critique
            )

            # Local generation with sufficient length
            refined_response = llm_local.complete(refiner_input)
            current_draft = refined_response.text

            print("\n[Refined Draft]:")
            print(textwrap.fill(current_draft, width=80))

        tracker.stop_task()
        print("\n" + "=" * 50)
        print("FINAL RESULT:")
        print(textwrap.fill(current_draft, width=80))
        print("=" * 50)

finally:
    tracker.stop()


--- Hybrid Studio (Robust Version) ---
Type 'exit' to quit.

--- Cycle 1 ---
1. Cloud API (Critic) is evaluating...

[Editor's Feedback]:
[REVISE]

- The draft does not accurately describe the computer alarms that appeared during the landing. The draft should include the specific computer alarms (1201 and 1202) and the crew's reaction to them.
- The draft does not mention the specific actions taken by the crew in response to the computer alarms, such as Armstrong taking semi-automatic control.
- The draft does not include the details about Mission Control's role in addressing the alarms and the reassurance given to the crew.
- The draft does not mention the post-mission analysis showing the real fuel remaining was probably closer to 50 seconds, which is critical information.

Feedback:
- The draft should include the specific computer alarms (1201 and 1202) and the crew's reaction to them.
- Include the details about Mission Control's role in addressing the alarms and the reassurance g

  df = pd.concat([df, new_df], ignore_index=True)
