In [1]:
!pip install spacy==3.7.6
!python -m spacy download en_core_web_md
!pip install "transformers==4.40.2" "accelerate==0.30.1"
!pip install -q nest_asyncio    



Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m23.4 MB/s[0m  [33m0:00:01[0mm0:00:01[0m00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')


In [2]:
import nest_asyncio
import spacy

# Load the medium English pipeline downloaded by the install cell.
nlp = spacy.load("en_core_web_md")
print("SpaCy model loaded OK!")

# Patch asyncio so nested event-loop use is allowed inside the notebook.
nest_asyncio.apply()


SpaCy model loaded OK!


In [5]:
%pip install -U groq


try:
    from dotenv import load_dotenv
    # load .env at repo root if present
    load_dotenv(dotenv_path=Path.cwd() / ".env")
except Exception:
    pass

assert os.getenv("GROQ_API_KEY"), "GROQ_API_KEY not found. export it or put it in .env"
print("Key prefix:", os.getenv("GROQ_API_KEY")[:8], "...OK")


Collecting groq
  Downloading groq-0.32.0-py3-none-any.whl.metadata (16 kB)
Downloading groq-0.32.0-py3-none-any.whl (135 kB)
Installing collected packages: groq
Successfully installed groq-0.32.0
Note: you may need to restart the kernel to use updated packages.
Key prefix: [REDACTED] ...OK


In [13]:
# --- make src/ importable ---
!pip install pdfplumber
!pip install sentence_transformers
!pip install chromadb
!pip install streamlit

import sys, os
from pathlib import Path

# --- make src/ importable ---
# Absolute path of ./src, put at the front of sys.path unless already listed.
SRC = Path.cwd().joinpath("src").resolve()
if sys.path.count(str(SRC)) == 0:
    sys.path.insert(0, str(SRC))

print("sys.path OK:", sys.path[0])

# --- RL pieces from the local `backend` package ---
# Used later in this notebook as: encode_state(prompt, company_info),
# RLAgent().select(...) / .update(...), feedback_reward(tag) and
# action_to_filter(action, company_name).
# NOTE(review): `backend` is presumably found via the src/ entry added to
# sys.path in this cell — confirm the package lives under src/backend.
from backend.state import encode_state
from backend.rl_agent import RLAgent
from backend.reward import feedback_reward
from backend.retrieval_policies import action_to_filter

# --- retrieval layer and LLM assistant from the same package ---
from backend.rag_process import rag_process
from backend.ghg_assistant import GHGAssistant

# Reaching this line means every backend import above succeeded.
print("Backend modules loaded.")


Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Using cached pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.50.0-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m203.2 MB/s[0m  [33m0:00:00[0m
[?25hUsing cached pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
Installing collected packages: watchdog, pydeck, streamlit
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3/3[0m [streamlit]/3[0m [streamlit]
[1A[2KSuccessfully installed pydeck-0.9.1 streamlit-1.50.0 watchdog-6.0.0
sys.path OK: /home/sagemaker-user/RL_2025/src
Backend modules loaded.


In [14]:
import asyncio
import nest_asyncio

def run_async(coro):
    """
    Drive ``coro`` to completion and return its result.

    Works both where an event loop is already running (e.g. a notebook cell)
    and in a plain script:
    - running loop found: patch it with nest_asyncio and run the coroutine on it;
    - no running loop: hand the coroutine to ``asyncio.run``.
    """
    try:
        active_loop = asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises when this thread has no running loop.
        active_loop = None

    if not (active_loop and active_loop.is_running()):
        return asyncio.run(coro)

    # Re-entrant use of a running loop needs the nest_asyncio patch.
    nest_asyncio.apply(active_loop)
    return active_loop.run_until_complete(coro)


In [25]:
# Build the three long-lived objects the rest of the notebook uses as globals.
# RL agent persists its Q-table under src/data/q_table.json
# NOTE(review): constructor side effects (file/model loading) are not visible
# from this notebook — confirm in backend.rl_agent.
agent = RLAgent()

# Retrieval layer and LLM assistant from the backend package.
# NOTE(review): both are re-created again in a later cell; presumably only one
# set of instances is needed — confirm before removing either.
rag = rag_process()
assistant = GHGAssistant()

# Printed only if all three constructors returned without raising.
print("Agent, RAG and Assistant are ready.")


Agent, RAG and Assistant are ready.


In [26]:
# --- notebook UI: thumbs + text feedback ------------------------------------
from pathlib import Path
from IPython.display import display, Markdown, clear_output
import ipywidgets as w
from datetime import datetime

# Where feedback rows are appended (one CSV line per feedback event).
LOG_CSV = Path("src/data/rl_logs.csv")
LOG_CSV.parent.mkdir(parents=True, exist_ok=True)
if not LOG_CSV.exists():
    # Write the header as UTF-8, matching the encoding used when rows are
    # appended, instead of whatever the platform's locale default is.
    LOG_CSV.write_text("ts,action,tag,comment\n", encoding="utf-8")

# Snapshot of the most recent ask; the feedback handlers read it to know which
# (state, action) pair to update. Every field starts out as None.
last_step = dict.fromkeys(("state", "action", "company_info", "response"))

# --- input widgets: the question plus optional company context (JSON) ---
prompt_box = w.Textarea(
    description="Prompt:",
    placeholder="Ask the assistant…",
    layout=w.Layout(height="90px", width="100%"),
)
company_box = w.Textarea(
    description="Company:",
    placeholder='Optional company info as JSON, e.g. {"name":"Acme","sector":"energy","size":"large"}',
    layout=w.Layout(height="70px", width="100%"),
)

# --- action buttons ---
ask_btn = w.Button(icon="play", button_style="primary", description="Ask")

thumb_up = w.Button(
    icon="thumbs-up",
    description="Helpful",
    layout=w.Layout(width="140px"),
)
thumb_down = w.Button(
    icon="thumbs-down",
    description="Not helpful",
    layout=w.Layout(width="140px"),
)

# --- written feedback: a tag (label -> reward tag) plus an optional comment ---
tag_dd = w.Dropdown(
    description="Tag:",
    options=[
        ("choose a tag…", ""),
        ("good / helpful", "up"),
        ("ok / neutral", "neutral"),
        ("bad / not helpful", "down"),
        ("off-topic", "down"),
        ("hallucination", "down"),
    ],
)

comment_box = w.Textarea(
    description="Comment:",
    placeholder="Optional written feedback (what was good / missing?)",
    layout=w.Layout(height="70px", width="100%"),
)
submit_text = w.Button(icon="paper-plane", description="Submit text feedback")

# --- output areas ---
status_lbl = w.HTML()      # one-line status message
answer_out = w.Output()    # the assistant's answer is rendered here

# --- assemble top to bottom and render ---
ui = w.VBox(
    [
        prompt_box,
        company_box,
        ask_btn,
        w.HBox([thumb_up, thumb_down]),
        w.HBox([tag_dd, submit_text]),
        comment_box,
        status_lbl,
        w.HTML("<hr>"),
        answer_out,
    ]
)

display(ui)


VBox(children=(Textarea(value='', description='Prompt:', layout=Layout(height='90px', width='100%'), placehold…

In [21]:
import json
from backend.state import encode_state
from backend.rl_agent import RLAgent
from backend.reward import feedback_reward
from backend.retrieval_policies import action_to_filter
from backend.rag_process import rag_process
from backend.ghg_assistant import GHGAssistant

# Create the retrieval layer, the assistant and the RL agent used by the
# helper functions defined below in this cell.
# NOTE(review): the same three objects are also constructed in an earlier cell,
# so despite the original "initialize once" remark they are built twice on a
# top-to-bottom run — presumably one of the two can go; confirm first.
rag = rag_process()
assistant = GHGAssistant()
agent = RLAgent()  # persists Q table under src/data/q_table.json

def _safe_company(text: str):
    try:
        return json.loads(text) if text.strip() else None
    except Exception:
        return None

def _answer_with_rag(prompt: str, company_info: dict | None):
    """
    Answer ``prompt`` through the RL-guided retrieval pipeline.

    Steps:
      1) encode (prompt, company_info) into a state with encode_state
      2) let the agent pick an action for that state
      3) turn the action (plus the company name, if any) into a metadata filter
      4) query the RAG layer for the top 4 chunks under that filter
      5) have the assistant generate a response from those chunks

    Side effect: overwrites the 'last_step' fields so feedback can refer to
    this exchange.

    Returns:
        (action, metadata_filter, answer)
    """
    state = encode_state(prompt, company_info)
    action = agent.select(state)

    # The company name is optional; a missing or empty dict yields None.
    company_name = company_info.get("name") if company_info else None
    meta_filter = action_to_filter(action, company_name)

    # Retrieval, then generation. The filter goes in by keyword so it stays
    # aligned with the RAG layer's own signature.
    chunks, chunk_metadatas = rag.query_documents(
        question=prompt,
        n_results=4,
        metadata_filter=meta_filter,
    )
    reply = assistant.generate_response(
        user_prompt=prompt,
        relevant_chunks=chunks,
        results_metadata=chunk_metadatas,
    )

    # Remember this exchange for the feedback handlers.
    last_step.update(
        state=state,
        action=action,
        company_info=company_info,
        response=reply,
    )
    return action, meta_filter, reply

# display helper
def _show_answer(action: str, meta_filter: dict | None, text: str):
    """Replace the answer area with the chosen action, its filter and the reply."""
    with answer_out:
        clear_output()
        banner = f"**[Action]** `{action}` &nbsp;&nbsp; **Filter**: `{meta_filter}`"
        # Three markdown fragments in order: banner, rule, heading.
        for fragment in (banner, "---", "**Assistant:**"):
            display(Markdown(fragment))
        # The reply itself is printed as plain text, not rendered as markdown.
        print(text)
# glue to Ask button
def on_ask(_):
    """
    Click handler for the Ask button.

    Validates the prompt, parses the optional company JSON, runs the RAG flow
    and renders the answer. Progress and problems are reported in the status
    line: an empty prompt, company text that could not be used, and failures
    raised by the backend (which are re-raised after being reported).
    """
    import html  # stdlib; local import so this handler does not need a new top-level import

    status_lbl.value = ""
    prompt = prompt_box.value.strip()
    if not prompt:
        status_lbl.value = "<span style='color:#c00'>Please enter a prompt.</span>"
        return

    company_text = company_box.value
    company = _safe_company(company_text)
    # Text was supplied but nothing usable came back: tell the user rather
    # than silently answering without company context.
    company_ignored = bool(company_text.strip()) and company is None

    try:
        a, meta, ans = _answer_with_rag(prompt, company)
    except Exception as exc:
        # Surface the failure in the UI, then re-raise so the traceback is not lost.
        status_lbl.value = (
            "<span style='color:#c00'>Request failed: "
            f"{html.escape(f'{type(exc).__name__}: {exc}')}</span>"
        )
        raise

    _show_answer(a, meta, ans)
    status = "<span style='color:#2a2'>Answer ready. You can now leave feedback.</span>"
    if company_ignored:
        status += (
            " <span style='color:#c60'>(Company info was not a valid JSON object "
            "and was ignored.)</span>"
        )
    status_lbl.value = status

# NOTE(review): presumably re-running this cell registers another handler on the
# same button, so one click would fire several asks — confirm, and re-run the
# widget cell first if so.
ask_btn.on_click(on_ask)


In [18]:
def _log_row(action: str, tag: str, comment: str = ""):
    LOG_CSV.open("a", encoding="utf-8").write(
        f"{datetime.utcnow().isoformat()},{action},{tag},{comment.replace(',', ' ').replace('\n',' ').strip()}\n"
    )

def _update_from_tag(tag: str, comment: str = ""):
    """
    Apply one piece of feedback to the most recent (state, action) pair.

    Converts ``tag`` to a reward with feedback_reward, updates the agent, logs
    the row and reports the outcome in the status line.

    Returns:
        True when the update was applied, False when there is no previous
        step to attach the feedback to.
    """
    state, action = last_step["state"], last_step["action"]
    if not state or not action:
        status_lbl.value = "<span style='color:#c00'>Ask something first.</span>"
        return False
    # presumably maps the tag to +1 / 0 / -1 — confirm in backend.reward
    r = feedback_reward(tag)
    agent.update(state, action, r)
    _log_row(action, tag, comment)
    status_lbl.value = f"<span style='color:#2a2'>Thanks — learning updated (tag: {tag}).</span>"
    return True

def on_thumb_up(_):
    """Click handler: record positive feedback for the last answer."""
    _update_from_tag("up")

def on_thumb_down(_):
    """Click handler: record negative feedback for the last answer."""
    _update_from_tag("down")

def on_submit_text(_):
    """Click handler: record the selected tag together with the written comment."""
    tag = (tag_dd.value or "neutral")  # default if user forgot to choose
    comment = comment_box.value or ""
    # Clear the box only once the feedback was actually recorded, so a comment
    # typed before the first question is not thrown away.
    if _update_from_tag(tag, comment):
        comment_box.value = ""

thumb_up.on_click(on_thumb_up)
thumb_down.on_click(on_thumb_down)
submit_text.on_click(on_submit_text)


In [22]:
from pathlib import Path, PurePosixPath
import json, itertools
# Peek at the persisted Q-table: report whether it exists, then preview it.
q_path = Path("src/data/q_table.json")
print("Q exists:", q_path.exists(), "| path:", q_path)
if q_path.exists():
    q = json.loads(q_path.read_text())
    # show at most the first five (state -> action values) entries
    for i, (state_key, actions) in enumerate(itertools.islice(q.items(), 5)):
        print(i, state_key, "->", actions)


Q exists: True | path: src/data/q_table.json
0 {"len": "short", "month": "2025-10", "sector": "energy", "size": "large", "topic": "fin"} -> {'broad': 0.7599, 'legal_only': 0.0, 'financial_only': 0.0, 'company_only': 0.0}
1 {"len": "short", "month": "2025-10", "sector": "energy", "size": "large", "topic": "ghg"} -> {'broad': 0.3, 'legal_only': 0.0, 'financial_only': 0.0, 'company_only': 0.0}
2 {"len": "short", "month": "2025-10", "sector": "finance", "size": "medium", "topic": "ghg"} -> {'broad': 0.3, 'legal_only': 0.0, 'financial_only': 0.0, 'company_only': 0.0}
