In [1]:
!pip install spacy==3.7.6
!python -m spacy download en_core_web_md
!pip install "transformers==4.40.2" "accelerate==0.30.1"
!pip install -q nest_asyncio



Collecting en-core-web-md==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.7.1/en_core_web_md-3.7.1-py3-none-any.whl (42.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 MB[0m [31m196.3 MB/s[0m  [33m0:00:00[0m0:00:01[0m
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')


In [11]:
import spacy
nlp = spacy.load("en_core_web_md")
print("SpaCy model loaded OK!")
import nest_asyncio
nest_asyncio.apply()


SpaCy model loaded OK!


In [3]:
# --- make src/ importable ---
import sys, os
from pathlib import Path

SRC = (Path.cwd() / "src").resolve()
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

print("sys.path OK:", sys.path[0])

# --- backend imports (your 4 RL files) ---
from backend.state import encode_state
from backend.rl_agent import RLAgent
from backend.reward import feedback_reward
from backend.retrieval_policies import action_to_filter

# --- RAG + LLM (your existing code) ---
from backend.rag_process import rag_process
from backend.ghg_assistant import GHGAssistant

print("Backend modules loaded.")


sys.path OK: /home/sagemaker-user/RL_2025/src


2025-10-01 06:16:13.487047: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
[nltk_data] Downloading package punkt to /home/sagemaker-
[nltk_data]     user/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Backend modules loaded.


In [4]:
# If your assistant requires GROQ_API_KEY in env:
try:
    from dotenv import load_dotenv
    # load .env at repo root if present
    load_dotenv(dotenv_path=Path.cwd() / ".env")
except Exception:
    pass

assert os.getenv("GROQ_API_KEY"), "GROQ_API_KEY not found. export it or put it in .env"
print("Key prefix:", os.getenv("GROQ_API_KEY")[:8], "...OK")


Key prefix: gsk_zeOc ...OK


In [None]:
import asyncio
import nest_asyncio

def run_async(coro):
    """
    Run an async coroutine safely from a notebook OR a normal script.
    - In a notebook (loop already running): reuse it via nest_asyncio.
    - In a script: create a fresh loop with asyncio.run.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop and loop.is_running():
        nest_asyncio.apply(loop)            # patch the running loop once
        return loop.run_until_complete(coro)
    else:
        return asyncio.run(coro)


In [5]:
# RL agent persists its Q-table under src/data/q_table.json
agent = RLAgent()

# Your RAG layer + assistant (as in your repo)
rag = rag_process()
assistant = GHGAssistant()

print("Agent, RAG and Assistant are ready.")


Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


Agent, RAG and Assistant are ready.


In [6]:
from typing import List, Dict, Any, Tuple

def retrieve_chunks(question: str, action: str, company_name: str | None) -> Tuple[List[str], List[Dict[str, Any]]]:
    """Converts the chosen action into a metadata filter and queries your RAG."""
    meta = action_to_filter(action, company_name)
    try:
        # If you already extended `query_documents` to accept metadata_filter
        chunks, metas = rag.query_documents(question=question, n_results=4, metadata_filter=meta)
    except TypeError:
        # Backward-compatible path if your method doesn't take metadata_filter yet
        chunks, metas = rag.query_documents(question=question, n_results=4)
    return chunks, metas

def format_context(chunks: List[str], metas: List[Dict[str, Any]]) -> str:
    """Optional: pretty join chunks + sources."""
    formatted = []
    for ch, md in zip(chunks, metas):
        src = md.get("source", "Unknown")
        pg  = md.get("page") or md.get("chunk_number")
        tag = f"{src}" + (f" (p.{pg})" if pg else "")
        formatted.append(f"Source: {tag}\n{ch}")
    return "\n\n---\n\n".join(formatted)


In [7]:
# We'll keep the last (state, action) so you can send 👍/👎 afterwards
last_step = {
    "state": None,
    "action": None,
    "company": None,
    "response": None
}


In [8]:
def ask(prompt: str, company_info: dict | None = None) -> str:
    """
    1) Build RL state from prompt + company_info
    2) Choose action via RL
    3) Retrieve chunks respecting the action (filter)
    4) Ask the assistant
    5) Cache state/action so you can call thumb_up()/thumb_down() afterwards
    """
    # 1) state
    s = encode_state(prompt, company_info or {})

    # 2) RL action
    a = agent.select(s)

    # 3) retrieval
    company_name = (company_info or {}).get("name")
    chunks, metas = retrieve_chunks(prompt, a, company_name)
    context = format_context(chunks, metas)

    # 4) assistant
    # Your GHGAssistant uses: await generate_response(user_prompt, context)
    # This wrapper calls it synchronously.
    import asyncio
    async def _call():
        return await assistant.generate_response(user_prompt=prompt, context=context)
    try:
        answer = asyncio.run(_call())
    except RuntimeError:
        # If there's already an event loop (sometimes happens in notebooks)
        answer = asyncio.get_event_loop().run_until_complete(_call())

    # 5) remember for feedback
    last_step.update(state=s, action=a, company=company_info, response=answer)

    print(f"[Action] {a}")
    return answer


In [9]:
from datetime import datetime
import csv

LOG_PATH = (SRC / "data" / "rl_logs.csv")
LOG_PATH.parent.mkdir(parents=True, exist_ok=True)

def _log_feedback(state, action, tag):
    LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
    new_file = not LOG_PATH.exists()
    with LOG_PATH.open("a", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        if new_file:
            w.writerow(["ts", "action", "tag"])
        w.writerow([datetime.utcnow().isoformat(), action, tag])

def thumb_up():
    assert last_step["state"] is not None, "Call ask(...) first."
    r = feedback_reward("up")
    agent.update(last_step["state"], last_step["action"], r)
    _log_feedback(last_step["state"], last_step["action"], "up")
    print("👍 thanks — learning updated.")

def thumb_down():
    assert last_step["state"] is not None, "Call ask(...) first."
    r = feedback_reward("down")
    agent.update(last_step["state"], last_step["action"], r)
    _log_feedback(last_step["state"], last_step["action"], "down")
    print("👎 got it — learning updated.")


In [20]:
company = {"name": "Acme", "sector": "energy", "size": "large"}

answer = ask("What are the financial risks of carbon pricing for us?", company)
print("\nAssistant:\n", answer)


[Action] broad

Assistant:
 As an Australian company, you are subject to the Australian government's climate change policies, including the Treasury Act 2024 and the National Greenhouse and Energy Reporting (NGER) Scheme. The financial risks of carbon pricing for your company may include:

1. **Direct costs**: Under the Safeguard Mechanism, if your company's emissions exceed the baseline threshold, you may be required to purchase Australian Carbon Credit Units (ACCUs) or pay a fixed price for excess emissions.
2. **Indirect costs**: Carbon pricing can lead to increased costs for electricity, fuel, and other goods and services, which can impact your company's profitability.
3. **Supply chain risks**: If your suppliers are not adequately prepared for carbon pricing, they may pass on increased costs to your company, or experience disruptions that impact your operations.
4. **Compliance costs**: Your company may incur costs associated with measuring, reporting, and verifying greenhouse gas

In [15]:
thumb_up()    # or: thumb_down()


👍 thanks — learning updated.


  w.writerow([datetime.utcnow().isoformat(), action, tag])


In [16]:
answer = ask("How can we lower Scope 3 emissions next year?", company)
print("\nAssistant:\n", answer)


[Action] broad

Assistant:
 To lower Scope 3 emissions next year, I recommend the following steps, aligned with the Australian Sustainability Reporting Standards (ASRS) and the National Greenhouse and Energy Reporting (NGER) Scheme:

1. **Conduct a Scope 3 emissions assessment**: Identify and quantify your company's Scope 3 emissions sources, such as:
	* Purchased goods and services
	* Fuel- and energy-related activities (e.g., transportation, distribution)
	* Waste generated in operations
	* Business travel
	* Employee commuting
2. **Set science-based targets**: Establish reduction targets for Scope 3 emissions, considering your company's specific circumstances and industry benchmarks.
3. **Engage with suppliers**: Collaborate with suppliers to reduce their emissions and encourage the adoption of sustainable practices, such as:
	* Requesting environmental performance data
	* Incorporating sustainability criteria into procurement processes
	* Providing incentives for suppliers to reduc