In [2]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Switch the working directory to the project folder on Drive; the relative
# paths used by later cells (results/, src/) are resolved from here.
%cd /content/drive/MyDrive/assignment2-rag

/content/drive/MyDrive/assignment2-rag


In [3]:
import os, json, pandas as pd
from pathlib import Path
# Make sure the results/ output directory exists; a no-op when it is already there.
Path("results").mkdir(parents=True, exist_ok=True)

In [4]:
import matplotlib.pyplot as plt

In [5]:
%pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


### Run naive and enhanced RAG

In [5]:
# Naive baseline: run the src.naive_rag module with top_k=10 and the "instruction" prompt style.
# NOTE(review): presumably writes results/predictions_naive.jsonl, which the
# evaluation cell reads — confirm in src/naive_rag.py.
!python -m src.naive_rag --run --top_k 10 --prompt_style instruction

# Enhanced pipeline (query rewrite + rerank), run with the same top_k and prompt style.
# NOTE(review): presumably writes results/predictions_enhanced.jsonl — confirm in src/enhanced_rag.py.
!python -m src.enhanced_rag --run --top_k 10 --prompt_style instruction

2025-09-22 18:17:31.282692: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758565051.302558    5097 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758565051.308608    5097 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758565051.323509    5097 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758565051.323533    5097 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758565051.323537    5097 computation_placer.cc:177] computation placer alr

### RAGAs evaluation

In [6]:
# Remove the preinstalled pydantic / pydantic-core before installing the set below
# (presumably so they are re-resolved together with the other packages).
!pip uninstall -y pydantic pydantic-core

# Install a compatible set (works on Colab Py3.12).
# NOTE(review): only ragas is pinned exactly (==0.1.6); every other requirement is a
# version range, so the resolved versions can differ between runs.
!pip install -U \
  "pydantic>=2.7.4,<3" "pydantic-core>=2.18,<3" \
  "langchain-core>=0.2.40,<0.3" "langchain-community>=0.2.6,<0.3" \
  "ragas==0.1.6" "datasets>=2.19,<3" \
  "transformers>=4.41,<4.44" accelerate "sentence-transformers>=2.6,<3" \
  einops tiktoken

Found existing installation: pydantic 2.11.9
Uninstalling pydantic-2.11.9:
  Successfully uninstalled pydantic-2.11.9
Found existing installation: pydantic_core 2.33.2
Uninstalling pydantic_core-2.33.2:
  Successfully uninstalled pydantic_core-2.33.2
Collecting pydantic<3,>=2.7.4
  Downloading pydantic-2.11.9-py3-none-any.whl.metadata (68 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.4/68.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydantic-core<3,>=2.18
  Downloading pydantic_core-2.39.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting langchain-core<0.3,>=0.2.40
  Downloading langchain_core-0.2.43-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-community<0.3,>=0.2.6
  Downloading langchain_community-0.2.19-py3-none-any.whl.metadata (2.7 kB)
Collecting ragas==0.1.6
  Downloading ragas-0.1.6-py3-none-any.whl.metadata (5.2 kB)
Collecting datasets<3,>=2.19
  Downloading datasets-2.21.0-py3-none-a

In [8]:
# Evaluate the naive predictions with src/run_ragas_local.py, passing
# Phi-3.5-mini-instruct as --llm and bge-small-en-v1.5 as --embed.
# NOTE(review): --judge_tokens 128 presumably caps the judge model's generated tokens,
# and the run presumably writes results/ragas_naive_scores.json — confirm in the script.
!python src/run_ragas_local.py \
  --naive results/predictions_naive.jsonl \
  --llm microsoft/Phi-3.5-mini-instruct \
  --embed BAAI/bge-small-en-v1.5 \
  --judge_tokens 128 \
  --mode naive

2025-09-25 15:12:22.374374: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758813142.394265   13426 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758813142.400353   13426 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758813142.415665   13426 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758813142.415692   13426 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758813142.415696   13426 computation_placer.cc:177] computation placer alr

In [15]:
# Evaluate the enhanced predictions with src/run_ragas_local.py, using the same
# --llm, --embed and --judge_tokens settings as the naive evaluation.
# NOTE(review): presumably writes results/ragas_enhanced_scores.json — confirm in the script.
!python src/run_ragas_local.py \
  --enhanced results/predictions_enhanced.jsonl \
  --llm microsoft/Phi-3.5-mini-instruct \
  --embed BAAI/bge-small-en-v1.5 \
  --judge_tokens 128 \
  --mode enhanced

2025-09-25 21:44:52.045104: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1758836692.065387  112612 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1758836692.071686  112612 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1758836692.087478  112612 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758836692.087505  112612 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1758836692.087510  112612 computation_placer.cc:177] computation placer alr

In [16]:
import csv
import json
import math
import pathlib

# Compare the Ragas scores of the naive and enhanced runs, metric by metric,
# and save the comparison as results/ragas_comparison.csv.
resdir = pathlib.Path("results")
metrics = ["faithfulness", "answer_relevancy", "context_precision", "context_recall"]
fieldnames = ["metric", "naive", "enhanced", "delta (enhanced - naive)"]


def load_scores(path):
    """Return the JSON object stored at ``path`` (read as UTF-8)."""
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def metric_value(scores, metric):
    """Return ``scores[metric]`` as a float, or NaN if it is missing or not a number."""
    value = scores.get(metric)
    # bool is a subclass of int, so rule it out explicitly: True/False is not a score.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return math.nan
    return float(value)


def format_score(value, spec):
    """Format ``value`` with ``spec``, writing NaN as the literal string "nan"."""
    return "nan" if math.isnan(value) else format(value, spec)


naive_scores = load_scores(resdir / "ragas_naive_scores.json")
enhanced_scores = load_scores(resdir / "ragas_enhanced_scores.json")

rows = []
for metric in metrics:
    naive = metric_value(naive_scores, metric)
    enhanced = metric_value(enhanced_scores, metric)
    # NaN propagates through the subtraction, so a metric missing on either
    # side yields a "nan" delta without any extra branching.
    delta = enhanced - naive
    rows.append({
        "metric": metric,
        "naive": format_score(naive, ".4f"),
        "enhanced": format_score(enhanced, ".4f"),
        "delta (enhanced - naive)": format_score(delta, "+.4f"),
    })

out_csv = resdir / "ragas_comparison.csv"
with open(out_csv, "w", newline="", encoding="utf-8") as f:
    # Explicit field names keep the header stable even if `metrics` is empty.
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(rows)

print("Wrote", out_csv)

Wrote results/ragas_comparison.csv


In [8]:
!zip -r assignment2-rag.zip /content/assignment2-rag

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
  adding: content/assignment2-rag/.venv/lib/python3.9/site-packages/transformers/models/deprecated/nat/configuration_nat.py (deflated 66%)
  adding: content/assignment2-rag/.venv/lib/python3.9/site-packages/transformers/models/deprecated/nat/__init__.py (deflated 55%)
  adding: content/assignment2-rag/.venv/lib/python3.9/site-packages/transformers/models/deprecated/nat/modeling_nat.py (deflated 79%)
  adding: content/assignment2-rag/.venv/lib/python3.9/site-packages/transformers/models/deprecated/qdqbert/ (stored 0%)
  adding: content/assignment2-rag/.venv/lib/python3.9/site-packages/transformers/models/deprecated/qdqbert/__init__.py (deflated 61%)
  adding: content/assignment2-rag/.venv/lib/python3.9/site-packages/transformers/models/deprecated/qdqbert/configuration_qdqbert.py (deflated 66%)
  adding: content/assignment2-rag/.venv/lib/python3.9/site-packages/transformers/models/deprecated/qdqbert/modeling_qdqbert.py (deflated 82%)
  adding: con

In [9]:
# Send assignment2-rag.zip (in the current working directory) to the browser
# as a download through Colab's files helper.
from google.colab import files
files.download("assignment2-rag.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>