# SummEval TrustScore Inference

Run TrustScore on the SummEval dataset using vLLM, then summarize the resulting trust scores.


In [None]:
!pip install transformers datasets vllm tqdm pyyaml


In [None]:
# Mount Google Drive at /content/drive. The `google.colab` module is only
# available inside a Colab runtime, so this cell fails elsewhere.
# The results-analysis cell looks for its input under this mount point first.
from google.colab import drive
drive.mount('/content/drive')


## Load and Preprocess SummEval Data


In [None]:
import os
import sys

# Add the parent directory to the import search path; presumably that is
# where the project's `scripts` package lives (confirm against repo layout).
sys.path.append('..')

from scripts.load_summeval import load_summeval_with_sources

# Read the aligned SummEval annotation file. `max_samples=100` is passed
# through to the loader -- presumably a cap on the number of records read
# (confirm in scripts/load_summeval.py).
jsonl_path = "datasets/raw/summeval/model_annotations.aligned.jsonl"
data = load_summeval_with_sources(
    jsonl_path,
    max_samples=100,
)

# Quick sanity check on how many records came back.
print(f"Loaded {len(data)} samples")


In [None]:
from scripts.preprocess_summeval import preprocess_to_trustscore_format

# Convert SummEval records into the TrustScore input format.
# No input path is passed here, so the helper presumably reads from its own
# default location and `max_samples=100` presumably caps the number of
# records converted -- TODO confirm in scripts/preprocess_summeval.py.
# The return value is treated as the path of the file that was written.
output_path = preprocess_to_trustscore_format(max_samples=100)
print(f"Preprocessed data saved to: {output_path}")


## Run TrustScore Inference


In [None]:
from scripts.run_summeval_inference import run_summeval_inference

# Settings for the TrustScore run. The input is the preprocessed SummEval
# file; results are written to `output_file`. The exact meaning of each
# option is defined by scripts/run_summeval_inference.py.
inference_options = {
    "input_file": "datasets/processed/summeval_trustscore_format.jsonl",
    "output_file": "results/summeval_trustscore_100samples.jsonl",
    "max_samples": 100,
    "batch_size": 10,
    "use_vllm": True,
    "save_to_drive": True,  # Also save results to Google Drive
}

# Run inference with the options above (passed as keyword arguments).
run_summeval_inference(**inference_options)


## Analyze Results


In [None]:
import json
import numpy as np

# Candidate result files, in priority order: the Google Drive copy first,
# then the local output file.
results_path = "/content/drive/MyDrive/TrustScore/summeval_trustscore_100samples.jsonl"
local_results_path = "results/summeval_trustscore_100samples.jsonl"


def load_jsonl(path):
    """Read a JSON Lines file and return its records as a list.

    Args:
        path: Path of the file to read (decoded as UTF-8).

    Returns:
        One parsed JSON value per non-blank line, in file order.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        # Blank lines (e.g. a trailing newline) carry no record; skip them
        # instead of letting json.loads fail on an empty string.
        return [json.loads(line) for line in f if line.strip()]


def load_first_available(candidates):
    """Load records from the first candidate file that exists.

    Args:
        candidates: Iterable of ``(label, path)`` pairs, tried in order.

    Returns:
        ``(records, label)`` for the first path that could be opened, or
        ``([], None)`` when none of the paths exist.
    """
    for label, path in candidates:
        try:
            return load_jsonl(path), label
        except FileNotFoundError:
            continue
    return [], None


def extract_trust_scores(records):
    """Collect the numeric trust score from each result record.

    The score is read from ``record['trustscore_output']['summary']['trust_score']``.

    Args:
        records: Iterable of parsed result records.

    Returns:
        ``(scores, skipped)``: the list of numeric scores found, and the
        number of records that had no usable numeric score.
    """
    scores = []
    skipped = 0
    for record in records:
        try:
            score = record['trustscore_output']['summary']['trust_score']
        except (KeyError, TypeError, IndexError):
            # Missing key, or an intermediate value that is not a mapping
            # (e.g. None for a failed sample).
            skipped += 1
            continue
        # Keep real numbers only; None/strings/bools would break or skew
        # the statistics below.
        if isinstance(score, bool) or not isinstance(score, (int, float)):
            skipped += 1
            continue
        scores.append(score)
    return scores, skipped


# Try Drive first, fall back to local storage.
results, source = load_first_available([
    ("Google Drive", results_path),
    ("local storage", local_results_path),
])
if source is None:
    print(f"[Warn] No results file found at {results_path} or {local_results_path}")
else:
    print(f"[Info] Loaded {len(results)} results from {source}")

# Extract trust scores and report anything that had to be dropped.
trust_scores, skipped_count = extract_trust_scores(results)
if skipped_count:
    print(f"[Warn] Skipped {skipped_count} of {len(results)} records without a numeric trust score")

if trust_scores:
    print("\nTrust Score Statistics:")
    print(f"  Mean: {np.mean(trust_scores):.3f}")
    print(f"  Std: {np.std(trust_scores):.3f}")
    print(f"  Min: {np.min(trust_scores):.3f}")
    print(f"  Max: {np.max(trust_scores):.3f}")
else:
    print("[Warn] No trust scores available; statistics not computed")
