In [26]:
# environment/LLM/smart_citation_llm_rater.py

from environment.LLM.rater import LLMRater

class SmartCitationLLMRater(LLMRater):
    """LLM-backed rater that simulates a user scoring a scientific paper from 0 to 9.

    The prompt asks the model to weigh topic match, the user's novelty
    preference and the user's reputability bias, and to answer with a single
    number on the 0-9 scale.
    """

    def __init__(self, llm, current_items_features_list=None, previous_items_features_list=None, llm_render=False, llm_query_explanation=False):
        """Configure the rater and its system prompt.

        Args:
            llm: Language-model backend forwarded to ``LLMRater``.
            current_items_features_list: Forwarded to ``LLMRater``; ``None``
                (the default) is replaced by a fresh empty list.
            previous_items_features_list: Forwarded to ``LLMRater``; ``None``
                (the default) is replaced by a fresh empty list.
            llm_render: Forwarded unchanged to ``LLMRater``.
            llm_query_explanation: Forwarded unchanged to ``LLMRater``.
        """
        # A literal `[]` default would be one list object shared by every
        # instance; create a fresh list per instance instead.
        if current_items_features_list is None:
            current_items_features_list = []
        if previous_items_features_list is None:
            previous_items_features_list = []
        super().__init__(llm, current_items_features_list, previous_items_features_list, llm_render, llm_query_explanation)
        # NOTE(review): presumably consumed by LLMRater to pick the 0-9 request
        # method on the LLM backend - confirm against the base class.
        self.request_scale = "0-9"
        self.system_prompt = (
            '''You are simulating a user rating scientific papers.
                When rating, you MUST consider the following:

                - Topic Matching: Rate higher if paper topics match user's interests.
                - Novelty Preference: The user has a novelty preference score from 0 (does not care about recency) to 1 (cares strongly about recency). 
                You should reward papers with a Normalized Year closer to 1 based on the user's novelty preference.
                - Reputability Bias: The user has a reputability bias score from 0 (does not care about citations) to 1 (cares strongly about highly cited papers).
                You should reward papers with a Normalized Citations (Reputability) closer to 1 based on the user's reputability bias.
                
                You must **combine these factors** logically to decide if the user would rate the paper highly or not.
                ONLY output a single rating from 0 (not interested) to 9 (extremely interested).
'''
        )

    def _get_few_shot_prompts(self):
        """Return the few-shot examples for the prompt; currently none."""
        return []

    def _get_prompt(self, user, item, num_interacted, interactions, retrieved_items):
        """Build the chat messages used to ask the LLM for a rating.

        Args:
            user: Object exposing ``description`` (free-text profile).
            item: Object exposing ``title``, ``topics`` (iterable of strings),
                ``norm_year`` and ``norm_cite`` (numbers formatted to 2 decimals).
            num_interacted: Unused by this rater.
            interactions: Unused by this rater.
            retrieved_items: Unused by this rater.

        Returns:
            A one-element list holding a single ``"user"`` message whose content
            is the system prompt, the user profile, the paper details and the
            rating question, separated by blank lines.
        """
        # The novelty and reputability scores are not added separately; they are
        # expected to be part of the description text, e.g.
        # "Interested in: Web visibility and informetrics. Prefers novelty: 0.74, reputability bias: 0.47"
        user_info = (
            f"User Profile:\n"
            f" {user.description}"
        )

        paper_info = (
            f"Paper Details:\n"
            f"- Title: {item.title}\n"
            f"- Topics: {', '.join(item.topics)}\n"
            f"- Normalized Year (0-1): {item.norm_year:.2f}\n"
            f"- Normalized Citations (Reputability) (0-1): {item.norm_cite:.2f}\n"
        )
        question = "Question:\nHow much would the user like this paper? (ONLY output a number from 0 to 9)"

        # The system prompt is embedded in the user message itself.
        full_prompt = f"{self.system_prompt}\n\n{user_info}\n\n{paper_info}\n\n{question}"

        return [{"role": "user", "content": full_prompt}]

    def adjust_rating_in(self, rating):
        """Return ``rating`` unchanged (no rescaling is applied)."""
        return rating

    def adjust_rating_out(self, rating):
        """Return ``rating`` unchanged (no rescaling is applied)."""
        return rating

    def adjust_text_in(self, text):
        """Return ``text`` unchanged."""
        return text


# environment/LLM/small_hf_llm.py

from environment.LLM.llm import LLM
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

class SmallHuggingfaceLLM(LLM):
    """Wrap a Hugging Face seq2seq checkpoint behind the project's ``LLM`` interface."""

    def __init__(self, model_name="google/flan-t5-large", device="cpu"):
        """Load the tokenizer and model for ``model_name`` and move the model to ``device``."""
        super().__init__(model_name)
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        self.model = seq2seq.to(device)
        self.model.eval()

    def _generate(self, prompt):
        """Tokenize ``prompt``, generate at most 10 new tokens and return the decoded text."""
        encoded = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        generated = self.model.generate(
            **encoded,
            max_new_tokens=10,
            pad_token_id=self.tokenizer.eos_token_id
        )
        # Only the first returned sequence is decoded.
        return self.tokenizer.decode(generated[0], skip_special_tokens=True)

    def request_rating_0_9(self, system_prompt, dialog):
        """Return ``(prompt, rating)`` with the rating as a string in 0-9; ``system_prompt`` is not used."""
        prompt = self._dialog_to_text(dialog)
        return prompt, self._extract_number(self._generate(prompt), 0, 9)

    def request_rating_1_10(self, system_prompt, dialog):
        """Return ``(prompt, rating)`` with the rating as a string in 1-10; ``system_prompt`` is not used."""
        prompt = self._dialog_to_text(dialog)
        return prompt, self._extract_number(self._generate(prompt), 1, 10)

    def request_rating_text(self, system_prompt, dialog):
        """Return ``(prompt, raw_model_text)``; ``system_prompt`` is not used."""
        prompt = self._dialog_to_text(dialog)
        return prompt, self._generate(prompt)

    def request_explanation(self, system_prompt, dialog):
        """Return ``(prompt, raw_model_text)``; ``system_prompt`` is not used."""
        prompt = self._dialog_to_text(dialog)
        return prompt, self._generate(prompt)

    def _dialog_to_text(self, dialog):
        """Flatten chat messages into ``"Role: content"`` lines joined by newlines."""
        rendered = []
        for turn in dialog:
            rendered.append(f"{turn['role'].capitalize()}: {turn['content']}")
        return "\n".join(rendered)

    def _extract_number(self, text, min_val, max_val):
        """Return the first integer in ``text`` within ``[min_val, max_val]`` as a string.

        Falls back to the midpoint of the range (integer division) when no
        in-range integer is found.
        """
        import re
        for token in re.findall(r'\d+', text):
            value = int(token)
            if value < min_val or value > max_val:
                continue
            return str(value)
        return str((min_val + max_val) // 2)  # fallback: middle value



# Use the small FLAN-T5 checkpoint on CPU as the backend for the citation rater.
llm = SmallHuggingfaceLLM(
    model_name="google/flan-t5-small",
    device="cpu",
)
rater = SmartCitationLLMRater(llm)

In [43]:
import sys
import os
sys.path.append(os.path.abspath(".."))  

from stable_baselines3 import DQN
from environment.env import Simulatio4RecSys
from environment.citations.citation_loader import CitationsLoader
from environment.users.citation_users_loader import CitationUsersLoader
from environment.citations.citations_retrieval import CitationsRetrieval
from environment.items_selection import GreedySelector
from environment.reward_shaping import IdentityRewardShaping
from environment.reward_perturbator import NoPerturbator
from environment.flat_obs_wrapper import FlatObsWrapper
from environment.LLM.rater import DummyLLMRater
from environment.LLM.dummy_llm import DummyLLM
from stable_baselines3.common.monitor import Monitor


# Dataset locations, given relative to the current working directory.
csv_path = "../environment/citations/datasets/cleaned-scientometrics-and-bibliometrics-research.csv"
users_path = "../environment/users/datasets/citation_users.json"

# Loaders for the paper catalogue and the user profiles.
items_loader = CitationsLoader(csv_path)
users_loader = CitationUsersLoader(users_path)

# Unwrapped simulation environment; ratings come from the LLM rater built earlier.
raw_env = Simulatio4RecSys(
    render_mode="human",
    items_loader=items_loader,
    users_loader=users_loader,
    items_selector=GreedySelector(),
    reward_perturbator=NoPerturbator(),
    items_retrieval=CitationsRetrieval(),
    reward_shaping=IdentityRewardShaping(),
    llm_rater=rater,
)

# Flatten observations first, then add episode monitoring on top.
env = Monitor(FlatObsWrapper(raw_env))

# Restore the trained DQN policy and attach it to the wrapped environment.
model = DQN.load(
    "models/dqn_citation_recommender_llm",
    env=env,
    device="cpu",
)


Wrapping the env in a DummyVecEnv.


In [44]:
import numpy as np

def evaluate_precision_at_k(env, model, rater, users_loader, K=5, threshold=6, num_users_eval=2):
    """
    Evaluate Precision@K over the first `num_users_eval` users.

    For each evaluated user the environment is reset, the model is asked for up
    to K recommendations, and every recommended paper is rated by `rater`. A
    recommendation counts as a "click" when its rating is >= `threshold`. A
    user's precision is clicks / K, even when the episode ends before K steps.

    NOTE(review): `env.reset()` is called without selecting the user taken from
    `users_loader`; whether the environment's internal user matches the one
    passed to `rater` is not visible here - confirm.

    Args:
        env: The environment (wrapped with Monitor, FlatObsWrapper, etc.)
        model: Trained RL model (e.g., DQN, A2C)
        rater: LLMRater or RuleBasedRater to simulate user feedback
        users_loader: Loader to access users
        K: Number of top recommendations
        threshold: Minimum score to count as a "click"
        num_users_eval: Number of users, taken from the start of the user list,
            to evaluate. Defaults to 2 (the previously hard-coded cap); pass
            None to evaluate every user.

    Returns:
        mean_precision: Mean precision@K across the evaluated users, or 0.0
            when no user was evaluated.
    """

    all_precisions = []
    # Fetch the users once instead of once per loop iteration.
    users = users_loader.get_users()

    # Slicing with None keeps the whole range, i.e. evaluates all users.
    for user_idx in range(len(users))[:num_users_eval]:
        # Reset user
        obs = env.reset()[0]
        user = users[user_idx]

        clicked = []

        for _ in range(K):
            action, _ = model.predict(obs, deterministic=True)
            action = int(action)
            item_id = env.unwrapped.action_to_item[action]
            paper = env.unwrapped.items_loader.load_items_from_ids([item_id])[0]
            print(f"User {user_idx}: Paper ID: {item_id}, Title: {paper.title}")

            # Get simulated "rating" from user for this paper
            rating, _, _ = rater.query(user, paper, 0, [], [])

            # Check if user "clicked" (liked it)
            clicked.append(rating >= threshold)

            # Step environment
            obs, reward, done, truncated, info = env.step(action)

            if done or truncated:
                break

        all_precisions.append(np.sum(clicked) / K)

    # np.mean([]) would return NaN with a RuntimeWarning; report 0.0 instead.
    if not all_precisions:
        return 0.0
    return np.mean(all_precisions)


In [45]:
# Score the loaded policy with a lenient click threshold and report the result.
mean_precision = evaluate_precision_at_k(
    env,
    model,
    rater,
    users_loader,
    K=5,
    threshold=1,
)
print(f"📊 Precision@5 = {mean_precision:.4f}")


User 0: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
User 0: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
User 0: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
User 0: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
User 0: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
User 1: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
User 1: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
User 1: Paper ID: 825, Title: Is commercialization good or bad for science? Individual-lev

In [48]:
import numpy as np

def evaluate_precision_at_k(env, model, rater, users_loader, K=5, threshold=6, num_users_eval=2):
    """
    Evaluate Precision@K across first `num_users_eval` users.

    For each evaluated user the environment is reset and up to K distinct
    actions are recommended: the model's deterministic prediction is used
    unless it was already recommended to this user, in which case random
    actions are sampled until an unused one is found. Each recommended paper
    is rated by `rater`; a rating >= `threshold` counts as a "click". A user's
    precision is clicks / K, even when fewer than K recommendations were made.

    NOTE(review): `env.reset()` is called without selecting the user taken from
    `users_loader`; whether the environment's internal user matches the one
    passed to `rater` is not visible here - confirm.

    Args:
        env: The environment (wrapped with Monitor, FlatObsWrapper, etc.)
        model: Trained RL model (e.g., DQN, A2C)
        rater: LLMRater or RuleBasedRater to simulate user feedback
        users_loader: Loader to access users
        K: Number of top recommendations
        threshold: Minimum score to count as a "click"
        num_users_eval: Number of users to evaluate (default 2); pass None to
            evaluate every user

    Returns:
        mean_precision: Mean precision@K across users, or 0.0 when no user
            was evaluated
    """

    all_precisions = []
    # Fetch the users once instead of once per loop iteration.
    users = users_loader.get_users()
    # Number of discrete actions, when the action space exposes it; used to
    # stop once every action has already been recommended.
    n_actions = getattr(env.action_space, "n", None)

    # Slicing with None keeps the whole range, i.e. evaluates all users.
    for user_idx in range(len(users))[:num_users_eval]:
        obs = env.reset()[0]
        user = users[user_idx]

        clicked = []
        recommended_actions = set()

        print(f"\n🔵 User {user_idx}: {user.description}")

        for rec_idx in range(K):
            # Without this guard the re-sampling loop below never terminates
            # once all actions have been used (K larger than the action space).
            if n_actions is not None and len(recommended_actions) >= n_actions:
                break

            action, _ = model.predict(obs, deterministic=True)
            action = int(action)

            # Avoid recommending the same paper again
            while action in recommended_actions:
                action = int(env.action_space.sample())

            recommended_actions.add(action)

            item_id = env.unwrapped.action_to_item[action]
            paper = env.unwrapped.items_loader.load_items_from_ids([item_id])[0]

            print(f"    ➔ Recommendation {rec_idx+1}: Paper ID {item_id} - {paper.title}")

            # Get simulated "rating" from user for this paper
            rating, _, _ = rater.query(user, paper, 0, [], [])

            # Check if user "clicked" (liked it)
            clicked.append(rating >= threshold)

            # Step environment
            obs, reward, done, truncated, info = env.step(action)

            # Start a new episode and keep recommending to the same user.
            if done or truncated:
                obs = env.reset()[0]

        all_precisions.append(np.sum(clicked) / K)

    # np.mean([]) would return NaN with a RuntimeWarning; report 0.0 instead.
    mean_precision = np.mean(all_precisions) if all_precisions else 0.0
    print(f"\n📊 Precision@{K}: {mean_precision:.4f}")
    return mean_precision
# Evaluate with a click threshold of 2; the function prints the mean precision itself.
mean_precision = evaluate_precision_at_k(
    env,
    model,
    rater,
    users_loader,
    K=5,
    threshold=2,
)


🔵 User 0: Interested in: Web visibility and informetrics. Prefers novelty: 0.74, reputability bias: 0.47
    ➔ Recommendation 1: Paper ID 825 - Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
    ➔ Recommendation 2: Paper ID 14589 - PROCEEDINGS OF THE AMERICAN PHYSIOLOGICAL SOCIETY
    ➔ Recommendation 3: Paper ID 8507 - Metrics of activity in social networks are correlated with traditional metrics of scientific impact in endocrinology journals
    ➔ Recommendation 4: Paper ID 40492 - Casey, Authority on Ink Chemistry, Wins Iowa Award
    ➔ Recommendation 5: Paper ID 43716 - You set the standards

🔵 User 1: Interested in: Science and Science Education, Intellectual Capital and Performance Analysis, Innovation Policy and R&D. Prefers novelty: 0.5, reputability bias: 0.65
    ➔ Recommendation 1: Paper ID 825 - Is commercialization good or bad for science? Individual-level evidence from the Max Planck Society
    ➔ Recommendation 2: Pap