In [1]:
%load_ext autoreload
import os
import sys

os.environ['TRANSFORMERS_CACHE'] = '/mnt/swordfish-pool2/milad/hf-cache-new'
os.environ['HF_DATASETS_CACHE'] = '/mnt/swordfish-pool2/milad/hf-cache-new'
os.environ["OPENAI_API_KEY"]= 'xxx'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
sys.path.append('../src-py')

In [2]:
import json
keys = json.load(open('/mnt/swordfish-pool2/milad/code/llms-as-science-communicators/keys.json'))
for key, val in keys.items():
    os.environ[key] = val

In [3]:
import datasets
import json
import os
import numpy as np
import pandas as pd
import copy
import matplotlib.pyplot as plt
import re
from collections import Counter

pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)


%autoreload
import utils
import datadreamer_generation

import tiktoken
from transformers import AutoTokenizer



In [4]:
from huggingface_hub import login

login(keys['hf_token'])

In [5]:
output_dir = '/mnt/swordfish-pool2/milad/communicating-science-to-the-public/'

In [38]:
from openai import OpenAI
from openai import OpenAIError  # Base class for OpenAI exceptions
import openai
from pydantic import BaseModel
from openai.lib._pydantic import to_strict_json_schema
from tqdm import tqdm
from enum import Enum

In [15]:
eval_client = OpenAI(base_url="http://localhost:9988/v1", api_key="-")
evaluator_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"

In [54]:
class BinaryDecision(int, Enum):
    Yes  = 1
    No   = 0    

class EvalSchema(BaseModel):
    Vaguness: BinaryDecision
    ComplexAspects : list[str]   

def rate_vagueness(dataset, evaluator_name, eval_prompt, input_name):
    eval_scores = []
    for row in tqdm(dataset):
        instruction = '{}\n\n{}'.format(eval_prompt, "[TEXT]: {}".format(row[input_name]))
        print(instruction)
        completion = eval_client.chat.completions.create(
            model= evaluator_name,
            messages=[
                {
                    "role": "user",
                    "content": instruction,
                },
            ],
            response_format={"type": "json_schema", "json_schema": {"name":"eval_scheme", "schema": to_strict_json_schema(EvalSchema)}},
        )
        print(completion.choices[0].message)
        score = json.loads(completion.choices[0].message.reasoning_content)
        eval_scores.append(score)
        
    dataset = dataset.add_column('scoring_parsed', eval_scores)
    return dataset

In [58]:
texts_ds  = datasets.Dataset.from_list([
    {"text": 'IL-6 is involved in various inflammatory processes. Conditions like obesity or cardiovascular disease, which are associated with elevated IL-6 levels, could make the liver more receptive to cancer metastasis, even in the absence of a primary tumor.'},
    {"text": 'something is involved in various inflammatory processes. Conditions like obesity or cardiovascular disease'}
])
prompt = """
    Evaluate the following quality aspects in the text:
    1. Vaguness: it is defined as the lack of clear, specific, or precise meaning, often resulting in a response that is not directly useful or helpful to the question asked, potentially leading to confusion or multiple interpretations
    Your answer should be either Yes if the text is vague or no if the clear is clear.
    2. Complex aspects: Extract list of concepts that are highly technical that only an expert in the filed would understand
"""

result = rate_vagueness(texts_ds, evaluator_name, prompt, "text")

  0%|                                                                                           | 0/2 [00:00<?, ?it/s]


    Evaluate the following quality aspects in the text:
    1. Vaguness: it is defined as the lack of clear, specific, or precise meaning, often resulting in a response that is not directly useful or helpful to the question asked, potentially leading to confusion or multiple interpretations
    Your answer should be either Yes if the text is vague or no if the clear is clear.
    2. Complex aspects: Extract list of concepts that are highly technical that only an expert in the filed would understand


[TEXT]: IL-6 is involved in various inflammatory processes. Conditions like obesity or cardiovascular disease, which are associated with elevated IL-6 levels, could make the liver more receptive to cancer metastasis, even in the absence of a primary tumor.


 50%|█████████████████████████████████████████▌                                         | 1/2 [00:04<00:04,  4.50s/it]

ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[], reasoning_content='{ "Vaguness": 0, "ComplexAspects": ["IL-6", "inflammatory processes", "obesity", "cardiovascular disease", "elevated IL-6 levels", "liver", "cancer metastasis", "primary tumor"] }')

    Evaluate the following quality aspects in the text:
    1. Vaguness: it is defined as the lack of clear, specific, or precise meaning, often resulting in a response that is not directly useful or helpful to the question asked, potentially leading to confusion or multiple interpretations
    Your answer should be either Yes if the text is vague or no if the clear is clear.
    2. Complex aspects: Extract list of concepts that are highly technical that only an expert in the filed would understand


[TEXT]: something is involved in various inflammatory processes. Conditions like obesity or cardiovascular disease


100%|███████████████████████████████████████████████████████████████████████████████████| 2/2 [00:05<00:00,  2.69s/it]

ChatCompletionMessage(content=None, refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=[], reasoning_content='{"Vaguness": 0, "ComplexAspects": []}')





In [60]:
result['scoring_parsed']

[{'ComplexAspects': ['IL-6',
   'inflammatory processes',
   'obesity',
   'cardiovascular disease',
   'elevated IL-6 levels',
   'liver',
   'cancer metastasis',
   'primary tumor'],
  'Vaguness': 0},
 {'ComplexAspects': [], 'Vaguness': 0}]