In [1]:
import re

import pandas as pd
import vllm
import torch

# Define the available topics
TOPICS = [
    "Trump’s Legal Convictions and Felony Charges",
    "Biden vs. Trump Presidential Debates",
    "Israel-Hamas Conflict and Biden’s Ceasefire Proposal",
    "Hunter Biden’s Legal Troubles (e.g., Gun Charges)",
    "U.S. Policy on Ukraine and Russia",
    "Trump’s Tax Promises and Election Campaign",
    "Biden’s Immigration Policies and Executive Orders",
    "Legal Proceedings in Georgia’s 2020 Election Case Against Trump",
    "Trump’s Rallies and Live Events Coverage",
    "Celebrations of Trump (e.g., Birthdays and Tributes)",
    "Pro-Trump and MAGA Advocacy",
    "Nonsense",
]
model_id = "unsloth/Llama-3.3-70B-Instruct-bnb-4bit"
# Initialize vLLM model

llm = vllm.LLM(
    model=model_id,
    dtype=torch.bfloat16,
    trust_remote_code=True,
    quantization="bitsandbytes",
    load_format="bitsandbytes",
)
tokenizer = vllm.LLMTokenizer("unsloth/Llama-3.3-70B-Instruct-bnb-4bit")

# Define the prompt template
PROMPT_TEMPLATE = """
You are an AI assistant that evaluates whether a given post aligns with its assigned topic. 
Please follow these steps:
1. Determine if the post aligns with the assigned topic. If yes, respond with "Labeled correctly: True".
2. If not, respond with "Labeled correctly: False" and suggest the correct topic from the provided list.
3. If none of the topics fit, generate a new topic and respond accordingly.

Format your response strictly as follows:
Response:
Labeled correctly: [True/False]
If not, the correct label is: [Correct Topic or "Generated Topic: XYZ"]

Post: "{post}"

Assigned Topic: "{assigned_topic}"

List of Topics:
{topics}

Your Response:
"""


def evaluate_post(post, assigned_topic):
    """
    Sends a post to the LLM to evaluate topic alignment.
    Returns the raw response.
    """
    prompt = PROMPT_TEMPLATE.format(
        post=post, assigned_topic=assigned_topic, topics="\n".join(TOPICS)
    )

    # Generate response using vLLM
    outputs = model.generate([prompt], tokenizer=tokenizer, max_tokens=200)
    response = outputs[0].outputs[0].text.strip()

    return response


def parse_response(response):
    """
    Parses the LLM response to extract whether the post was correctly labeled and, if not, what the correct topic is.
    Returns a dictionary with alignment status and suggested topic.
    """
    labeled_correctly = (
        "False" not in response
    )  # If False is in response, it was misclassified

    if labeled_correctly:
        return {"Labeled correctly": True, "New Topic": None}

    # Extract new topic
    match = re.search(r"If not, the correct label is: (.+)", response)
    new_topic = match.group(1) if match else None

    return {"Labeled correctly": False, "New Topic": new_topic}


# Example usage
if __name__ == "__main__":
    bsky_df = pd.read_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_bsky_sample.csv"
    )
    ts_df = pd.read_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_ts_sample.csv"
    )
    for i, row in bsky_df.iterrows():
        sample_post = row["post"]
        assigned_topic = row["topic_label"]
        raw_result = evaluate_post(sample_post, assigned_topic)
        parsed_result = parse_response(raw_result)
        bsky_df.loc[i, "llm_response"] = raw_result
        bsky_df.loc[i, "parsed_judgement"] = parsed_result["Labeled correctly"]
        bsky_df.loc[i, "parsed_topic"] = parsed_result["New Topic"]

    for i, row in ts_df.iterrows():
        sample_post = row["post"]
        assigned_topic = row["topic_label"]
        raw_result = evaluate_post(sample_post, assigned_topic)
        parsed_result = parse_response(raw_result)
        ts_df.loc[i, "llm_response"] = raw_result
        ts_df.loc[i, "parsed_judgement"] = parsed_result["Labeled correctly"]
        ts_df.loc[i, "parsed_topic"] = parsed_result["New Topic"]

    bsky_df.to_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_bsky_sample.csv"
    )
    ts_df.to_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_ts_sample.csv"
    )

INFO 02-04 17:07:48 __init__.py:183] Automatically detected platform cuda.


2025-02-04 17:07:48,889	INFO util.py:154 -- Outdated packages:
  ipywidgets==7.6.5 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


config.json:   0%|          | 0.00/1.56k [00:00<?, ?B/s]

ERROR 02-04 17:07:52 registry.py:299] Error in inspecting model architecture 'LlamaForCausalLM'
ERROR 02-04 17:07:52 registry.py:299] Traceback (most recent call last):
ERROR 02-04 17:07:52 registry.py:299]   File "/home/maolee/.local/lib/python3.11/site-packages/vllm/model_executor/models/registry.py", line 495, in _run_in_subprocess
ERROR 02-04 17:07:52 registry.py:299]     returned.check_returncode()
ERROR 02-04 17:07:52 registry.py:299]   File "/sw/pkgs/arc/python3.11-anaconda/2024.02-1/lib/python3.11/subprocess.py", line 502, in check_returncode
ERROR 02-04 17:07:52 registry.py:299]     raise CalledProcessError(self.returncode, self.args, self.stdout,
ERROR 02-04 17:07:52 registry.py:299] subprocess.CalledProcessError: Command '['/sw/pkgs/arc/python3.11-anaconda/2024.02-1/bin/python', '-m', 'vllm.model_executor.models.registry']' returned non-zero exit status 1.
ERROR 02-04 17:07:52 registry.py:299] 
ERROR 02-04 17:07:52 registry.py:299] The above exception was the direct cause of

ValueError: Model architectures ['LlamaForCausalLM'] failed to be inspected. Please check the logs for more details.

In [2]:
import re
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the available topics
TOPICS = [
    "Trump’s Legal Convictions and Felony Charges",
    "Biden vs. Trump Presidential Debates",
    "Israel-Hamas Conflict and Biden’s Ceasefire Proposal",
    "Hunter Biden’s Legal Troubles (e.g., Gun Charges)",
    "U.S. Policy on Ukraine and Russia",
    "Trump’s Tax Promises and Election Campaign",
    "Biden’s Immigration Policies and Executive Orders",
    "Legal Proceedings in Georgia’s 2020 Election Case Against Trump",
    "Trump’s Rallies and Live Events Coverage",
    "Celebrations of Trump (e.g., Birthdays and Tributes)",
    "Pro-Trump and MAGA Advocacy",
    "Nonsense",
]

# Model identifier (ensure this model supports loading with 4-bit quantization)
model_id = "meta-llama/Llama-3.3-70B-Instruct"

# Initialize the tokenizer and model using Transformers.
# The model is loaded with 4-bit quantization, bfloat16 precision, and automatic device mapping.
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    load_in_4bit=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Define the prompt template
PROMPT_TEMPLATE = """
You are an AI assistant that evaluates whether a given post aligns with its assigned topic. 
Please follow these steps:
1. Determine if the post aligns with the assigned topic. If yes, respond with "Labeled correctly: True".
2. If not, respond with "Labeled correctly: False" and suggest the correct topic from the provided list.
3. If none of the topics fit, generate a new topic and respond accordingly.

Format your response strictly as follows:
Response:
Labeled correctly: [True/False]
If not, the correct label is: [Correct Topic or "Generated Topic: XYZ"]

Post: "{post}"

Assigned Topic: "{assigned_topic}"

List of Topics:
{topics}

Your Response:
"""

def evaluate_post(post, assigned_topic):
    """
    Uses the Transformers model to generate an evaluation response for the given post.
    Returns the generated text (the LLM's response).
    """
    prompt = PROMPT_TEMPLATE.format(
        post=post,
        assigned_topic=assigned_topic,
        topics="\n".join(TOPICS)
    )

    # Tokenize the prompt and move inputs to the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_length = inputs.input_ids.shape[1]

    # Generate up to 200 new tokens. Adjust parameters as needed.
    output_ids = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=False  # or set to True with sampling parameters if desired
    )

    # Extract only the generated text (exclude the prompt)
    generated_ids = output_ids[0][input_length:]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    return response

def parse_response(response):
    """
    Parses the LLM response to extract whether the post was correctly labeled and, if not,
    what the correct topic is.
    Returns a dictionary with alignment status and suggested topic.
    """
    # Check for misclassification by looking for "False" in the response
    labeled_correctly = "False" not in response

    if labeled_correctly:
        return {"Labeled correctly": True, "New Topic": None}

    # Extract the suggested topic using a regular expression.
    match = re.search(r"If not, the correct label is: (.+)", response)
    new_topic = match.group(1).strip() if match else None
    return {"Labeled correctly": False, "New Topic": new_topic}

if __name__ == "__main__":
    # Read the CSV files containing the posts and assigned topics
    bsky_df = pd.read_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_bsky_sample.csv"
    )
    ts_df = pd.read_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_ts_sample.csv"
    )

    # Process the Bluesky data
    for i, row in bsky_df.iterrows():
        sample_post = row["post"]
        assigned_topic = row["topic_label"]
        raw_result = evaluate_post(sample_post, assigned_topic)
        parsed_result = parse_response(raw_result)
        bsky_df.loc[i, "llm_response"] = raw_result
        bsky_df.loc[i, "parsed_judgement"] = parsed_result["Labeled correctly"]
        bsky_df.loc[i, "parsed_topic"] = parsed_result["New Topic"]

    # Process the TS data
    for i, row in ts_df.iterrows():
        sample_post = row["post"]
        assigned_topic = row["topic_label"]
        raw_result = evaluate_post(sample_post, assigned_topic)
        parsed_result = parse_response(raw_result)
        ts_df.loc[i, "llm_response"] = raw_result
        ts_df.loc[i, "parsed_judgement"] = parsed_result["Labeled correctly"]
        ts_df.loc[i, "parsed_topic"] = parsed_result["New Topic"]

    # Save the updated Bluesky dataframe to CSV
    bsky_df.to_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_bsky_sample_llama3_70b.csv",
        index=False
    )

    # Optionally, you could also save the TS dataframe if needed.
    # ts_df.to_csv("/path/to/your/output_ts_file.csv", index=False)


tokenizer_config.json:   0%|          | 0.00/55.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors.index.json:   0%|          | 0.00/331k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/8 [00:00<?, ?it/s]

model-00001-of-00008.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00002-of-00008.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00008.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00008.safetensors:   0%|          | 0.00/4.93G [00:00<?, ?B/s]

model-00005-of-00008.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00006-of-00008.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00007-of-00008.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00008-of-00008.safetensors:   0%|          | 0.00/4.75G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]



In [8]:
bsky_df.to_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_bsky_sample_llama3_70b.csv",
        index=False
    )
ts_df.to_csv(
        "/nfs/turbo/isr-fconrad1/Mao/projects/information-diffusion/data/topic_eval_ts_sample_llama3_70b.csv",
        index=False
    )

In [3]:
import pandas as pd
bsky_df = pd.read_csv(
    "../../data/topic_eval_bsky_sample_llama3_70b.csv"
)
ts_df = pd.read_csv(
    "../../data/topic_eval_ts_sample_llama3_70b.csv"
)


In [7]:
bsky_df['parsed_judgement'].sum()

np.int64(113)

In [6]:
ts_df['parsed_judgement'].value_counts()

parsed_judgement
True     144
False    113
Name: count, dtype: int64

In [16]:
bsky_df.loc[23,:]

index                  at://did:plc:l7dn34bre6lgyu3q4vkomw32/app.bsky...
max_depth                                                            0.0
size                                                                 1.0
breadth                                                              1.0
structural_virality                                                  0.0
reach                                                                1.0
post                   Maybe Trump is just trying to get out ahead of...
lang                                                                  en
topic                                                                9.0
platform                                                            bsky
topic_label            Biden’s Immigration Policies and Executive Orders
llm_response           Response:\nLabeled correctly: False\nIf not th...
parsed_judgement                                                   False
parsed_topic                                       