# Spam Classification using LLM


In [58]:
import pandas as pd
from tqdm import tqdm

## Data Preparation


In [59]:
# Load the corpus CSV into a DataFrame; later cells read its "text" and
# "deceptive" columns.
train_df = pd.read_csv("./input/deceptive-opinion-spam-corpus/deceptive-opinion.csv")

In [60]:
# Draw 1600 rows at random as the evaluation set. No random_state is given, so
# the selection/order differs between runs.
# NOTE(review): these rows come from train_df, which get_few_shot_prompt also
# samples from, so a few-shot example can be the very review being classified.
test_df = train_df.sample(1600)

In [None]:
# Bare expression: in a notebook cell this displays the sampled DataFrame.
test_df

## OpenAI API


In [62]:
from openai import OpenAI

# Shared API client used by get_response.
# NOTE(review): no credentials are passed here; presumably they are read from
# the environment -- confirm before running.
client = OpenAI()

In [63]:
def get_few_shot_prompt(n_shots=3):
    """
    Build few-shot example messages from randomly chosen training reviews.

    Half of the examples (rounded down) are drawn from each class so the
    prompt is balanced. When ``n_shots`` is odd, one extra review is drawn
    from the rows that were not already selected, so the same review never
    appears twice in one prompt (unless the data set is too small to allow it).

    Args:
        n_shots (int): Number of examples to include in the prompt.

    Returns:
        list[dict]: Chat messages in shuffled example order. Every example
        contributes a ``user`` message holding the review text followed by an
        ``assistant`` message holding its label, i.e. ``2 * n_shots`` entries.

    Raises:
        ValueError: If ``n_shots`` is negative. pandas raises the same error
            type when a class has fewer than ``n_shots // 2`` rows.
    """
    if n_shots < 0:
        raise ValueError(f"n_shots must be non-negative, got {n_shots}")
    if n_shots == 0:
        return []

    # Equal share per class; has_extra is 1 when n_shots is odd.
    n_per_class, has_extra = divmod(n_shots, 2)

    labels = train_df["deceptive"]
    parts = [
        train_df[labels == "deceptive"].sample(n=n_per_class),
        train_df[labels == "truthful"].sample(n=n_per_class),
    ]

    if has_extra:
        # Draw the odd example from rows not picked above; sampling from the
        # full frame could silently repeat an example already in the prompt.
        already_chosen = parts[0].index.append(parts[1].index)
        pool = train_df.drop(index=already_chosen)
        if pool.empty:
            # Tiny data set: nothing unused is left, fall back to any row.
            pool = train_df
        parts.append(pool.sample(n=1))

    # Skip empty frames (n_per_class == 0) before concatenating, then shuffle
    # so the class order inside the prompt is random.
    non_empty = [part for part in parts if not part.empty]
    few_shot_examples = pd.concat(non_empty).sample(frac=1)

    # One user/assistant pair per example, in conversation format.
    few_shot_prompts = []
    for text, label in zip(few_shot_examples["text"], few_shot_examples["deceptive"]):
        few_shot_prompts.append({"role": "user", "content": f"{text}"})
        few_shot_prompts.append({"role": "assistant", "content": f"{label}"})

    return few_shot_prompts

In [64]:
def get_response(prompt, model="gpt-4o-mini", n_shots=0):
    """
    Ask the chat model to label one text and return its raw reply.

    The request consists of a fixed system instruction, optionally followed by
    few-shot user/assistant example pairs, and finally the text to classify.

    Args:
        prompt (str): The text to be classified.
        model (str, optional): Model name passed to the API. Defaults to
            "gpt-4o-mini".
        n_shots (int, optional): Number of in-context examples to prepend.
            Defaults to 0 (no examples).

    Returns:
        The content of the first completion choice, returned as-is (ideally
        'truthful' or 'deceptive'; no normalisation is applied here).
    """
    # Instruction text sent as the system message (line breaks and indentation
    # are part of the string).
    system_role = (
        "You are a classification model trained to identify whether incoming messages are spam. \n"
        "    Given a message, analyze its content, structure, and intent to determine if it is 'truthful' (not spam) or 'deceptive' (spam). \n"
        "    Return 'truthful' if the message does not contain signs of spam and is a genuine communication. \n"
        "    Return 'deceptive' if the message displays characteristics commonly associated with spam, such as misleading claims, requests for sensitive information, or excessive promotions."
    )

    # In-context learning: example pairs are only fetched when requested.
    few_shot_messages = get_few_shot_prompt(n_shots) if n_shots > 0 else []

    # System instruction first, then examples, then the actual query.
    messages = [
        {"role": "system", "content": system_role},
        *few_shot_messages,
        {"role": "user", "content": prompt},
    ]

    completion = client.chat.completions.create(model=model, messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

## Helper Functions


In [65]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import classification_report, confusion_matrix

In [66]:
def count_predictions(result):
    """
    Print how many predictions fall in each class and list the invalid ones.

    A prediction is valid only if it is exactly "deceptive" or "truthful";
    every other value is printed individually after the summary.
    """
    total = len(result)
    n_deceptive = result.count("deceptive")
    n_truthful = result.count("truthful")
    n_invalid = total - n_deceptive - n_truthful

    # Summary block, terminated by an empty line.
    summary_lines = [
        f"Total predictions: {total}",
        f"Number of deceptive predictions: {n_deceptive}",
        f"Number of truthful predictions: {n_truthful}",
        f"Number of invalid predictions: {n_invalid}",
        "",
    ]
    print("\n".join(summary_lines))

    # Echo every response that is not an exact class label.
    for prediction in result:
        if prediction not in ("deceptive", "truthful"):
            print(f"Invalid prediction found: \n{prediction}")

In [67]:
def get_classification_report(y_true, y_pred, result):
    """
    Print a classification report for raw model responses.

    Responses that are not exactly "deceptive" or "truthful" are counted as
    errors by replacing them with the opposite of the true label.

    Args:
        y_true: True labels, positionally aligned with ``result``.
        y_pred (list): Output list. Its previous contents are discarded and it
            is refilled in place with one cleaned prediction per response, so
            the caller can reuse it afterwards (e.g. for a confusion matrix).
        result: Raw model responses, one per entry of ``y_true``.

    Raises:
        ValueError: If ``result`` and ``y_true`` differ in length.
    """
    if len(result) != len(y_true):
        raise ValueError(
            f"result has {len(result)} entries but y_true has {len(y_true)}"
        )

    valid_labels = ("deceptive", "truthful")

    # Rebuild y_pred in place (slice assignment keeps the caller's list object).
    # Starting from scratch keeps predictions aligned with y_true even when the
    # function is called again with a y_pred that was already filled.
    y_pred[:] = [
        pred
        if pred in valid_labels
        # Invalid response: force a misclassification for this sample.
        else ("truthful" if true_label == "deceptive" else "deceptive")
        for true_label, pred in zip(y_true, result)
    ]

    # Print classification report with all metrics
    print("Classification Report:")
    print(classification_report(y_true, y_pred))

In [68]:
def get_confusion_matrix(y_true, y_pred):
    """
    Compute the confusion matrix for the two classes and show it as a heatmap.

    Rows are true labels and columns are predicted labels, both ordered
    "deceptive" then "truthful".
    """
    class_names = ["deceptive", "truthful"]

    # Fix the label order so matrix rows/columns match the tick labels.
    matrix = confusion_matrix(y_true, y_pred, labels=class_names)

    # Annotated heatmap with integer cell counts.
    plt.figure(figsize=(8, 6))
    heatmap_options = {
        "annot": True,
        "fmt": "d",
        "xticklabels": class_names,
        "yticklabels": class_names,
    }
    sns.heatmap(matrix, **heatmap_options)

    plt.title("Confusion Matrix")
    plt.ylabel("True Label")
    plt.xlabel("Predicted Label")
    plt.show()

In [69]:
def save_results(result, n_shots):
    """
    Save the test data together with the model predictions to a CSV file.

    The file is written to ``./results/llm/prediction_results_{n_shots}.csv``;
    the directory is created first if it does not exist yet.

    Args:
        result: Predictions, one per row of ``test_df`` and in the same order.
        n_shots: Number of few-shot examples used; only used in the file name.
    """
    from pathlib import Path

    output_path = Path("./results/llm") / f"prediction_results_{n_shots}.csv"
    # to_csv does not create missing directories, so make sure it exists.
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Copy so the shared test_df does not gain a "predicted" column.
    results_df = test_df.copy()
    results_df["predicted"] = result

    # Save to CSV file
    results_df.to_csv(output_path, index=False)
    print(f"Results saved to prediction_results_{n_shots}.csv")

## Zero-shot


In [70]:
# # Get response from LLM and store in result
# result = []

# for text in tqdm(test_df.text):
#     response = get_response(text)
#     result.append(response)

### Save result to csv file


In [71]:
# save_results(result, 0)

### Validation


In [72]:
# count_predictions(result)

_Note_: An LLM response that merely contains the word "truthful" or "deceptive" could arguably be treated as a valid prediction of that class, but for the purpose of evaluation only exact matches are accepted; every other response is counted as invalid here.


In [73]:
# Convert predictions and actual values to lists
# True labels as a plain list; y_pred starts empty and is filled later by
# get_classification_report
y_true = test_df["deceptive"].tolist()
y_pred = []

In [74]:
# get_classification_report(y_true, y_pred, result)
# get_confusion_matrix(y_true, y_pred)

### Analysis


For the "deceptive" class, precision is 71%: when the model predicted "deceptive", it was correct 71% of the time and incorrect 29% of the time. For the "truthful" class, precision is 50%: when the model predicted "truthful", it was correct 50% of the time and incorrect 50% of the time.

For the "deceptive" class, recall is only 1%: just 1% of the actual deceptive cases were correctly identified, and 99% of the deceptive cases were misclassified as truthful. For the "truthful" class, recall is 100%: all actual truthful cases were correctly identified, but this is not meaningful because the model predicted "truthful" for almost every case (99.56%).

The F1-score for the "deceptive" class is 0.01, which is very low. The F1-score for the "truthful" class is 0.67, but, again, it is not meaningful for the reason mentioned above.

In conclusion, the statistics above show that, in the zero-shot setting, the LLM is not able to distinguish between deceptive and truthful messages.


## Few-shot


In [None]:
def get_few_shot_results(n_shots):
    """
    Run the complete few-shot evaluation for a given number of examples.

    Queries the model for every test text, saves the predictions, prints the
    prediction counts and the classification report, and plots the confusion
    matrix.
    """
    # One API call per test text, with a progress bar.
    result = [get_response(text, n_shots=n_shots) for text in tqdm(test_df.text)]

    # Persist the raw responses before evaluating them.
    save_results(result, n_shots)

    # Class counts plus a listing of invalid responses.
    count_predictions(result)

    # y_pred starts empty; get_classification_report fills it in place so the
    # confusion matrix below sees the cleaned predictions.
    y_true, y_pred = list(test_df.deceptive), []
    get_classification_report(y_true, y_pred, result)
    get_confusion_matrix(y_true, y_pred)

### 1-shot


In [None]:
# Query the model once per test text, with a single in-context example each
result = [get_response(text, n_shots=1) for text in tqdm(test_df.text)]

In [None]:
# Write the 1-shot predictions next to the test data as a CSV file.
save_results(result, 1)

In [None]:
# Print per-class counts and list any response that is not an exact label.
count_predictions(result)

In [89]:
# Convert predictions and actual values to lists
# True labels as a plain list; y_pred starts empty and is filled later by
# get_classification_report
y_true = test_df["deceptive"].tolist()
y_pred = []

In [None]:
# Print the 1-shot classification report; this call also fills y_pred in place,
# which the confusion-matrix plot on the next line relies on.
get_classification_report(y_true, y_pred, result)
get_confusion_matrix(y_true, y_pred)

### 2-shot


In [None]:
# Run the whole pipeline (query, save, count, report, plot) with 2 examples.
get_few_shot_results(2)

### 3-shot


In [None]:
# Run the whole pipeline (query, save, count, report, plot) with 3 examples.
get_few_shot_results(3)

### 4-shot


In [None]:
# Run the whole pipeline (query, save, count, report, plot) with 4 examples.
get_few_shot_results(4)