In [None]:
# !pip install --upgrade llama-cpp-python
# !pip install openai
# !pip install sse_starlette
# !pip install starlette_context
# !pip install pydantic_settings
# !pip install "fastapi[all]"
# !pip install pandas
# !pip install scikit-learn


In [None]:
# import openai
# from openai import OpenAI
# # Point to the server
# client = OpenAI(base_url="http://localhost:8000/v1", api_key="cltl")
# # Sentences to classify
# sentences = [
# "I hate you and I hope you fail.",
# "What a beautiful day to go for a walk!",
# "Your idea is stupid and nobody cares."
# ]
# # Build a single prompt
# prompt = "Classify each of the following sentences as 'hate' or 'non-hate':\n\n"
# for i, s in enumerate(sentences, 1):
#     prompt += f"{i}. {s}\n"
    
# prompt += "\nReturn the results in the format:\n<number>. <label>\n"
# # Make one request for all sentences
# response = client.completions.create(
#     model="local model", # currently unused
#     prompt=prompt,
#     max_tokens=50,
#     temperature=0,
#     stop=["Classify", "\n\n"]
#     )
# # Print the raw model output
# print(response.choices[0].text.strip())

In [1]:
import openai
from openai import OpenAI
import pandas as pd
import json
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import random
from typing import List, Tuple
import re

# Initialize OpenAI client pointing to local LLaMA server
client = OpenAI(base_url="http://localhost:8000/v1", api_key="cltl")

# Set random seed for reproducibility
random.seed(42)
np.random.seed(42)

### Load dataset

In [3]:
# Load datasets
train_df = pd.read_csv(r'Subjectivity_mining_assignment_4_5_data\olid-train-small.csv')
test_df = pd.read_csv(r'Subjectivity_mining_assignment_4_5_data\OLID-test.csv')


print(f"Train set size: {len(train_df)}")
print(f"Test set size: {len(test_df)}")
print(f"Train label distribution:\n{train_df['labels'].value_counts()}")
print(f"Test label distribution:\n{test_df['labels'].value_counts()}")

Train set size: 5852
Test set size: 860
Train label distribution:
labels
0    3591
1    2261
Name: count, dtype: int64
Test label distribution:
labels
0    620
1    240
Name: count, dtype: int64


In [4]:
print(train_df.info())
print(test_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5852 entries, 0 to 5851
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      5852 non-null   int64 
 1   text    5852 non-null   object
 2   labels  5852 non-null   int64 
dtypes: int64(2), object(1)
memory usage: 137.3+ KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 860 entries, 0 to 859
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      860 non-null    int64 
 1   text    860 non-null    object
 2   labels  860 non-null    int64 
dtypes: int64(2), object(1)
memory usage: 20.3+ KB
None


### Helper methodes

In [None]:
def get_prediction(prompt):
    try:
        response = client.completions.create(
            model="local model",
            prompt=prompt,
            max_tokens=10,
            temperature=0,
            stop=["\n", "Text:", "Classification:"]
        )
        return response.choices[0].text.strip()
    except:
        return "non-offensive"

def parse_response(response, use_toxic_labels=False):
    response_lower = response.lower().strip()
    
    if use_toxic_labels:
        if 'toxic' in response_lower and 'non-toxic' not in response_lower:
            return 1
        return 0
    else:
        if 'offensive' in response_lower and 'non-offensive' not in response_lower:
            return 1
        return 0

def get_random_examples(n_examples):
    n_per_class = n_examples // 2
    
    offensive = train_df[train_df['labels'] == 1].sample(n=n_per_class, random_state=42)
    non_offensive = train_df[train_df['labels'] == 0].sample(n=n_per_class, random_state=42)
    
    examples = []
    for _, row in offensive.iterrows():
        examples.append((row['text'], 1))
    for _, row in non_offensive.iterrows():
        examples.append((row['text'], 0))
    
    random.shuffle(examples)
    return examples

def evaluate_prompts(test_df, predictions):
    true_labels = test_df['labels'].tolist()
    
    report = classification_report(true_labels, predictions, 
                                   target_names=['Non-Offensive', 'Offensive'],
                                   output_dict=True)
    cm = confusion_matrix(true_labels, predictions)
    
    return report, cm

def print_results(name, report, cm):
    print(f"\n{'='*60}")
    print(f"Strategy: {name}")
    print(f"{'='*60}")
    print(f"Macro F1: {report['macro avg']['f1-score']:.3f}")
    print(f"Offensive F1: {report['Offensive']['f1-score']:.3f}")
    print(f"Non-Offensive F1: {report['Non-Offensive']['f1-score']:.3f}")
    print(f"\nConfusion Matrix:")
    print(f"                 Predicted")
    print(f"               Non-OFF  OFF")
    print(f"Actual Non-OFF  {cm[0][0]:>6}  {cm[0][1]:>6}")
    print(f"       OFF      {cm[1][0]:>6}  {cm[1][1]:>6}")


### VANILLA ZERO-SHOT

In [6]:
print("\n" + "="*60)
print("EXPERIMENT 1: VANILLA ZERO-SHOT")
print("="*60)

predictions_vanilla = []

for idx, row in test_df.iterrows():
    prompt = f"""Classify the following text as 'offensive' or 'non-offensive':

Text: {row['text']}

Classification:"""
    
    response = get_prediction(prompt)
    pred = parse_response(response)
    predictions_vanilla.append(pred)
    
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_vanilla, cm_vanilla = evaluate_prompts(test_df, predictions_vanilla)
print_results("Vanilla Zero-Shot", report_vanilla, cm_vanilla)



EXPERIMENT 1: VANILLA ZERO-SHOT
Processed 0/860
Processed 100/860
Processed 200/860
Processed 300/860
Processed 400/860
Processed 500/860
Processed 600/860
Processed 700/860
Processed 800/860

Strategy: Vanilla Zero-Shot
Macro F1: 0.694
Offensive F1: 0.564
Non-Offensive F1: 0.825

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     506     114
       OFF         101     139


### DEFINITION-AUGMENTED

In [7]:
print("\n" + "="*60)
print("EXPERIMENT 2: DEFINITION-AUGMENTED")
print("="*60)

predictions_def = []

for idx, row in test_df.iterrows():
    prompt = f"""Definitions:
- Offensive: Contains insults, threats, profanity, or targets individuals/groups based on identity.
- Non-offensive: Does not contain offensive language or harmful content.

Classify the following text as 'offensive' or 'non-offensive':

Text: {row['text']}

Classification:"""
    
    response = get_prediction(prompt)
    pred = parse_response(response)
    predictions_def.append(pred)
    
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_def, cm_def = evaluate_prompts(test_df, predictions_def)
print_results("Definition-Augmented", report_def, cm_def)


EXPERIMENT 2: DEFINITION-AUGMENTED
Processed 0/860
Processed 100/860
Processed 200/860
Processed 300/860
Processed 400/860
Processed 500/860
Processed 600/860
Processed 700/860
Processed 800/860

Strategy: Definition-Augmented
Macro F1: 0.693
Offensive F1: 0.528
Non-Offensive F1: 0.858

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     567      53
       OFF         135     105


### ALTERNATIVE DEFINITION

In [8]:
print("\n" + "="*60)
print("EXPERIMENT 3: ALTERNATIVE DEFINITION")
print("="*60)

predictions_alt_def = []

for idx, row in test_df.iterrows():
    prompt = f"""Definitions:
- Offensive: Text that could cause harm, distress, or is intended to demean, dehumanize, or attack individuals or groups.
- Non-offensive: Text that is respectful, neutral, or positive in nature.

Classify the following text as 'offensive' or 'non-offensive':

Text: {row['text']}

Classification:"""
    
    response = get_prediction(prompt)
    pred = parse_response(response)
    predictions_alt_def.append(pred)
    
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_alt_def, cm_alt_def = evaluate_prompts(test_df, predictions_alt_def)
print_results("Alternative Definition (Harm-based)", report_alt_def, cm_alt_def)


EXPERIMENT 3: ALTERNATIVE DEFINITION
Processed 0/860
Processed 100/860
Processed 200/860
Processed 300/860
Processed 400/860
Processed 500/860
Processed 600/860
Processed 700/860
Processed 800/860

Strategy: Alternative Definition (Harm-based)
Macro F1: 0.718
Offensive F1: 0.615
Non-Offensive F1: 0.821

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     482     138
       OFF          72     168


### ALTERNATIVE LABELS

In [9]:
print("\n" + "="*60)
print("EXPERIMENT 4: ALTERNATIVE LABELS (toxic/non-toxic)")
print("="*60)

predictions_toxic = []

for idx, row in test_df.iterrows():
    prompt = f"""Definitions:
- Toxic: Contains harmful, abusive, or hateful language.
- Non-toxic: Does not contain harmful or abusive language.

Classify the following text as 'toxic' or 'non-toxic':

Text: {row['text']}

Classification:"""
    
    response = get_prediction(prompt)
    pred = parse_response(response, use_toxic_labels=True)
    predictions_toxic.append(pred)
    
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_toxic, cm_toxic = evaluate_prompts(test_df, predictions_toxic)
print_results("Alternative Labels (Toxic)", report_toxic, cm_toxic)



EXPERIMENT 4: ALTERNATIVE LABELS (toxic/non-toxic)
Processed 0/860
Processed 100/860
Processed 200/860
Processed 300/860
Processed 400/860
Processed 500/860
Processed 600/860
Processed 700/860
Processed 800/860

Strategy: Alternative Labels (Toxic)
Macro F1: 0.737
Offensive F1: 0.630
Non-Offensive F1: 0.844

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     510     110
       OFF          79     161


### FEW-SHOT RANDOM

In [10]:
# 5a: 2-shot
print("\n" + "="*60)
print("EXPERIMENT 5a: FEW-SHOT RANDOM (2 examples)")
print("="*60)

examples_2 = get_random_examples(2)
predictions_2shot = []

for idx, row in test_df.iterrows():
    prompt = "Classify texts as 'offensive' or 'non-offensive'.\n\nExamples:\n\n"
    
    for ex_text, ex_label in examples_2:
        label_str = "offensive" if ex_label == 1 else "non-offensive"
        prompt += f"Text: {ex_text}\nClassification: {label_str}\n\n"
    
    prompt += f"Text: {row['text']}\nClassification:"
    
    response = get_prediction(prompt)
    pred = parse_response(response)
    predictions_2shot.append(pred)
    
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_2shot, cm_2shot = evaluate_prompts(test_df, predictions_2shot)
print_results("Few-Shot Random (2 examples)", report_2shot, cm_2shot)

# 5b: 4-shot
print("\n" + "="*60)
print("EXPERIMENT 5b: FEW-SHOT RANDOM (4 examples)")
print("="*60)

examples_4 = get_random_examples(4)
predictions_4shot = []

for idx, row in test_df.iterrows():
    prompt = "Classify texts as 'offensive' or 'non-offensive'.\n\nExamples:\n\n"
    
    for ex_text, ex_label in examples_4:
        label_str = "offensive" if ex_label == 1 else "non-offensive"
        prompt += f"Text: {ex_text}\nClassification: {label_str}\n\n"
    
    prompt += f"Text: {row['text']}\nClassification:"
    
    response = get_prediction(prompt)
    pred = parse_response(response)
    predictions_4shot.append(pred)
    
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_4shot, cm_4shot = evaluate_prompts(test_df, predictions_4shot)
print_results("Few-Shot Random (4 examples)", report_4shot, cm_4shot)

# 5c: 6-shot
print("\n" + "="*60)
print("EXPERIMENT 5c: FEW-SHOT RANDOM (6 examples)")
print("="*60)

examples_6 = get_random_examples(6)
predictions_6shot = []

for idx, row in test_df.iterrows():
    prompt = "Classify texts as 'offensive' or 'non-offensive'.\n\nExamples:\n\n"
    
    for ex_text, ex_label in examples_6:
        label_str = "offensive" if ex_label == 1 else "non-offensive"
        prompt += f"Text: {ex_text}\nClassification: {label_str}\n\n"
    
    prompt += f"Text: {row['text']}\nClassification:"
    
    response = get_prediction(prompt)
    pred = parse_response(response)
    predictions_6shot.append(pred)
    
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_6shot, cm_6shot = evaluate_prompts(test_df, predictions_6shot)
print_results("Few-Shot Random (6 examples)", report_6shot, cm_6shot)



EXPERIMENT 5a: FEW-SHOT RANDOM (2 examples)
Processed 0/860
Processed 100/860
Processed 200/860
Processed 300/860
Processed 400/860
Processed 500/860
Processed 600/860
Processed 700/860
Processed 800/860

Strategy: Few-Shot Random (2 examples)
Macro F1: 0.672
Offensive F1: 0.479
Non-Offensive F1: 0.864

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     590      30
       OFF         155      85

EXPERIMENT 5b: FEW-SHOT RANDOM (4 examples)
Processed 0/860
Processed 100/860
Processed 200/860
Processed 300/860
Processed 400/860
Processed 500/860
Processed 600/860
Processed 700/860
Processed 800/860

Strategy: Few-Shot Random (4 examples)
Macro F1: 0.634
Offensive F1: 0.413
Non-Offensive F1: 0.856

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     591      29
       OFF         170      70

EXPERIMENT 5c: FEW-SHOT RANDOM (6 examples)
Processed 0/860
Processed 100/860
Processed 200/860
Processed 300/860
Pro

### SUMMARY TABLE

In [11]:
print("\n" + "="*80)
print("FINAL RESULTS SUMMARY")
print("="*80)
print(f"{'Strategy':<40} {'Macro F1':<12} {'OFF F1':<12} {'NON-OFF F1':<12}")
print("-"*80)

all_results = [
    ("Vanilla Zero-Shot", report_vanilla),
    ("Definition-Augmented", report_def),
    ("Alternative Definition", report_alt_def),
    ("Alternative Labels (Toxic)", report_toxic),
    ("Few-Shot Random (2 examples)", report_2shot),
    ("Few-Shot Random (4 examples)", report_4shot),
    ("Few-Shot Random (6 examples)", report_6shot)
]

for name, report in all_results:
    macro_f1 = report['macro avg']['f1-score']
    off_f1 = report['Offensive']['f1-score']
    non_off_f1 = report['Non-Offensive']['f1-score']
    print(f"{name:<40} {macro_f1:<12.3f} {off_f1:<12.3f} {non_off_f1:<12.3f}")



FINAL RESULTS SUMMARY
Strategy                                 Macro F1     OFF F1       NON-OFF F1  
--------------------------------------------------------------------------------
Vanilla Zero-Shot                        0.694        0.564        0.825       
Definition-Augmented                     0.693        0.528        0.858       
Alternative Definition                   0.718        0.615        0.821       
Alternative Labels (Toxic)               0.737        0.630        0.844       
Few-Shot Random (2 examples)             0.672        0.479        0.864       
Few-Shot Random (4 examples)             0.634        0.413        0.856       
Few-Shot Random (6 examples)             0.673        0.491        0.856       


### SAVE RESULTS

In [12]:
results_dict = {
    'vanilla': {'report': report_vanilla, 'confusion_matrix': cm_vanilla.tolist()},
    'definition': {'report': report_def, 'confusion_matrix': cm_def.tolist()},
    'alt_definition': {'report': report_alt_def, 'confusion_matrix': cm_alt_def.tolist()},
    'toxic_labels': {'report': report_toxic, 'confusion_matrix': cm_toxic.tolist()},
    'few_shot_2': {'report': report_2shot, 'confusion_matrix': cm_2shot.tolist()},
    'few_shot_4': {'report': report_4shot, 'confusion_matrix': cm_4shot.tolist()},
    'few_shot_6': {'report': report_6shot, 'confusion_matrix': cm_6shot.tolist()}
}

# Save to JSON
with open('results_summary.json', 'w') as f:
    json.dump(results_dict, f, indent=2)

print("\n\nResults saved to results_summary.json")
print("Experiment completed successfully!")




Results saved to results_summary.json
Experiment completed successfully!


### DETAILED ANALYSIS OF TWO STRATEGIES


In [15]:
print("\n" + "="*80)
print("DETAILED ANALYSIS OF ALL STRATEGIES")
print("="*80)

all_reports = [
    ("Vanilla Zero-Shot", report_vanilla, cm_vanilla),
    ("Definition-Augmented", report_def, cm_def),
    ("Alternative Definition", report_alt_def, cm_alt_def),
    ("Alternative Labels (Toxic)", report_toxic, cm_toxic),
    ("Few-Shot Random (2 examples)", report_2shot, cm_2shot),
    ("Few-Shot Random (4 examples)", report_4shot, cm_4shot),
    ("Few-Shot Random (6 examples)", report_6shot, cm_6shot)
]

for strategy_name, report, cm in all_reports:
    print(f"\n{'='*60}")
    print(f"Strategy: {strategy_name}")
    print(f"{'='*60}")
    
    print(f"\nOffensive Class Metrics:")
    print(f"  Precision: {report['Offensive']['precision']:.3f}")
    print(f"  Recall:    {report['Offensive']['recall']:.3f}")
    print(f"  F1-Score:  {report['Offensive']['f1-score']:.3f}")
    
    print(f"\nNon-Offensive Class Metrics:")
    print(f"  Precision: {report['Non-Offensive']['precision']:.3f}")
    print(f"  Recall:    {report['Non-Offensive']['recall']:.3f}")
    print(f"  F1-Score:  {report['Non-Offensive']['f1-score']:.3f}")
    
    print(f"\nConfusion Matrix:")
    print(f"                 Predicted")
    print(f"               Non-OFF  OFF")
    print(f"Actual Non-OFF  {cm[0][0]:>6}  {cm[0][1]:>6}")
    print(f"       OFF      {cm[1][0]:>6}  {cm[1][1]:>6}")



DETAILED ANALYSIS OF ALL STRATEGIES

Strategy: Vanilla Zero-Shot

Offensive Class Metrics:
  Precision: 0.549
  Recall:    0.579
  F1-Score:  0.564

Non-Offensive Class Metrics:
  Precision: 0.834
  Recall:    0.816
  F1-Score:  0.825

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     506     114
       OFF         101     139

Strategy: Definition-Augmented

Offensive Class Metrics:
  Precision: 0.665
  Recall:    0.438
  F1-Score:  0.528

Non-Offensive Class Metrics:
  Precision: 0.808
  Recall:    0.915
  F1-Score:  0.858

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-OFF     567      53
       OFF         135     105

Strategy: Alternative Definition

Offensive Class Metrics:
  Precision: 0.549
  Recall:    0.700
  F1-Score:  0.615

Non-Offensive Class Metrics:
  Precision: 0.870
  Recall:    0.777
  F1-Score:  0.821

Confusion Matrix:
                 Predicted
               Non-OFF  OFF
Actual Non-O

In [10]:
from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:8080/v1", api_key="cltl")

text = "I hate you and I hope you fail."

prompt = f"""You are a content moderation classifier.

Return EXACTLY one word: offensive OR non-offensive

Text:
{text}

Answer:
"""

r = client.completions.create(model="local", prompt=prompt, max_tokens=6, temperature=0)
print(r.choices[0].text.strip())

offensive

Text:
I


In [11]:
text = "What a beautiful day to go for a walk!"

prompt = f"""You are a content moderation classifier.

Return EXACTLY one word: toxic OR non-toxic

Text:
{text}

Answer:
"""

r = client.completions.create(model="local", prompt=prompt, max_tokens=6, temperature=0)
label = r.choices[0].text.strip().lower()
label = {"toxic":"offensive","non-toxic":"non-offensive"}.get(label, label)
print(label)

non-toxic

text:


In [12]:
texts = [
    "You are stupid. Go away.",
    "Thanks for the update.",
    "Nobody cares about your idea.",
    "What a beautiful day!"
]

def classify(text):
    prompt = f"""Return EXACTLY one word: offensive OR non-offensive

Text:
{text}

Answer:
"""
    r = client.completions.create(model="local", prompt=prompt, max_tokens=6, temperature=0)
    return r.choices[0].text.strip().lower()

preds = [classify(t) for t in texts]
print(list(zip(texts, preds)))

[('You are stupid. Go away.', 'offensive\n\ntext:\nthank'), ('Thanks for the update.', 'non-offensive\n\nexplanation:'), ('Nobody cares about your idea.', 'non-offensive\n\nexplanation:'), ('What a beautiful day!', 'non-offensive\n\ntext:')]
