In [12]:
# !pip install --upgrade llama-cpp-python
# !pip install openai
# !pip install sse_starlette
# !pip install starlette_context
# !pip install pydantic_settings
# !pip install "fastapi[all]"
# !pip install pandas
# !pip install scikit-learn


In [13]:
# import openai
# from openai import OpenAI
# # Point to the server
# client = OpenAI(base_url="http://localhost:8000/v1", api_key="cltl")
# # Sentences to classify
# sentences = [
# "I hate you and I hope you fail.",
# "What a beautiful day to go for a walk!",
# "Your idea is stupid and nobody cares."
# ]
# # Build a single prompt
# prompt = "Classify each of the following sentences as 'hate' or 'non-hate':\n\n"
# for i, s in enumerate(sentences, 1):
#     prompt += f"{i}. {s}\n"
    
# prompt += "\nReturn the results in the format:\n<number>. <label>\n"
# # Make one request for all sentences
# response = client.completions.create(
#     model="local model", # currently unused
#     prompt=prompt,
#     max_tokens=50,
#     temperature=0,
#     stop=["Classify", "\n\n"]
#     )
# # Print the raw model output
# print(response.choices[0].text.strip())

In [14]:
import openai
from openai import OpenAI
import pandas as pd
import json
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import random
from typing import List, Tuple
import re

# OpenAI-compatible client aimed at the local LLaMA server.
# NOTE(review): the api_key looks like a placeholder for the local endpoint — confirm.
client = OpenAI(
    base_url="http://localhost:8000/v1",
    api_key="cltl",
)

# One seed for both Python's `random` module and NumPy's global RNG,
# so sampling and shuffling are repeatable from a fresh kernel.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

### Load dataset

In [15]:
# Forward slashes are accepted on Windows as well as Linux/macOS, whereas the
# previous backslash-separated paths only resolved on Windows.
DATA_DIR = 'Subjectivity_mining_assignment_4_5_data'

# Both frames are accessed through their 'text' and 'label' columns further down.
train_df = pd.read_csv(f'{DATA_DIR}/olid-train-small.csv')
test_df = pd.read_csv(f'{DATA_DIR}/OLID-test.csv')

### Helper methods

In [16]:
def get_prediction(prompt):
    """Send `prompt` to the completion endpoint and return the stripped completion text.

    The request is deterministic (temperature 0), limited to 10 tokens, and cut
    at the first newline or at the next "Text:" / "Classification:" marker.

    Args:
        prompt: Full prompt string, expected to end right before the label.

    Returns:
        The stripped completion text. If the request or the response handling
        fails, the best-effort fallback "non-offensive" is returned and a
        warning is printed so that failures do not pass unnoticed.
    """
    try:
        response = client.completions.create(
            model="local model",
            prompt=prompt,
            max_tokens=10,
            temperature=0,
            stop=["\n", "Text:", "Classification:"]
        )
        return response.choices[0].text.strip()
    except Exception as exc:
        # Catch Exception rather than using a bare except: KeyboardInterrupt and
        # SystemExit must still be able to stop a long-running experiment loop.
        print(f"[get_prediction] request failed ({type(exc).__name__}: {exc}); "
              "falling back to 'non-offensive'")
        return "non-offensive"

def parse_response(response, use_toxic_labels=False):
    """Convert a raw model response into a binary label.

    Args:
        response: Text returned by the model (None is treated as empty).
        use_toxic_labels: When True look for 'toxic' / 'non-toxic', otherwise
            for 'offensive' / 'non-offensive'.

    Returns:
        1 if the positive label occurs in the response and is not negated,
        else 0. Responses that contain neither label also map to 0.
    """
    positive_label = 'toxic' if use_toxic_labels else 'offensive'
    response_lower = (response or '').lower().strip()

    # The model does not always spell the negative class with a hyphen, so
    # "non offensive", "nonoffensive", "non_offensive" and "not offensive" are
    # treated as negative as well as the canonical "non-offensive".
    negated = re.search(rf"(?:non|not)[\s_-]*{re.escape(positive_label)}", response_lower)
    if negated:
        return 0

    return 1 if positive_label in response_lower else 0

def get_random_examples(n_examples, df=None, seed=42):
    """Draw a class-balanced, shuffled set of few-shot examples.

    Args:
        n_examples: Total number of examples wanted. Half are taken from each
            class, so an odd value yields n_examples - 1 examples.
        df: DataFrame with 'text' and 'label' columns (label 1 = offensive,
            0 = non-offensive). Defaults to the notebook-level `train_df`.
        seed: Seed for both the per-class sampling and the final shuffle.

    Returns:
        List of (text, label) tuples in shuffled order.

    Raises:
        ValueError: raised by pandas when a class has fewer rows than
            n_examples // 2.
    """
    source = train_df if df is None else df
    n_per_class = n_examples // 2

    offensive = source[source['label'] == 1].sample(n=n_per_class, random_state=seed)
    non_offensive = source[source['label'] == 0].sample(n=n_per_class, random_state=seed)

    examples = [(text, 1) for text in offensive['text']]
    examples += [(text, 0) for text in non_offensive['text']]

    # A dedicated RNG keeps the order independent of the global `random` state,
    # so repeated calls and cell re-runs always give the same example order.
    random.Random(seed).shuffle(examples)
    return examples

def evaluate_prompts(test_df, predictions):
    """Score binary predictions against the gold labels in `test_df`.

    Args:
        test_df: DataFrame whose 'label' column holds the gold labels (0/1).
        predictions: Predicted labels (0/1), one per row of `test_df`, in order.

    Returns:
        (report, cm): the classification report as a dict with the class keys
        'Non-Offensive' and 'Offensive', and the 2x2 confusion matrix with rows
        for the true class and columns for the predicted class, both ordered
        as [0, 1].
    """
    true_labels = test_df['label'].tolist()

    # Fix the label set so the report keys and the matrix shape do not depend
    # on which classes happen to appear (e.g. a model that predicts one class).
    class_ids = [0, 1]

    report = classification_report(
        true_labels,
        predictions,
        labels=class_ids,
        target_names=['Non-Offensive', 'Offensive'],
        output_dict=True,
        # Score a never-predicted class as 0 without emitting a warning.
        zero_division=0,
    )
    cm = confusion_matrix(true_labels, predictions, labels=class_ids)

    return report, cm

def print_results(name, report, cm):
    """Print the F1 scores and the confusion matrix of one strategy.

    Args:
        name: Strategy name shown in the header.
        report: Dict with 'macro avg', 'Offensive' and 'Non-Offensive' entries,
            each carrying an 'f1-score' value.
        cm: 2x2 confusion matrix indexed as cm[actual][predicted].
    """
    rule = '=' * 60
    print(f"\n{rule}\nStrategy: {name}\n{rule}")

    # (printed title, key in the report dict)
    f1_rows = (
        ("Macro", 'macro avg'),
        ("Offensive", 'Offensive'),
        ("Non-Offensive", 'Non-Offensive'),
    )
    for title, key in f1_rows:
        print(f"{title} F1: {report[key]['f1-score']:.3f}")

    print("\nConfusion Matrix:")
    print("                 Predicted")
    print("               Non-OFF  OFF")
    for row_label, counts in (("Actual Non-OFF  ", cm[0]), ("       OFF      ", cm[1])):
        print(f"{row_label}{counts[0]:>6}  {counts[1]:>6}")


### VANILLA ZERO-SHOT

In [None]:
banner = "=" * 60
print("\n" + banner)
print("EXPERIMENT 1: VANILLA ZERO-SHOT")
print(banner)

predictions_vanilla = []
n_test = len(test_df)

# Series.items() yields (index label, text) pairs, so idx is the same row
# label that iterating over the rows would give.
for idx, text in test_df['text'].items():
    # Bare instruction, no definitions or examples.
    prompt = (
        "Classify the following text as 'offensive' or 'non-offensive':\n"
        "\n"
        f"Text: {text}\n"
        "\n"
        "Classification:"
    )

    predictions_vanilla.append(parse_response(get_prediction(prompt)))

    # Progress line whenever the index label is a multiple of 100.
    if idx % 100 == 0:
        print(f"Processed {idx}/{n_test}")

report_vanilla, cm_vanilla = evaluate_prompts(test_df, predictions_vanilla)
print_results("Vanilla Zero-Shot", report_vanilla, cm_vanilla)



EXPERIMENT 1: VANILLA ZERO-SHOT
Processed 0/860
Processed 100/860


### DEFINITION-AUGMENTED

In [None]:
banner = "=" * 60
print("\n" + banner)
print("EXPERIMENT 2: DEFINITION-AUGMENTED")
print(banner)

predictions_def = []
n_test = len(test_df)

# Series.items() yields (index label, text) pairs, so idx is the same row
# label that iterating over the rows would give.
for idx, text in test_df['text'].items():
    # Zero-shot prompt preceded by explicit definitions of both classes.
    prompt = (
        "Definitions:\n"
        "- Offensive: Contains insults, threats, profanity, or targets individuals/groups based on identity.\n"
        "- Non-offensive: Does not contain offensive language or harmful content.\n"
        "\n"
        "Classify the following text as 'offensive' or 'non-offensive':\n"
        "\n"
        f"Text: {text}\n"
        "\n"
        "Classification:"
    )

    predictions_def.append(parse_response(get_prediction(prompt)))

    # Progress line whenever the index label is a multiple of 100.
    if idx % 100 == 0:
        print(f"Processed {idx}/{n_test}")

report_def, cm_def = evaluate_prompts(test_df, predictions_def)
print_results("Definition-Augmented", report_def, cm_def)

### ALTERNATIVE DEFINITION

In [None]:
banner = "=" * 60
print("\n" + banner)
print("EXPERIMENT 3: ALTERNATIVE DEFINITION")
print(banner)

predictions_alt_def = []
n_test = len(test_df)

# Series.items() yields (index label, text) pairs, so idx is the same row
# label that iterating over the rows would give.
for idx, text in test_df['text'].items():
    # Same task, but the classes are defined in terms of harm and intent.
    prompt = (
        "Definitions:\n"
        "- Offensive: Text that could cause harm, distress, or is intended to demean, dehumanize, or attack individuals or groups.\n"
        "- Non-offensive: Text that is respectful, neutral, or positive in nature.\n"
        "\n"
        "Classify the following text as 'offensive' or 'non-offensive':\n"
        "\n"
        f"Text: {text}\n"
        "\n"
        "Classification:"
    )

    predictions_alt_def.append(parse_response(get_prediction(prompt)))

    # Progress line whenever the index label is a multiple of 100.
    if idx % 100 == 0:
        print(f"Processed {idx}/{n_test}")

report_alt_def, cm_alt_def = evaluate_prompts(test_df, predictions_alt_def)
print_results("Alternative Definition (Harm-based)", report_alt_def, cm_alt_def)

### ALTERNATIVE LABELS

In [None]:
banner = "=" * 60
print("\n" + banner)
print("EXPERIMENT 4: ALTERNATIVE LABELS (toxic/non-toxic)")
print(banner)

predictions_toxic = []
n_test = len(test_df)

# Series.items() yields (index label, text) pairs, so idx is the same row
# label that iterating over the rows would give.
for idx, text in test_df['text'].items():
    # Same task phrased with the label pair 'toxic' / 'non-toxic'.
    prompt = (
        "Definitions:\n"
        "- Toxic: Contains harmful, abusive, or hateful language.\n"
        "- Non-toxic: Does not contain harmful or abusive language.\n"
        "\n"
        "Classify the following text as 'toxic' or 'non-toxic':\n"
        "\n"
        f"Text: {text}\n"
        "\n"
        "Classification:"
    )

    # The parser has to look for the toxic label pair in this experiment.
    answer = get_prediction(prompt)
    predictions_toxic.append(parse_response(answer, use_toxic_labels=True))

    # Progress line whenever the index label is a multiple of 100.
    if idx % 100 == 0:
        print(f"Processed {idx}/{n_test}")

report_toxic, cm_toxic = evaluate_prompts(test_df, predictions_toxic)
print_results("Alternative Labels (Toxic)", report_toxic, cm_toxic)


### FEW-SHOT RANDOM

In [None]:
def _run_few_shot_random(tag, n_examples):
    """Run one few-shot experiment with randomly drawn demonstrations.

    Prints the experiment header, draws `n_examples` balanced examples from the
    training data, classifies every row of `test_df`, evaluates the predictions
    and prints the results.

    Args:
        tag: Experiment identifier shown in the header (e.g. "5a").
        n_examples: Number of demonstrations placed in the prompt.

    Returns:
        (examples, predictions, report, cm) for this run.
    """
    print("\n" + "="*60)
    print(f"EXPERIMENT {tag}: FEW-SHOT RANDOM ({n_examples} examples)")
    print("="*60)

    examples = get_random_examples(n_examples)

    # The instruction and the demonstrations are the same for every test row,
    # so this part of the prompt is built once instead of once per row.
    prompt_prefix = "Classify texts as 'offensive' or 'non-offensive'.\n\nExamples:\n\n"
    for ex_text, ex_label in examples:
        label_str = "offensive" if ex_label == 1 else "non-offensive"
        prompt_prefix += f"Text: {ex_text}\nClassification: {label_str}\n\n"

    predictions = []
    for idx, row in test_df.iterrows():
        prompt = prompt_prefix + f"Text: {row['text']}\nClassification:"

        response = get_prediction(prompt)
        predictions.append(parse_response(response))

        # Progress line whenever the index label is a multiple of 100.
        if idx % 100 == 0:
            print(f"Processed {idx}/{len(test_df)}")

    report, cm = evaluate_prompts(test_df, predictions)
    print_results(f"Few-Shot Random ({n_examples} examples)", report, cm)
    return examples, predictions, report, cm


# The runs are executed in the order 2 -> 4 -> 6 examples; the notebook-level
# names below are the ones the summary, save and analysis cells read.

# 5a: 2-shot
examples_2, predictions_2shot, report_2shot, cm_2shot = _run_few_shot_random("5a", 2)

# 5b: 4-shot
examples_4, predictions_4shot, report_4shot, cm_4shot = _run_few_shot_random("5b", 4)

# 5c: 6-shot
examples_6, predictions_6shot, report_6shot, cm_6shot = _run_few_shot_random("5c", 6)


### FEW-SHOT RELEVANT

In [None]:
print("\n" + "="*60)
print("EXPERIMENT 6: FEW-SHOT RELEVANT EXAMPLES")
print("="*60)

# Manually selected clear examples as (text, label) pairs; 1 = offensive, 0 = non-offensive
relevant_examples = [
    ("You are a worthless piece of trash", 1),
    ("This is a wonderful day!", 0),
    ("Go kill yourself idiot", 1),
    ("I disagree with your opinion but respect it", 0)
]

# The instruction, the short definitions and the demonstrations do not depend
# on the test row, so this part of the prompt is built once, outside the loop.
prompt_prefix = """Classify texts as 'offensive' or 'non-offensive'.

Offensive texts contain insults, threats, or derogatory language.
Non-offensive texts are neutral or positive.

Examples:

"""

for ex_text, ex_label in relevant_examples:
    label_str = "offensive" if ex_label == 1 else "non-offensive"
    prompt_prefix += f"Text: {ex_text}\nClassification: {label_str}\n\n"

predictions_relevant = []

for idx, row in test_df.iterrows():
    # Only the text to classify changes from row to row.
    prompt = prompt_prefix + f"Text: {row['text']}\nClassification:"

    response = get_prediction(prompt)
    pred = parse_response(response)
    predictions_relevant.append(pred)

    # Progress line whenever the index label is a multiple of 100.
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(test_df)}")

report_relevant, cm_relevant = evaluate_prompts(test_df, predictions_relevant)
print_results("Few-Shot Relevant Examples", report_relevant, cm_relevant)


### SUMMARY TABLE

In [None]:
table_rule = "=" * 80
print("\n" + table_rule)
print("FINAL RESULTS SUMMARY")
print(table_rule)
print(f"{'Strategy':<40} {'Macro F1':<12} {'OFF F1':<12} {'NON-OFF F1':<12}")
print("-" * 80)

# (display name, classification report) for every strategy, in run order
all_results = [
    ("Vanilla Zero-Shot", report_vanilla),
    ("Definition-Augmented", report_def),
    ("Alternative Definition", report_alt_def),
    ("Alternative Labels (Toxic)", report_toxic),
    ("Few-Shot Random (2 examples)", report_2shot),
    ("Few-Shot Random (4 examples)", report_4shot),
    ("Few-Shot Random (6 examples)", report_6shot),
    ("Few-Shot Relevant", report_relevant),
]

# One row per strategy: macro F1, then the F1 of each class.
for name, report in all_results:
    f1_scores = [
        report[key]['f1-score']
        for key in ('macro avg', 'Offensive', 'Non-Offensive')
    ]
    score_cells = " ".join(f"{score:<12.3f}" for score in f1_scores)
    print(f"{name:<40} {score_cells}")


### SAVE RESULTS

In [None]:
# (report, confusion matrix) of every strategy, keyed by the name used in the JSON file
runs_by_key = {
    'vanilla': (report_vanilla, cm_vanilla),
    'definition': (report_def, cm_def),
    'alt_definition': (report_alt_def, cm_alt_def),
    'toxic_labels': (report_toxic, cm_toxic),
    'few_shot_2': (report_2shot, cm_2shot),
    'few_shot_4': (report_4shot, cm_4shot),
    'few_shot_6': (report_6shot, cm_6shot),
    'few_shot_relevant': (report_relevant, cm_relevant),
}

# The confusion matrices are converted with .tolist() so they can be written as JSON.
results_dict = {
    key: {'report': run_report, 'confusion_matrix': run_cm.tolist()}
    for key, (run_report, run_cm) in runs_by_key.items()
}

# Save to JSON
with open('results_summary.json', 'w') as f:
    f.write(json.dumps(results_dict, indent=2))

print("\n\nResults saved to results_summary.json")
print("Experiment completed successfully!")

### DETAILED ANALYSIS OF TWO STRATEGIES


In [None]:
wide_rule = "=" * 80
print("\n" + wide_rule)
print("DETAILED ANALYSIS OF SELECTED STRATEGIES")
print(wide_rule)

# The two strategies selected for a per-class precision / recall / F1 breakdown
selected_strategies = [
    ("Strategy 1: Few-Shot Random (4 examples)", report_4shot),
    ("Strategy 2: Few-Shot Relevant Examples", report_relevant),
]

for heading, strategy_report in selected_strategies:
    print("\n" + heading)
    print("-" * 40)
    for position, class_name in enumerate(("Offensive", "Non-Offensive")):
        class_scores = strategy_report[class_name]
        # A blank line separates the second class from the first.
        lead = "\n" if position else ""
        print(f"{lead}Precision ({class_name}): {class_scores['precision']:.3f}")
        print(f"Recall ({class_name}): {class_scores['recall']:.3f}")
        print(f"F1-Score ({class_name}): {class_scores['f1-score']:.3f}")

print("\n" + wide_rule)
print("ALL EXPERIMENTS COMPLETED!")
print(wide_rule)