In [None]:
# GPT-2 on SST-5: few-shot sentiment classification with RDE demonstration selection

import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline
import torch
from sentence_transformers import SentenceTransformer

class RDESelector:
    """Choose in-context demonstrations with an epsilon-greedy tabular Q-learner.

    A state is the (order-independent) set of demonstration indices picked so
    far; an action is the index of the next demonstration to add. The reward
    mixes label diversity of the picked set with its mean embedding similarity
    to the query.
    """

    def __init__(self, demo_pool, num_classes, q_table=None):
        """Store the pool and embed every demonstration text once.

        Args:
            demo_pool: Indexable collection of records with 'text' and
                'label' entries; labels are used as indices in
                ``range(num_classes)``.
            num_classes: Number of distinct labels.
            q_table: Optional dict mapping ``(state_key, action)`` to a
                Q-value. It is updated in place, so it can be shared or
                persisted by the caller.
        """
        self.demo_pool = demo_pool
        self.num_classes = num_classes
        # Test against None rather than truthiness: an empty dict passed in by
        # the caller must be kept so that learned values land in *their* dict.
        self.q_table = {} if q_table is None else q_table
        self.alpha = 0.1    # learning rate
        self.gamma = 0.9    # discount factor
        self.epsilon = 0.2  # exploration probability

        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.demo_embeddings = self.embedding_model.encode(
            [d['text'] for d in demo_pool],
            convert_to_numpy=True
        )

    def diversity_score(self, selected_indices):
        """Return the Shannon entropy (nats) of the labels of the selection.

        An empty selection has no label distribution and scores 0.0 instead
        of producing NaN from a 0/0 division.
        """
        answer_counts = np.zeros(self.num_classes)
        for idx in selected_indices:
            answer_counts[self.demo_pool[idx]['label']] += 1

        total = answer_counts.sum()
        if total == 0:
            return 0.0

        # Only classes that actually occur contribute (0 * log 0 is taken as 0).
        probs = answer_counts[answer_counts > 0] / total
        return float(np.sum(probs * np.log(1.0 / probs)))

    def get_state_key(self, current_state):
        """Return a hashable, order-independent key for a state dict."""
        return tuple(sorted(current_state['selected']))

    def select_demos(self, input_sample, k=5):
        """Pick up to ``k`` distinct demonstrations for ``input_sample``.

        Each pick is epsilon-greedy over the Q-table and is followed by a
        one-step Q-learning update using the reward of the selection so far.

        Args:
            input_sample: Record with a 'text' entry (the query).
            k: Number of demonstrations wanted; capped at the pool size.

        Returns:
            List of the chosen records from ``demo_pool``, in pick order.
        """
        input_embedding = self.embedding_model.encode(
            input_sample['text'],
            convert_to_numpy=True
        )

        pool_size = len(self.demo_pool)
        # Asking for more demonstrations than exist would leave no candidates.
        k = min(k, pool_size)

        selected = []
        chosen = set()  # same content as `selected`, for O(1) membership tests

        for _ in range(k):
            valid_demos = [i for i in range(pool_size) if i not in chosen]

            # Key of the state the action is taken FROM. It must be computed
            # before the action is appended; otherwise the update below is
            # stored under the successor state and the greedy lookup never
            # sees what was learned.
            state_key = self.get_state_key({'selected': selected})

            if np.random.random() < self.epsilon:
                action = int(np.random.choice(valid_demos))
            else:
                q_values = [self.q_table.get((state_key, a), 0)
                            for a in valid_demos]
                action = int(valid_demos[np.argmax(q_values)])

            selected.append(action)
            chosen.add(action)
            reward = self.calculate_reward(input_embedding, selected)

            # Best value reachable from the successor state (0 if nothing is left).
            next_state_key = self.get_state_key({'selected': selected})
            next_max = max(
                (self.q_table.get((next_state_key, a), 0)
                 for a in valid_demos if a != action),
                default=0
            )

            old_q = self.q_table.get((state_key, action), 0)
            self.q_table[(state_key, action)] = (
                (1 - self.alpha) * old_q +
                self.alpha * (reward + self.gamma * next_max)
            )

        return [self.demo_pool[i] for i in selected]

    def calculate_reward(self, input_embedding, selected_indices):
        """Return 0.5 * normalized label entropy + 0.5 * mean similarity.

        Similarity is the dot product between each selected demonstration
        embedding and the query embedding. An empty selection scores 0.0.
        """
        if len(selected_indices) == 0:
            return 0.0

        demo_embeddings = self.demo_embeddings[selected_indices]
        similarities = np.dot(demo_embeddings, input_embedding)
        relevance = np.mean(similarities)

        diversity = self.diversity_score(selected_indices)
        max_entropy = np.log(self.num_classes)
        # With a single class log(1) == 0; there is no diversity to reward.
        normalized_diversity = diversity / max_entropy if max_entropy > 0 else 0.0

        return 0.5 * normalized_diversity + 0.5 * relevance

# Load SST-5 and use the train split as the demonstration pool and the
# validation split as the evaluation set.
sst5 = load_dataset("SetFit/sst5")
train_demos = sst5["train"]  # append .select(range(1000)) for a quicker run
test_samples = sst5["validation"]  # append .select(range(100)) for a quicker run

model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse EOS as the padding token.
tokenizer.pad_token = tokenizer.eos_token

pipe = pipeline(
    "text-generation",
    model=model_id,
    # Hand over the tokenizer configured above; without this the pipeline
    # loads its own copy and the pad-token setting is never applied.
    tokenizer=tokenizer,
    device_map="auto",
    model_kwargs={"low_cpu_mem_usage": True}
)

rde_selector = RDESelector(train_demos, num_classes=5)

# Position i is the text shown to / expected from the model for label id i.
label_map = [
    "negative",
    "somewhat negative",
    "neutral",
    "somewhat positive",
    "positive"
]

def format_prompt(demos, test_sample):
    """Build the few-shot prompt for one test review.

    The prompt is the task instruction, one "Review/Sentiment" pair per
    demonstration, and finally the test review with an open "Sentiment:" slot
    for the model to complete.
    """
    instruction = "Analyze sentiment of these movie reviews. Choose from: negative, somewhat negative, neutral, somewhat positive, positive.\n\n"
    shots = [
        f"Review: {shot['text']}\nSentiment: {label_map[shot['label']]}\n\n"
        for shot in demos
    ]
    query = f"Review: {test_sample['text']}\nSentiment:"
    return instruction + "".join(shots) + query

def _match_label(generated, labels):
    """Return the index of the label that occurs earliest in ``generated``.

    Scanning the labels in list order is wrong when one label is contained in
    another: "negative" is a substring of "somewhat negative", so the longer
    answer would always be scored as the shorter one. Taking the earliest
    match (longest wins on a tie) resolves that. Returns None if no label
    occurs.
    """
    best = None  # (start position, -length, label index)
    for i, label in enumerate(labels):
        pos = generated.find(label)
        if pos == -1:
            continue
        candidate = (pos, -len(label), i)
        if best is None or candidate < best:
            best = candidate
    return None if best is None else best[2]


# Evaluate: select demonstrations, prompt the model, parse its answer, score it.
correct = 0
for idx, sample in enumerate(test_samples):
    selected_demos = rde_selector.select_demos(sample, k=5)
    prompt = format_prompt(selected_demos, sample)

    outputs = pipe(
        prompt,
        max_new_tokens=15,
        return_full_text=False
    )

    generated = outputs[0]['generated_text'].lower()
    predicted = _match_label(generated, label_map)

    actual = sample['label']

    # No recognizable label in the output: fall back to index 2 ("neutral").
    if predicted is None:
        predicted = 2

    if predicted == actual:
        correct += 1

    color_code = "\033[92m" if actual == predicted else "\033[91m"
    print(f"Sample {idx+1}:")
    print(f"  Predicted: {label_map[predicted]} | Actual: {label_map[actual]}")
    print(f"{color_code}  Result: {'CORRECT' if actual == predicted else 'INCORRECT'}\033[0m")
    print("-" * 80)

# Guard against an empty evaluation split.
total = len(test_samples)
accuracy = correct / total if total else 0.0
print(f"\nFinal Accuracy: {accuracy:.2%}")


In [None]:
# Gemma on SST-5: few-shot sentiment classification with RDE demonstration selection

import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline
import torch
from sentence_transformers import SentenceTransformer

class RDESelector:
    """Choose in-context demonstrations with an epsilon-greedy tabular Q-learner.

    A state is the (order-independent) set of demonstration indices picked so
    far; an action is the index of the next demonstration to add. The reward
    mixes label diversity of the picked set with its mean embedding similarity
    to the query.
    """

    def __init__(self, demo_pool, num_classes, q_table=None):
        """Store the pool and embed every demonstration text once.

        Args:
            demo_pool: Indexable collection of records with 'text' and
                'label' entries; labels are used as indices in
                ``range(num_classes)``.
            num_classes: Number of distinct labels.
            q_table: Optional dict mapping ``(state_key, action)`` to a
                Q-value. It is updated in place, so it can be shared or
                persisted by the caller.
        """
        self.demo_pool = demo_pool
        self.num_classes = num_classes
        # Test against None rather than truthiness: an empty dict passed in by
        # the caller must be kept so that learned values land in *their* dict.
        self.q_table = {} if q_table is None else q_table
        self.alpha = 0.1    # learning rate
        self.gamma = 0.9    # discount factor
        self.epsilon = 0.2  # exploration probability

        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.demo_embeddings = self.embedding_model.encode(
            [d['text'] for d in demo_pool],
            convert_to_numpy=True
        )

    def diversity_score(self, selected_indices):
        """Return the Shannon entropy (nats) of the labels of the selection.

        An empty selection has no label distribution and scores 0.0 instead
        of producing NaN from a 0/0 division.
        """
        answer_counts = np.zeros(self.num_classes)
        for idx in selected_indices:
            answer_counts[self.demo_pool[idx]['label']] += 1

        total = answer_counts.sum()
        if total == 0:
            return 0.0

        # Only classes that actually occur contribute (0 * log 0 is taken as 0).
        probs = answer_counts[answer_counts > 0] / total
        return float(np.sum(probs * np.log(1.0 / probs)))

    def get_state_key(self, current_state):
        """Return a hashable, order-independent key for a state dict."""
        return tuple(sorted(current_state['selected']))

    def select_demos(self, input_sample, k=5):
        """Pick up to ``k`` distinct demonstrations for ``input_sample``.

        Each pick is epsilon-greedy over the Q-table and is followed by a
        one-step Q-learning update using the reward of the selection so far.

        Args:
            input_sample: Record with a 'text' entry (the query).
            k: Number of demonstrations wanted; capped at the pool size.

        Returns:
            List of the chosen records from ``demo_pool``, in pick order.
        """
        input_embedding = self.embedding_model.encode(
            input_sample['text'],
            convert_to_numpy=True
        )

        pool_size = len(self.demo_pool)
        # Asking for more demonstrations than exist would leave no candidates.
        k = min(k, pool_size)

        selected = []
        chosen = set()  # same content as `selected`, for O(1) membership tests

        for _ in range(k):
            valid_demos = [i for i in range(pool_size) if i not in chosen]

            # Key of the state the action is taken FROM. It must be computed
            # before the action is appended; otherwise the update below is
            # stored under the successor state and the greedy lookup never
            # sees what was learned.
            state_key = self.get_state_key({'selected': selected})

            if np.random.random() < self.epsilon:
                action = int(np.random.choice(valid_demos))
            else:
                q_values = [self.q_table.get((state_key, a), 0)
                            for a in valid_demos]
                action = int(valid_demos[np.argmax(q_values)])

            selected.append(action)
            chosen.add(action)
            reward = self.calculate_reward(input_embedding, selected)

            # Best value reachable from the successor state (0 if nothing is left).
            next_state_key = self.get_state_key({'selected': selected})
            next_max = max(
                (self.q_table.get((next_state_key, a), 0)
                 for a in valid_demos if a != action),
                default=0
            )

            old_q = self.q_table.get((state_key, action), 0)
            self.q_table[(state_key, action)] = (
                (1 - self.alpha) * old_q +
                self.alpha * (reward + self.gamma * next_max)
            )

        return [self.demo_pool[i] for i in selected]

    def calculate_reward(self, input_embedding, selected_indices):
        """Return 0.5 * normalized label entropy + 0.5 * mean similarity.

        Similarity is the dot product between each selected demonstration
        embedding and the query embedding. An empty selection scores 0.0.
        """
        if len(selected_indices) == 0:
            return 0.0

        demo_embeddings = self.demo_embeddings[selected_indices]
        similarities = np.dot(demo_embeddings, input_embedding)
        relevance = np.mean(similarities)

        diversity = self.diversity_score(selected_indices)
        max_entropy = np.log(self.num_classes)
        # With a single class log(1) == 0; there is no diversity to reward.
        normalized_diversity = diversity / max_entropy if max_entropy > 0 else 0.0

        return 0.5 * normalized_diversity + 0.5 * relevance

# Load the SetFit/sst5 dataset; the train split is the demonstration pool and
# the validation split is the evaluation set.
sst5 = load_dataset("SetFit/sst5")
train_demos = sst5["train"]  # append .select(range(1000)) for a quicker run
test_samples = sst5["validation"]  # append .select(range(100)) for a quicker run


model_id = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    # Reuse the tokenizer loaded above instead of letting the pipeline load a
    # second, separate copy.
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    model_kwargs={"low_cpu_mem_usage": True}
)
# Pad with the model's EOS token id.
pipe.tokenizer.pad_token_id = pipe.model.config.eos_token_id

rde_selector = RDESelector(train_demos, num_classes=5)

# Position i is the text shown to / expected from the model for label id i.
label_map = [
    "negative",
    "somewhat negative",
    "neutral",
    "somewhat positive",
    "positive"
]

def format_prompt(demos, test_sample):
    """Build the few-shot prompt for one test review.

    The prompt is the task instruction, one "Review/Sentiment" pair per
    demonstration, and finally the test review with an open "Sentiment:" slot
    for the model to complete.
    """
    instruction = "Analyze sentiment of these movie reviews. Choose from: negative, somewhat negative, neutral, somewhat positive, positive.\n\n"
    shots = [
        f"Review: {shot['text']}\nSentiment: {label_map[shot['label']]}\n\n"
        for shot in demos
    ]
    query = f"Review: {test_sample['text']}\nSentiment:"
    return instruction + "".join(shots) + query

def _match_label(generated, labels):
    """Return the index of the label that occurs earliest in ``generated``.

    Scanning the labels in list order is wrong when one label is contained in
    another: "negative" is a substring of "somewhat negative", so the longer
    answer would always be scored as the shorter one. Taking the earliest
    match (longest wins on a tie) resolves that. Returns None if no label
    occurs.
    """
    best = None  # (start position, -length, label index)
    for i, label in enumerate(labels):
        pos = generated.find(label)
        if pos == -1:
            continue
        candidate = (pos, -len(label), i)
        if best is None or candidate < best:
            best = candidate
    return None if best is None else best[2]


# Evaluate: select demonstrations, prompt the model, parse its answer, score it.
correct = 0
for idx, sample in enumerate(test_samples):
    selected_demos = rde_selector.select_demos(sample, k=5)
    prompt = format_prompt(selected_demos, sample)

    outputs = pipe(
        prompt,
        max_new_tokens=15,
        return_full_text=False
    )

    generated = outputs[0]['generated_text'].lower()
    predicted = _match_label(generated, label_map)

    actual = sample['label']

    # No recognizable label in the output: fall back to index 2 ("neutral").
    if predicted is None:
        predicted = 2

    if predicted == actual:
        correct += 1

    color_code = "\033[92m" if actual == predicted else "\033[91m"
    print(f"Sample {idx+1}:")
    print(f"  Predicted: {label_map[predicted]} | Actual: {label_map[actual]}")
    print(f"{color_code}  Result: {'CORRECT' if actual == predicted else 'INCORRECT'}\033[0m")
    print("-" * 80)

# Guard against an empty evaluation split.
total = len(test_samples)
accuracy = correct / total if total else 0.0
print(f"\nFinal Accuracy: {accuracy:.2%}")


In [None]:
# Llama on SST-5: few-shot sentiment classification with RDE demonstration selection

import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline
import torch
from sentence_transformers import SentenceTransformer

class RDESelector:
    """Choose in-context demonstrations with an epsilon-greedy tabular Q-learner.

    A state is the (order-independent) set of demonstration indices picked so
    far; an action is the index of the next demonstration to add. The reward
    mixes label diversity of the picked set with its mean embedding similarity
    to the query.
    """

    def __init__(self, demo_pool, num_classes, q_table=None):
        """Store the pool and embed every demonstration text once.

        Args:
            demo_pool: Indexable collection of records with 'text' and
                'label' entries; labels are used as indices in
                ``range(num_classes)``.
            num_classes: Number of distinct labels.
            q_table: Optional dict mapping ``(state_key, action)`` to a
                Q-value. It is updated in place, so it can be shared or
                persisted by the caller.
        """
        self.demo_pool = demo_pool
        self.num_classes = num_classes
        # Test against None rather than truthiness: an empty dict passed in by
        # the caller must be kept so that learned values land in *their* dict.
        self.q_table = {} if q_table is None else q_table
        self.alpha = 0.1    # learning rate
        self.gamma = 0.9    # discount factor
        self.epsilon = 0.2  # exploration probability

        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.demo_embeddings = self.embedding_model.encode(
            [d['text'] for d in demo_pool],
            convert_to_numpy=True
        )

    def diversity_score(self, selected_indices):
        """Return the Shannon entropy (nats) of the labels of the selection.

        An empty selection has no label distribution and scores 0.0 instead
        of producing NaN from a 0/0 division.
        """
        answer_counts = np.zeros(self.num_classes)
        for idx in selected_indices:
            answer_counts[self.demo_pool[idx]['label']] += 1

        total = answer_counts.sum()
        if total == 0:
            return 0.0

        # Only classes that actually occur contribute (0 * log 0 is taken as 0).
        probs = answer_counts[answer_counts > 0] / total
        return float(np.sum(probs * np.log(1.0 / probs)))

    def get_state_key(self, current_state):
        """Return a hashable, order-independent key for a state dict."""
        return tuple(sorted(current_state['selected']))

    def select_demos(self, input_sample, k=5):
        """Pick up to ``k`` distinct demonstrations for ``input_sample``.

        Each pick is epsilon-greedy over the Q-table and is followed by a
        one-step Q-learning update using the reward of the selection so far.

        Args:
            input_sample: Record with a 'text' entry (the query).
            k: Number of demonstrations wanted; capped at the pool size.

        Returns:
            List of the chosen records from ``demo_pool``, in pick order.
        """
        input_embedding = self.embedding_model.encode(
            input_sample['text'],
            convert_to_numpy=True
        )

        pool_size = len(self.demo_pool)
        # Asking for more demonstrations than exist would leave no candidates.
        k = min(k, pool_size)

        selected = []
        chosen = set()  # same content as `selected`, for O(1) membership tests

        for _ in range(k):
            valid_demos = [i for i in range(pool_size) if i not in chosen]

            # Key of the state the action is taken FROM. It must be computed
            # before the action is appended; otherwise the update below is
            # stored under the successor state and the greedy lookup never
            # sees what was learned.
            state_key = self.get_state_key({'selected': selected})

            if np.random.random() < self.epsilon:
                action = int(np.random.choice(valid_demos))
            else:
                q_values = [self.q_table.get((state_key, a), 0)
                            for a in valid_demos]
                action = int(valid_demos[np.argmax(q_values)])

            selected.append(action)
            chosen.add(action)
            reward = self.calculate_reward(input_embedding, selected)

            # Best value reachable from the successor state (0 if nothing is left).
            next_state_key = self.get_state_key({'selected': selected})
            next_max = max(
                (self.q_table.get((next_state_key, a), 0)
                 for a in valid_demos if a != action),
                default=0
            )

            old_q = self.q_table.get((state_key, action), 0)
            self.q_table[(state_key, action)] = (
                (1 - self.alpha) * old_q +
                self.alpha * (reward + self.gamma * next_max)
            )

        return [self.demo_pool[i] for i in selected]

    def calculate_reward(self, input_embedding, selected_indices):
        """Return 0.5 * normalized label entropy + 0.5 * mean similarity.

        Similarity is the dot product between each selected demonstration
        embedding and the query embedding. An empty selection scores 0.0.
        """
        if len(selected_indices) == 0:
            return 0.0

        demo_embeddings = self.demo_embeddings[selected_indices]
        similarities = np.dot(demo_embeddings, input_embedding)
        relevance = np.mean(similarities)

        diversity = self.diversity_score(selected_indices)
        max_entropy = np.log(self.num_classes)
        # With a single class log(1) == 0; there is no diversity to reward.
        normalized_diversity = diversity / max_entropy if max_entropy > 0 else 0.0

        return 0.5 * normalized_diversity + 0.5 * relevance

# Load SST-5 and use the train split as the demonstration pool and the
# validation split as the evaluation set.
sst5 = load_dataset("SetFit/sst5")
train_demos = sst5["train"]  # append .select(range(1000)) for a quicker run
test_samples = sst5["validation"]  # append .select(range(100)) for a quicker run

model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse EOS as the padding token.
tokenizer.pad_token = tokenizer.eos_token

pipe = pipeline(
    "text-generation",
    model=model_id,
    # Hand over the tokenizer configured above; without this the pipeline
    # loads its own copy and the pad-token setting is never applied.
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    model_kwargs={"low_cpu_mem_usage": True}
)

rde_selector = RDESelector(train_demos, num_classes=5)

# Position i is the text shown to / expected from the model for label id i.
label_map = [
    "very negative",
    "negative",
    "neutral",
    "positive",
    "very positive"
]

def format_prompt(demos, test_sample):
    """Render the few-shot conversation as a chat-template prompt string.

    The conversation is a system instruction, one user/assistant exchange per
    demonstration (review, then its label text), and a final user turn holding
    the test review. The template is rendered untokenized with the generation
    prompt appended.
    """
    system_turn = {"role": "system", "content": "Perform sentiment analysis on movie reviews. Choose from: very negative, negative, neutral, positive, very positive."}

    demo_turns = []
    for shot in demos:
        demo_turns.extend([
            {"role": "user", "content": f"Review: {shot['text']}"},
            {"role": "assistant", "content": label_map[shot['label']]},
        ])

    query_turn = {"role": "user", "content": f"Review: {test_sample['text']}"}

    conversation = [system_turn, *demo_turns, query_turn]
    return tokenizer.apply_chat_template(
        conversation,
        tokenize=False,
        add_generation_prompt=True
    )

def _match_label(generated, labels):
    """Return the index of the label that occurs earliest in ``generated``.

    Only full label texts are matched. Matching on a label's first word is
    unsafe here: "very" is shared by "very negative" and "very positive", so
    a "very positive" answer would be scored as "very negative". Taking the
    earliest match (longest wins on a tie) also keeps "negative" from
    shadowing "very negative". Returns None if no label occurs.
    """
    best = None  # (start position, -length, label index)
    for i, label in enumerate(labels):
        pos = generated.find(label)
        if pos == -1:
            continue
        candidate = (pos, -len(label), i)
        if best is None or candidate < best:
            best = candidate
    return None if best is None else best[2]


# Evaluate: select demonstrations, prompt the model, parse its answer, score it.
correct = 0
for idx, sample in enumerate(test_samples):
    selected_demos = rde_selector.select_demos(sample, k=5)
    prompt = format_prompt(selected_demos, sample)

    outputs = pipe(
        prompt,
        max_new_tokens=15,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        # Ask for the completion only instead of slicing the prompt off by length.
        return_full_text=False
    )

    generated = outputs[0]['generated_text'].lower().strip()
    predicted = _match_label(generated, label_map)

    actual = sample['label']

    # No recognizable label in the output: fall back to index 2 ("neutral").
    if predicted is None:
        predicted = 2

    if predicted == actual:
        correct += 1

    color_code = "\033[92m" if actual == predicted else "\033[91m"
    print(f"Sample {idx+1}:")
    print(f"  Predicted: {label_map[predicted]} | Actual: {label_map[actual]}")
    print(f"{color_code}  Result: {'CORRECT' if actual == predicted else 'INCORRECT'}\033[0m")
    print("-" * 80)

# Guard against an empty evaluation split.
total = len(test_samples)
accuracy = correct / total if total else 0.0
print(f"\nFinal Accuracy: {accuracy:.2%}")
