In [None]:
# Gemma AG News

# final accuracy method
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline
import torch
from sentence_transformers import SentenceTransformer

class RDESelector:
    """Pick in-context demonstrations with an epsilon-greedy tabular Q-learner.

    A state is the set of demonstration indices chosen so far; an action is
    the index of the next demonstration to add. The reward of a partial
    selection is the average of two terms: the mean dot product between the
    query embedding and the chosen demonstration embeddings, and the label
    entropy of the chosen demonstrations divided by ``log(num_classes)``.

    Attributes:
        demo_pool: Indexable pool of examples, each with a ``'text'`` entry
            and an integer ``'label'`` entry.
        num_classes: Number of distinct labels in the task.
        q_table: Maps ``(state_key, action)`` to the learned Q-value.
        alpha: Learning rate of the Q-update.
        gamma: Discount factor applied to the best next-state value.
        epsilon: Probability of picking a random demonstration.
    """

    def __init__(self, demo_pool, num_classes, q_table=None):
        """Store the pool and embed every demonstration text once.

        Args:
            demo_pool: Indexable pool of examples with ``'text'``/``'label'``.
            num_classes: Number of distinct labels.
            q_table: Optional existing Q-table to keep learning into. The
                dict is used as-is, even when it is empty.
        """
        self.demo_pool = demo_pool
        self.num_classes = num_classes
        # Test for None, not truthiness: an empty dict handed in by the
        # caller must stay the shared table instead of being replaced.
        self.q_table = {} if q_table is None else q_table
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9  # Discount factor
        self.epsilon = 0.2  # Exploration rate

        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.demo_embeddings = self.embedding_model.encode(
            [d['text'] for d in demo_pool],
            convert_to_numpy=True
        )

    def diversity_score(self, selected_indices):
        """Return the Shannon entropy (in nats) of the selected labels.

        Args:
            selected_indices: Indices into ``demo_pool``.

        Returns:
            The label entropy as a float; ``0.0`` for an empty selection.
        """
        label_counts = np.zeros(self.num_classes)
        for idx in selected_indices:
            label_counts[self.demo_pool[idx]['label']] += 1

        total = label_counts.sum()
        if total == 0:
            # Nothing selected: avoid the 0/0 division and report no diversity.
            return 0.0

        # Only classes that actually occur contribute (0 * log 0 is taken as 0).
        probs = label_counts[label_counts > 0] / total
        return float(-np.sum(probs * np.log(probs)))

    def get_state_key(self, current_state):
        """Return an order-independent, hashable key for a selection state."""
        return tuple(sorted(current_state['selected']))

    def select_demos(self, input_sample, k=5):
        """Choose up to ``k`` demonstrations for ``input_sample``.

        Each step picks a not-yet-chosen demonstration (randomly with
        probability ``epsilon``, otherwise the one with the highest Q-value
        for the current state) and then updates the Q-table entry of the
        state the choice was made in.

        Args:
            input_sample: Mapping with a ``'text'`` entry (the query).
            k: Number of demonstrations wanted; capped at the pool size.

        Returns:
            The chosen pool items, in the order they were picked.
        """
        input_embedding = self.embedding_model.encode(
            input_sample['text'],
            convert_to_numpy=True
        )

        state = {'input': input_sample, 'selected': []}
        selected = state['selected']
        # Candidate indices in ascending order; shrunk in place as we pick.
        remaining = list(range(len(self.demo_pool)))

        for _ in range(min(k, len(remaining))):
            # Key of the state the decision is made in. It must be taken
            # before the action is appended, otherwise the update below would
            # be stored under a key that action selection never looks up.
            current_state_key = self.get_state_key(state)

            if np.random.random() < self.epsilon:
                action = int(np.random.choice(remaining))
            else:
                q_values = [self.q_table.get((current_state_key, a), 0)
                            for a in remaining]
                action = remaining[int(np.argmax(q_values))]

            selected.append(action)
            remaining.remove(action)

            reward = self.calculate_reward(input_embedding, selected)

            next_state_key = self.get_state_key(state)
            next_max = max(
                (self.q_table.get((next_state_key, a), 0) for a in remaining),
                default=0,
            )

            previous = self.q_table.get((current_state_key, action), 0)
            self.q_table[(current_state_key, action)] = (
                (1 - self.alpha) * previous
                + self.alpha * (reward + self.gamma * next_max)
            )

        return [self.demo_pool[i] for i in selected]

    def calculate_reward(self, input_embedding, selected_indices):
        """Score a selection as 0.5 * normalised diversity + 0.5 * relevance.

        Args:
            input_embedding: Embedding vector of the query text.
            selected_indices: Indices into ``demo_pool`` / ``demo_embeddings``.

        Returns:
            The reward as a float; ``0.0`` for an empty selection.
        """
        if len(selected_indices) == 0:
            return 0.0

        chosen_embeddings = self.demo_embeddings[selected_indices]
        relevance = np.mean(np.dot(chosen_embeddings, input_embedding))

        max_entropy = np.log(self.num_classes) if self.num_classes > 1 else 0.0
        # With a single class there is no diversity to reward (and log(1) = 0
        # would otherwise be a zero divisor).
        if max_entropy > 0:
            normalized_diversity = self.diversity_score(selected_indices) / max_entropy
        else:
            normalized_diversity = 0.0

        return float(0.5 * normalized_diversity + 0.5 * relevance)


# Load AG News. The full train split is the demonstration pool and the full
# test split is evaluated; the trailing `.select(...)` comments are disabled
# sub-sampling for quicker runs.
ag_news = load_dataset("ag_news")
train_demos = ag_news["train"]#.select(range(2500))
test_samples = ag_news["test"]#.select(range(700))

# Text-generation pipeline for the instruction-tuned Gemma 2B checkpoint,
# loaded in bfloat16 with automatic device placement. The EOS token id is
# passed as the padding token id.
model_id = "google/gemma-2b-it"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    pad_token_id=tokenizer.eos_token_id
)

# Constructing the selector embeds every text in `train_demos` up front.
# AG News has four labels (0-3).
rde_selector = RDESelector(train_demos, num_classes=4)
import re
def format_prompt(demos, test_sample):
      prompt = """Classify news articles into these categories:
                0: World
                1: Sports
                2: Business
                3: Sci/Tech

                Examples:"""

      for demo in demos:
          prompt += f"\nArticle: {demo['text'][:200]}\nLabel: {demo['label']}"

      prompt += f"\n\nNew Article: {test_sample['text'][:200]}\nLabel:"
      return prompt



# Evaluate few-shot accuracy on every test sample, printing a coloured
# per-sample report and the overall accuracy at the end.
correct = 0
for idx, sample in enumerate(test_samples):
    selected_demos = rde_selector.select_demos(sample, k=5)
    prompt = format_prompt(selected_demos, sample)

    # Greedy decoding. Only the continuation is requested: the prompt itself
    # contains "Label: <digit>" for every demonstration, so parsing the full
    # text would return the first demonstration's label instead of the
    # model's answer.
    outputs = pipe(
        prompt,
        max_new_tokens=2,
        do_sample=False,
        num_return_sequences=1,
        return_full_text=False
    )

    completion = outputs[0]['generated_text']
    match = re.search(r'\d', completion)  # labels are the single digits 0-3
    predicted_label = match.group(0) if match else None

    actual_label = str(sample['label'])
    is_correct = predicted_label == actual_label
    if is_correct:
        correct += 1

    color_code = "\033[92m" if is_correct else "\033[91m"
    print(f"Sample {idx+1}:")
    print(f"  Predicted: {predicted_label} | Actual: {actual_label}")
    print(f"  Text: {sample['text'][:100]}...")
    print(f"{color_code}  Result: {'CORRECT' if is_correct else 'INCORRECT'}\033[0m")
    print("-" * 80)

# max(..., 1) keeps the report from dividing by zero on an empty test set.
print(f"\nFinal Accuracy: {correct/max(len(test_samples), 1):.2%}")

In [None]:
# GPT2 AGNews

import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, GPT2LMHeadModel, pipeline
import torch
from sentence_transformers import SentenceTransformer

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

class RDESelector:
    """Pick in-context demonstrations with an epsilon-greedy tabular Q-learner.

    A state is the set of demonstration indices chosen so far; an action is
    the index of the next demonstration to add. The reward of a partial
    selection is the average of two terms: the mean dot product between the
    query embedding and the chosen demonstration embeddings, and the label
    entropy of the chosen demonstrations divided by ``log(num_classes)``.

    Attributes:
        demo_pool: Indexable pool of examples, each with a ``'text'`` entry
            and an integer ``'label'`` entry.
        num_classes: Number of distinct labels in the task.
        q_table: Maps ``(state_key, action)`` to the learned Q-value.
        alpha: Learning rate of the Q-update.
        gamma: Discount factor applied to the best next-state value.
        epsilon: Probability of picking a random demonstration.
    """

    def __init__(self, demo_pool, num_classes, q_table=None):
        """Store the pool and embed every demonstration text once.

        Args:
            demo_pool: Indexable pool of examples with ``'text'``/``'label'``.
            num_classes: Number of distinct labels.
            q_table: Optional existing Q-table to keep learning into. The
                dict is used as-is, even when it is empty.
        """
        self.demo_pool = demo_pool
        self.num_classes = num_classes
        # Test for None, not truthiness: an empty dict handed in by the
        # caller must stay the shared table instead of being replaced.
        self.q_table = {} if q_table is None else q_table
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9  # Discount factor
        self.epsilon = 0.2  # Exploration rate

        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.demo_embeddings = self.embedding_model.encode(
            [d['text'] for d in demo_pool],
            convert_to_numpy=True
        )

    def diversity_score(self, selected_indices):
        """Return the Shannon entropy (in nats) of the selected labels.

        Args:
            selected_indices: Indices into ``demo_pool``.

        Returns:
            The label entropy as a float; ``0.0`` for an empty selection.
        """
        label_counts = np.zeros(self.num_classes)
        for idx in selected_indices:
            label_counts[self.demo_pool[idx]['label']] += 1

        total = label_counts.sum()
        if total == 0:
            # Nothing selected: avoid the 0/0 division and report no diversity.
            return 0.0

        # Only classes that actually occur contribute (0 * log 0 is taken as 0).
        probs = label_counts[label_counts > 0] / total
        return float(-np.sum(probs * np.log(probs)))

    def get_state_key(self, current_state):
        """Return an order-independent, hashable key for a selection state."""
        return tuple(sorted(current_state['selected']))

    def select_demos(self, input_sample, k=5):
        """Choose up to ``k`` demonstrations for ``input_sample``.

        Each step picks a not-yet-chosen demonstration (randomly with
        probability ``epsilon``, otherwise the one with the highest Q-value
        for the current state) and then updates the Q-table entry of the
        state the choice was made in.

        Args:
            input_sample: Mapping with a ``'text'`` entry (the query).
            k: Number of demonstrations wanted; capped at the pool size.

        Returns:
            The chosen pool items, in the order they were picked.
        """
        input_embedding = self.embedding_model.encode(
            input_sample['text'],
            convert_to_numpy=True
        )

        state = {'input': input_sample, 'selected': []}
        selected = state['selected']
        # Candidate indices in ascending order; shrunk in place as we pick.
        remaining = list(range(len(self.demo_pool)))

        for _ in range(min(k, len(remaining))):
            # Key of the state the decision is made in. It must be taken
            # before the action is appended, otherwise the update below would
            # be stored under a key that action selection never looks up.
            current_state_key = self.get_state_key(state)

            if np.random.random() < self.epsilon:
                action = int(np.random.choice(remaining))
            else:
                q_values = [self.q_table.get((current_state_key, a), 0)
                            for a in remaining]
                action = remaining[int(np.argmax(q_values))]

            selected.append(action)
            remaining.remove(action)

            reward = self.calculate_reward(input_embedding, selected)

            next_state_key = self.get_state_key(state)
            next_max = max(
                (self.q_table.get((next_state_key, a), 0) for a in remaining),
                default=0,
            )

            previous = self.q_table.get((current_state_key, action), 0)
            self.q_table[(current_state_key, action)] = (
                (1 - self.alpha) * previous
                + self.alpha * (reward + self.gamma * next_max)
            )

        return [self.demo_pool[i] for i in selected]

    def calculate_reward(self, input_embedding, selected_indices):
        """Score a selection as 0.5 * normalised diversity + 0.5 * relevance.

        Args:
            input_embedding: Embedding vector of the query text.
            selected_indices: Indices into ``demo_pool`` / ``demo_embeddings``.

        Returns:
            The reward as a float; ``0.0`` for an empty selection.
        """
        if len(selected_indices) == 0:
            return 0.0

        chosen_embeddings = self.demo_embeddings[selected_indices]
        relevance = np.mean(np.dot(chosen_embeddings, input_embedding))

        max_entropy = np.log(self.num_classes) if self.num_classes > 1 else 0.0
        # With a single class there is no diversity to reward (and log(1) = 0
        # would otherwise be a zero divisor).
        if max_entropy > 0:
            normalized_diversity = self.diversity_score(selected_indices) / max_entropy
        else:
            normalized_diversity = 0.0

        return float(0.5 * normalized_diversity + 0.5 * relevance)

# Load AG News. The full train split is the demonstration pool and the full
# test split is evaluated.
ag_news = load_dataset("ag_news")
train_demos = ag_news["train"]
test_samples = ag_news["test"]

# Load GPT-2 and its tokenizer. The EOS token is assigned as the padding
# token, and the model is moved to `device` and put in eval mode.
model_id = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained(model_id).to(device)
model.eval()

# Constructing the selector embeds every text in `train_demos` up front.
# AG News has four labels (0-3).
rde_selector = RDESelector(train_demos, num_classes=4)

def format_prompt(demos, test_sample):
    """Build the few-shot prompt: category legend, examples, then the query.

    Args:
        demos: Iterable of examples with ``'text'`` and ``'label'`` entries.
        test_sample: Mapping with a ``'text'`` entry (the article to classify).

    Returns:
        The prompt string, ending in an open ``Category:`` for the model.
    """
    header = (
        "Classify news articles into one of these categories:\n"
        "0: World News\n1: Sports\n2: Business\n3: Science/Tech\n\nExamples:\n"
    )
    shots = [
        f"Article: {example['text']}\nCategory: {example['label']}\n\n"
        for example in demos
    ]
    query = f"New Article: {test_sample['text']}\nCategory:"
    return header + "".join(shots) + query

# Evaluate few-shot accuracy on every test sample with greedy decoding,
# printing a coloured per-sample report, a running accuracy every 100
# samples, and a final summary.
correct = 0
total = 0
invalid_predictions = 0

max_new_tokens = 3
# 1024 is the token budget this script already used for the prompt. Reserve
# part of it for the generated tokens so prompt + continuation fit together.
max_prompt_tokens = 1024 - max_new_tokens
# Drop tokens from the start of an over-long prompt, not the end: the end
# holds the article to classify and the open "Category:" cue.
tokenizer.truncation_side = "left"

for idx, sample in enumerate(test_samples):
    selected_demos = rde_selector.select_demos(sample, k=5)
    prompt = format_prompt(selected_demos, sample)

    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        max_length=max_prompt_tokens,
        truncation=True,
    ).to(device)

    with torch.no_grad():
        # Explicit greedy decoding. The sampling and beam-search options
        # (temperature, top_p, early_stopping) were removed because sampling
        # was never enabled.
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer.eos_token_id,
            num_return_sequences=1,
            do_sample=False,
        )

    # Decode only the newly generated tokens so digits that belong to the
    # prompt can never be read as the prediction.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    first_digit = next((ch for ch in completion if ch in "0123456789"), None)

    if first_digit is None:
        # No digit in the continuation: record it as an invalid prediction.
        predicted_label = -1
        invalid_predictions += 1
    else:
        predicted_label = int(first_digit)

    actual_label = sample['label']

    is_correct = predicted_label == actual_label
    if is_correct:
        correct += 1
    total += 1

    color_code = "\033[92m" if is_correct else "\033[91m"
    print(f"Sample {idx+1}:")
    print(f"  Predicted: {predicted_label} | Actual: {actual_label}")
    print(f"{color_code}  Result: {'CORRECT' if is_correct else 'INCORRECT'}\033[0m")
    print("-" * 80)

    if (idx + 1) % 100 == 0:
        print(f"Processed {idx+1} samples | Current Accuracy: {correct/total:.2%}")

print(f"\nFinal Results:")
print(f"Correct predictions: {correct}/{len(test_samples)}")
print(f"Invalid predictions: {invalid_predictions}")
# max(..., 1) keeps the report from dividing by zero on an empty test set.
print(f"Final Accuracy: {correct/max(len(test_samples), 1):.2%}")


In [None]:
# llama AG News

# final accuracy method
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, pipeline
import torch
from sentence_transformers import SentenceTransformer

class RDESelector:
    """Pick in-context demonstrations with an epsilon-greedy tabular Q-learner.

    A state is the set of demonstration indices chosen so far; an action is
    the index of the next demonstration to add. The reward of a partial
    selection is the average of two terms: the mean dot product between the
    query embedding and the chosen demonstration embeddings, and the label
    entropy of the chosen demonstrations divided by ``log(num_classes)``.

    Attributes:
        demo_pool: Indexable pool of examples, each with a ``'text'`` entry
            and an integer ``'label'`` entry.
        num_classes: Number of distinct labels in the task.
        q_table: Maps ``(state_key, action)`` to the learned Q-value.
        alpha: Learning rate of the Q-update.
        gamma: Discount factor applied to the best next-state value.
        epsilon: Probability of picking a random demonstration.
    """

    def __init__(self, demo_pool, num_classes, q_table=None):
        """Store the pool and embed every demonstration text once.

        Args:
            demo_pool: Indexable pool of examples with ``'text'``/``'label'``.
            num_classes: Number of distinct labels.
            q_table: Optional existing Q-table to keep learning into. The
                dict is used as-is, even when it is empty.
        """
        self.demo_pool = demo_pool
        self.num_classes = num_classes
        # Test for None, not truthiness: an empty dict handed in by the
        # caller must stay the shared table instead of being replaced.
        self.q_table = {} if q_table is None else q_table
        self.alpha = 0.1  # Learning rate
        self.gamma = 0.9  # Discount factor
        self.epsilon = 0.2  # Exploration rate

        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.demo_embeddings = self.embedding_model.encode(
            [d['text'] for d in demo_pool],
            convert_to_numpy=True
        )

    def diversity_score(self, selected_indices):
        """Return the Shannon entropy (in nats) of the selected labels.

        Args:
            selected_indices: Indices into ``demo_pool``.

        Returns:
            The label entropy as a float; ``0.0`` for an empty selection.
        """
        label_counts = np.zeros(self.num_classes)
        for idx in selected_indices:
            label_counts[self.demo_pool[idx]['label']] += 1

        total = label_counts.sum()
        if total == 0:
            # Nothing selected: avoid the 0/0 division and report no diversity.
            return 0.0

        # Only classes that actually occur contribute (0 * log 0 is taken as 0).
        probs = label_counts[label_counts > 0] / total
        return float(-np.sum(probs * np.log(probs)))

    def get_state_key(self, current_state):
        """Return an order-independent, hashable key for a selection state."""
        return tuple(sorted(current_state['selected']))

    def select_demos(self, input_sample, k=5):
        """Choose up to ``k`` demonstrations for ``input_sample``.

        Each step picks a not-yet-chosen demonstration (randomly with
        probability ``epsilon``, otherwise the one with the highest Q-value
        for the current state) and then updates the Q-table entry of the
        state the choice was made in.

        Args:
            input_sample: Mapping with a ``'text'`` entry (the query).
            k: Number of demonstrations wanted; capped at the pool size.

        Returns:
            The chosen pool items, in the order they were picked.
        """
        input_embedding = self.embedding_model.encode(
            input_sample['text'],
            convert_to_numpy=True
        )

        state = {'input': input_sample, 'selected': []}
        selected = state['selected']
        # Candidate indices in ascending order; shrunk in place as we pick.
        remaining = list(range(len(self.demo_pool)))

        for _ in range(min(k, len(remaining))):
            # Key of the state the decision is made in. It must be taken
            # before the action is appended, otherwise the update below would
            # be stored under a key that action selection never looks up.
            current_state_key = self.get_state_key(state)

            if np.random.random() < self.epsilon:
                action = int(np.random.choice(remaining))
            else:
                q_values = [self.q_table.get((current_state_key, a), 0)
                            for a in remaining]
                action = remaining[int(np.argmax(q_values))]

            selected.append(action)
            remaining.remove(action)

            reward = self.calculate_reward(input_embedding, selected)

            next_state_key = self.get_state_key(state)
            next_max = max(
                (self.q_table.get((next_state_key, a), 0) for a in remaining),
                default=0,
            )

            previous = self.q_table.get((current_state_key, action), 0)
            self.q_table[(current_state_key, action)] = (
                (1 - self.alpha) * previous
                + self.alpha * (reward + self.gamma * next_max)
            )

        return [self.demo_pool[i] for i in selected]

    def calculate_reward(self, input_embedding, selected_indices):
        """Score a selection as 0.5 * normalised diversity + 0.5 * relevance.

        Args:
            input_embedding: Embedding vector of the query text.
            selected_indices: Indices into ``demo_pool`` / ``demo_embeddings``.

        Returns:
            The reward as a float; ``0.0`` for an empty selection.
        """
        if len(selected_indices) == 0:
            return 0.0

        chosen_embeddings = self.demo_embeddings[selected_indices]
        relevance = np.mean(np.dot(chosen_embeddings, input_embedding))

        max_entropy = np.log(self.num_classes) if self.num_classes > 1 else 0.0
        # With a single class there is no diversity to reward (and log(1) = 0
        # would otherwise be a zero divisor).
        if max_entropy > 0:
            normalized_diversity = self.diversity_score(selected_indices) / max_entropy
        else:
            normalized_diversity = 0.0

        return float(0.5 * normalized_diversity + 0.5 * relevance)


# Load AG News. The full train split is the demonstration pool and the full
# test split is evaluated; the trailing comments are a disabled shuffled
# 100-example subset for quick runs.
ag_news = load_dataset("ag_news")
train_demos = ag_news["train"]#.shuffle().select(range(100)).with_format("python")
test_samples = ag_news["test"]#.shuffle().select(range(100)).with_format("python")

# Text-generation pipeline for Llama 3.2 3B Instruct in bfloat16 with
# automatic device placement. The tokenizer loaded here is only used to look
# up the EOS token id, which is passed as the padding token id; it is not
# handed to the pipeline.
model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
pipe = pipeline(
    "text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    pad_token_id=tokenizer.eos_token_id
)

# Constructing the selector embeds every text in `train_demos` up front.
# AG News has four labels (0-3).
rde_selector = RDESelector(train_demos, num_classes=4)

def format_prompt(demos, test_sample):
    """Build the few-shot prompt: task line, labelled examples, then the query.

    Args:
        demos: Iterable of examples with ``'text'`` and ``'label'`` entries.
        test_sample: Mapping with a ``'text'`` entry (the article to classify).

    Returns:
        The prompt string, ending in an open ``Label:`` for the model.
    """
    pieces = [
        "Classify news articles into categories: World (0), Sports (1), Business (2), Sci/Tech (3)\n\n"
    ]
    pieces.extend(
        f"Article: {example['text']}\nLabel: {example['label']}\n\n"
        for example in demos
    )
    pieces.append(f"Article: {test_sample['text']}\nLabel:")
    return "".join(pieces)


# Evaluate few-shot accuracy on every test sample and print the final score.
correct = 0
for sample in test_samples:
    selected_demos = rde_selector.select_demos(sample, k=5)
    prompt = format_prompt(selected_demos, sample)

    # Decode greedily so the reported accuracy is reproducible; without this
    # the checkpoint's own generation defaults decide, and they may enable
    # sampling (NOTE(review): confirm against the model's generation config).
    outputs = pipe(
        prompt,
        max_new_tokens=2,
        do_sample=False,
        return_full_text=False
    )

    # Take the first digit of the continuation instead of requiring the
    # whole stripped continuation to equal the label: two new tokens can be
    # e.g. "1\n\nArticle", which a plain string comparison counts as wrong.
    completion = outputs[0]['generated_text']
    predicted_label = next((ch for ch in completion if ch in "0123456789"), None)
    actual_label = str(sample['label'])

    if predicted_label == actual_label:
        correct += 1

# max(..., 1) keeps the report from dividing by zero on an empty test set.
print(f"\nFinal Accuracy: {correct/max(len(test_samples), 1):.2%}")


