### Dependencies

In [None]:
# !pip install transformers
# !pip install datasets

In [None]:
import os
import re
import random
from tqdm import tqdm

from together import Together
from datasets import load_dataset

### Prompting 

#### Dataset

In [None]:
# IMDB review dataset; its "train" and "test" splits are sampled in the next cell.
imdb = load_dataset("imdb")

In [None]:
# Sample 60 reviews per split: for i = 1..30 take item i from the front and
# item -i from the back, interleaved in that order. The order matters because
# the seeded shuffle below is applied to exactly this sequence.
train_dataset = [imdb["train"][idx] for i in range(1, 31) for idx in (i, -i)]
test_dataset = [imdb["test"][idx] for i in range(1, 31) for idx in (i, -i)]

# Fixed seed so every run evaluates on the same shuffled subsets.
random.seed(42)
random.shuffle(train_dataset)
random.shuffle(test_dataset)

# Labels are 0/1, so their sum is the number of positive reviews in each subset.
train_positive = sum(row["label"] for row in train_dataset)
test_positive = sum(row["label"] for row in test_dataset)
print(train_positive, "положительных примеров в train датасете")
print(test_positive, "положительных примеров в test датасете")

# Gold labels for the evaluation subset; pred_labels starts out empty.
true_labels = [row["label"] for row in test_dataset]
pred_labels = []

In [None]:
# Model identifier passed to every IMDB classification request below.
# NOTE(review): the variable is called `model_8b`, but the identifier is
# "databricks/dbrx-instruct" — confirm the name still reflects the model size.
model_8b = "databricks/dbrx-instruct"

# The API key is read from the environment rather than hard-coded in the notebook.
API_KEY = os.environ.get("TOGETHER_API_KEY")
client = Together(api_key=API_KEY)

#### Zero-Shot 

In [None]:
import time

# Zero-shot baseline: each test review is sent on its own and the 0/1 label is
# parsed from the model's short reply. `accuracy` collects 1 for a correct
# prediction and 0 for an incorrect one.
true_labels = [sample["label"] for sample in test_dataset]
accuracy = []

# The system message is identical for every review, so build it once.
system_message = {"role": "system", "content": "You are a helpful assistant that classifies movies reviews. Please, read the review and answer with symbol - 1, if user liked the movie, or with symbol 0, if he did not. Do not answer in text, answer in one of the following two options: 1, 0"}

for sample in tqdm(test_dataset):
    review = sample['text'].replace('<br />', '')
    messages = [
        system_message,
        {"role": "user", "content": f"Do not answer in text and classify this review in one word with one of following options - 0 or 1: {review}"},
    ]
    response = client.chat.completions.create(messages=messages, model=model_8b, timeout=10, max_tokens=20, temperature=0.4)
    answer = (response.choices[0].message.content or "").lower()

    # Take the FIRST label the model mentions. Checking for '1' before '0'
    # would label every reply that contains both digits (e.g. "0, not 1") as
    # positive. A standalone token is preferred; any 0/1 character is the
    # fallback so replies such as "10" are still resolved rather than rejected.
    match = re.search(r'\b[01]\b', answer) or re.search(r'[01]', answer)
    if match is None:
        raise ValueError(f'Нет решения по отзыву:\n\t{sample["text"]}\nМодель сказала:\n\t{answer}')
    pred_label = int(match.group())

    accuracy.append(int(pred_label == sample['label']))
    time.sleep(3)  # pause between requests (presumably to stay under the API rate limit)

In [None]:
# Fraction of test reviews classified correctly.
# Careful post-processing of the model output would improve this metric considerably.
correct_total = sum(accuracy)
print(correct_total / len(accuracy))

#### ReAsking

In [None]:
def _first_binary_label(text):
    """Return the first 0/1 label mentioned in *text* as an int, or None.

    A standalone ``0``/``1`` token is preferred; if there is none, the first
    ``0``/``1`` character anywhere in the text is used. Taking the first
    mention avoids always resolving to 1 whenever both digits appear.
    """
    match = re.search(r'\b[01]\b', text) or re.search(r'[01]', text)
    return int(match.group()) if match else None


# Re-asking: classify the review, then challenge the model to reconsider and
# use its second answer. The first answer is used only when the second one
# contains no label at all.
true_labels = [sample["label"] for sample in test_dataset]
accuracy = []

for sample in tqdm(test_dataset):
    review = sample['text'].replace('<br />', '')
    messages = [
        {"role": "system", "content": "You are a helpful assistant that classifies movies reviews. Please, read the review and answer with symbol - 1, if user liked the movie, or with symbol 0, if he did not. Do not answer in text, answer in one of the following two options: 1, 0"},
        {"role": "user", "content": f"Do not answer in text and classify this review in one word with one of following options - 0 or 1: {review}"},
    ]
    response = client.chat.completions.create(messages=messages, model=model_8b, timeout=10, max_tokens=20, temperature=0.2)
    first_answer = (response.choices[0].message.content or "").lower()

    # Feed the model its own answer back and ask it to double-check.
    messages.append({"role": "assistant", "content": first_answer.strip()})
    messages.append({"role": "user", "content": "Are you sure that your answer is fully correct? Please think and classify the review again with one of following options - 0 or 1."})
    response_new = client.chat.completions.create(messages=messages, model=model_8b, timeout=10, max_tokens=512)
    second_answer = (response_new.choices[0].message.content or "").lower()

    pred_label = _first_binary_label(second_answer)
    if pred_label is None:
        print(f'No answer in new generation: \n\t{second_answer}')
        pred_label = _first_binary_label(first_answer)
        if pred_label is None:
            raise ValueError(f'Нет решения по отзыву:\n\t{sample["text"]}\nМодель сказала:\n\t{first_answer}')

    accuracy.append(int(pred_label == sample['label']))

print(sum(accuracy) / len(accuracy))

#### Few-Shot 

In [None]:
def build_few_shot_prompt(review_text, examples=None):
    """Build the chat messages for few-shot sentiment classification.

    The conversation is: one system message, then for every example a user
    message with the review followed by an assistant message with its label,
    and finally a user message with the review to classify.

    Args:
        review_text: Text of the review to classify. ``<br />`` tags are removed.
        examples: Iterable of dicts with ``"text"`` and ``"label"`` keys used as
            demonstrations. When omitted, the first four items of the
            module-level ``train_dataset`` are used.

    Returns:
        A list of ``{"role": ..., "content": ...}`` message dicts.
    """
    if examples is None:
        # Resolved at call time so the demonstrations follow the current
        # `train_dataset` instead of a snapshot taken when the function was defined.
        examples = train_dataset[:4]

    instruction = "Do not answer in text and classify this review in one word with one of following options - 0 or 1: "
    system_prompt = "You are a helpful assistant that classifies movies reviews. Please, read the review and answer with symbol - 1, if user's review is positive, or with symbol 0, if review is negative. Do not answer in text, answer in one of the following two options: 1, 0"

    messages = [{"role": "system", "content": system_prompt}]
    for example in examples:
        messages.append({"role": "user", "content": instruction + example['text'].replace('<br />', '')})
        # Demonstrated answers carry an "assistant: " prefix; keep this format
        # in sync with whatever code parses the model's replies.
        messages.append({"role": "assistant", "content": f"assistant: {example['label']}"})

    messages.append({"role": "user", "content": instruction + review_text.replace('<br />', '')})
    return messages

In [None]:
# Few-shot run: collect the lower-cased raw reply for every test review.
# Scoring happens in a separate cell, so `accuracy` is only reset here.
true_labels = [item["label"] for item in test_dataset]
responses, accuracy = [], []

for sample in tqdm(test_dataset):
    messages = build_few_shot_prompt(sample['text'])
    completion = client.chat.completions.create(
        model=model_8b,
        messages=messages,
        max_tokens=20,
        temperature=0.2,
        timeout=10,
    )
    response = completion.choices[0].message.content.lower()
    responses.append(response)
        


In [None]:
def _label_from_response(text):
    """Return the first standalone 0/1 in *text* as an int, or None if absent.

    This accepts both the demonstrated "assistant: 1" format and a bare "1";
    matching the exact substring 'assistant: 1' would score every positive
    answer given without the prefix as negative.
    """
    match = re.search(r'\b[01]\b', text)
    return int(match.group()) if match else None


# Replies without a label become None, which never equals a gold label and is
# therefore counted as an error rather than silently treated as class 0.
preds = [_label_from_response(elem) for elem in responses]
sum(pred == gold for pred, gold in zip(preds, true_labels)) / len(preds)

#### Chain-of-Thought

In [None]:
# Chain-of-thought experiment setup: the GSM8K dataset ("main" configuration)
# and the model identifier used for all requests in this section.
gsm8k = load_dataset("openai/gsm8k", "main")
model = "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"


In [None]:
# Evaluation set: the first 20 GSM8K test problems with their numeric answers.
samples = []

for i in range(20):
    sample = gsm8k["test"][i]
    # The reference solution carries the final answer after a "####" marker.
    final_answer = sample["answer"].split("####")[1].strip()
    # The final answer may be written with thousands separators (e.g. "1,000"),
    # which int() rejects, so drop the commas before converting.
    samples.append({
        "question": sample["question"],
        "answer": int(final_answer.replace(",", "")),
    })


print("Task:", samples[0]["question"])
print("Answer:", samples[0]["answer"])

In [None]:
def create_prompt(task):
    """Build the conversation that asks the model for a final JSON answer to *task*.

    This function itself calls the API once: the model is first asked to solve
    the task step by step (up to 300 tokens), and that solution is inserted
    into the conversation as an assistant turn. The returned list of message
    dicts ends with a user turn requesting only the answer in JSON form, so
    the caller still has to send it to obtain the final answer.
    """
    conversation = [
        {"role": "system", "content": 'You are a helpful assistant that helps to solve math tasks. Now you are on a math exam.'},
        {"role": "user", "content": f"Here is a math task. Solve it step by step and give answer in the end: {task}"},
    ]

    # First request: let the model reason about the task.
    reasoning = client.chat.completions.create(messages=conversation, model=model, max_tokens=300)
    solution_text = reasoning.choices[0].message.content

    # Replay the reasoning as the assistant's turn, then ask for the bare answer.
    conversation += [
        {"role": "assistant", "content": f"Solution: {solution_text}"},
        {"role": "user", "content": "Now give me only the answer in JSON-format, key is word 'answer', and value is number - final answer for this task"},
    ]
    return conversation

In [None]:
# For every sampled task, build the two-step conversation and request the
# final short answer; raw replies and gold answers are kept in parallel lists.
responses, answers = [], []
for sample in tqdm(samples):
    messages = create_prompt(sample['question'])
    response = client.chat.completions.create(model=model, messages=messages, max_tokens=20)
    final_message = response.choices[0].message
    responses.append(final_message.content)
    answers.append(sample['answer'])

In [None]:
def _extract_numeric_answer(text):
    """Extract the numeric answer from a model reply, or return None.

    The number following an "answer" key (the JSON shape requested from the
    model) is preferred; otherwise the first number anywhere in the reply is
    used. Thousands separators are removed. Integers are returned as int,
    anything else as float.
    """
    number = r'(-?\d[\d,]*(?:\.\d+)?)'
    match = re.search(r'answer\W*?' + number, text, flags=re.IGNORECASE) or re.search(number, text)
    if match is None:
        return None
    literal = match.group(1).replace(",", "")
    try:
        return int(literal)
    except ValueError:
        return float(literal)


# A reply that cannot be parsed yields None and is scored as wrong, so one
# malformed reply (missing space after the colon, code fences, "18.0", extra
# text) does not abort the whole evaluation.
predictions = [_extract_numeric_answer(response) for response in responses]
sum(predicted == expected for predicted, expected in zip(predictions, answers)) / len(answers)