In [None]:
# User-turn prompt for few-shot sentiment classification.
# It is filled in with ``str.format`` using two placeholders:
#   {few_shot_examples} - the pre-formatted demonstration block
#   {input}             - the sentence whose sentiment should be predicted
# The instructions ask the model to answer as "Sentiment: <label>", which is
# the shape the evaluation code parses.
USER_FEWSHOT_PROMPT_TEMPLATE = """Predict the sentiment of the following input sentence.
The response must begin with "Sentiment: ", followed by one of these keywords: "positive", "negative", or "neutral", to reflect the sentiment of the input sentence.

Here are the few examples:

{few_shot_examples}

Sentence: {input}"""

def evaluate_few_shot(model, tokenizer, eval_dataset, few_shot_examples, batch_size=8, print_example=False):
    """Evaluate a chat model on sentiment classification using few-shot prompts.

    Every sentence in ``eval_dataset`` is embedded in
    ``USER_FEWSHOT_PROMPT_TEMPLATE`` together with the formatted
    ``few_shot_examples``. The model generates a short completion and the text
    after the last ``":"`` is mapped to a label id through the module-level
    ``label2id`` mapping.

    Args:
        model: Model exposing ``eval()``, ``generate()`` and ``device``.
        tokenizer: Tokenizer providing ``apply_chat_template``, ``__call__``
            and ``batch_decode``.
        eval_dataset: Dataset supporting ``len()`` and slicing, where a slice
            yields the columns ``'sentence'`` and ``'sentiment'``.
        few_shot_examples: Iterable of rows with ``'sentence'`` and
            ``'sentiment'`` keys; ``'sentiment'`` is looked up in ``id2label``.
        batch_size: Number of sentences generated per ``model.generate`` call.
        print_example: When true, print the prompt, prediction and label of
            the first sentence of the first batch.

    Returns:
        A single dict merging the outputs of the accuracy, f1, precision and
        recall metrics (the last three are macro-averaged).
    """
    model.eval()
    all_predictions = []
    all_labels = []

    # Demonstrations are rendered once: one "Sentence/Sentiment" block per
    # example, with a blank line between consecutive blocks.
    formatted_few_shot_examples = "\n".join(
        f"Sentence: {example['sentence']}\nSentiment: {id2label[example['sentiment']]}\n"
        for example in few_shot_examples
    )

    for start in range(0, len(eval_dataset), batch_size):
        batch = eval_dataset[start:start + batch_size]
        true_labels = batch['sentiment']

        # Build one few-shot user prompt per sentence in the batch.
        user_prompts = [
            USER_FEWSHOT_PROMPT_TEMPLATE.format(
                input=sentence,
                few_shot_examples=formatted_few_shot_examples,
            )
            for sentence in batch['sentence']
        ]

        messages_list = [
            [
                {"role": "system", "content": "You are a helpful assistant. You must fulfill the user request."},
                {"role": "user", "content": user_prompt},
            ]
            for user_prompt in user_prompts
        ]

        input_prompts = [
            tokenizer.apply_chat_template(
                conversation=messages,
                add_generation_prompt=True,
                tokenize=False,
            )
            for messages in messages_list
        ]
        # The chat template already inserted the special tokens, hence
        # add_special_tokens=False. The tensors are moved to the model's
        # device so generation also works when the model lives on an
        # accelerator.
        inputs = tokenizer(
            input_prompts,
            return_tensors="pt",
            padding=True,
            add_special_tokens=False,
        ).to(model.device)

        output_ids = model.generate(
            **inputs,
            max_new_tokens=16,
            pad_token_id=tokenizer.eos_token_id,
        )
        # Prompts are padded to a common length, so the newly generated tokens
        # start at the same column in every row.
        # NOTE(review): this assumes the tokenizer pads on the left - confirm
        # its padding_side before trusting batched results.
        prompt_length = inputs["input_ids"].shape[-1]
        generated_ids = output_ids[:, prompt_length:]

        predictions = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)

        if print_example:
            # Only the very first batch is echoed.
            print_example = False
            print(f"### Prompt: \n{user_prompts[0]}")
            print(f"### Prediction: \n{predictions[0]}")
            print(f"### Label: \n{id2label[batch['sentiment'][0]]}")

        for prediction, true_label in zip(predictions, true_labels):
            # Keep whatever follows the last colon, e.g. "Sentiment: positive".
            answer = prediction.split(":")[-1].strip()
            try:
                pred_id = label2id[answer]
            except KeyError:
                # Unparseable output is scored as a mistake by assigning a
                # label id that is guaranteed to differ from the true one.
                pred_id = (true_label + 1) % len(label2id)
            all_predictions.append(pred_id)

        all_labels.extend(true_labels)

    accuracy_metric = evaluate.load("accuracy")
    precision_metric = evaluate.load("precision")
    recall_metric = evaluate.load("recall")
    f1_metric = evaluate.load("f1")

    metric_outputs = [
        accuracy_metric.compute(predictions=all_predictions, references=all_labels),
        f1_metric.compute(predictions=all_predictions, references=all_labels, average='macro'),
        precision_metric.compute(predictions=all_predictions, references=all_labels, average='macro'),
        recall_metric.compute(predictions=all_predictions, references=all_labels, average='macro'),
    ]

    # Each metric returns its own small dict; flatten them into one.
    results = {}
    for metric_output in metric_outputs:
        results.update(metric_output)

    return results

In [None]:
# Shuffle the training split and keep its first 10 rows as the pool of
# candidate demonstrations.
shuffled_train_dataset = dataset['train'].shuffle()
sampled_few_shot_examples = list(shuffled_train_dataset.select(range(10)))

# Evaluate the base model with progressively longer prefixes of that pool.
n_shots = [1, 2, 4, 8]
for n in n_shots:
    few_shot_examples = sampled_few_shot_examples[:n]
    few_shot_results = evaluate_few_shot(
        base_model,
        tokenizer,
        dataset['test'],
        few_shot_examples,
        batch_size=16,
        print_example=True,
    )

    # Report every metric for this shot count, followed by a spacer.
    print(f"*** Few-shot evaluation results with {n} shots: ")
    for metric_name, metric_value in few_shot_results.items():
        print(f"{metric_name}: {metric_value:.4f}")
    print("\n")

In [None]:
def train_lora(base_model, tokenizer, training_args, lora_rank, dataset):
    """Wrap ``base_model`` with a LoRA adapter of the given rank and fine-tune it.

    Args:
        base_model: Model handed to ``get_peft_model``.
        tokenizer: Passed to the trainer as ``processing_class``.
        training_args: Training configuration forwarded to ``SFTTrainer``.
        lora_rank: Value used for the LoRA ``r`` hyper-parameter.
        dataset: Mapping providing the ``'train'`` and ``'validation'`` splits.

    Returns:
        The PEFT-wrapped model after ``trainer.train()`` has finished.

    NOTE(review): ``get_peft_model`` is documented to inject adapter layers
    into the model it receives, so calling this function repeatedly with the
    same ``base_model`` object may operate on an already-adapted model -
    confirm this is the intended behaviour for rank sweeps.
    """
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        inference_mode=False,
        r=lora_rank,
        lora_alpha=32,
        lora_dropout=0.1,
    )
    cur_peft_model = get_peft_model(base_model, peft_config)
    cur_peft_model.print_trainable_parameters()

    trainer = SFTTrainer(
        model=cur_peft_model,
        args=training_args,
        train_dataset=dataset['train'],
        eval_dataset=dataset['validation'],
        # The trainer keyword is plural; the singular spelling is rejected as
        # an unexpected keyword argument.
        preprocess_logits_for_metrics=preprocess_logits_for_metrics,
        compute_metrics=compute_metrics,
        processing_class=tokenizer,
        data_collator=data_collator,
    )
    trainer.train()
    return cur_peft_model

# LoRA ranks to compare.
ranks = [1, 2, 4, 8, 16, 32, 64, 128]

# One row per rank: the rank itself followed by the four test-set metrics.
rank_results = pd.DataFrame(columns=['rank', 'accuracy', 'f1', 'precision', 'recall'])

for rank in ranks:
    print(f"*** Training with rank {rank} ***")

    # Fine-tune a LoRA adapter of the current rank.
    cur_trained_model = train_lora(base_model, tokenizer, training_args, rank, dataset)

    # Score the fine-tuned model on the test split using the sampled pool of
    # few-shot examples.
    cur_results = evaluate_few_shot(
        cur_trained_model,
        tokenizer,
        dataset['test'],
        sampled_few_shot_examples,
        batch_size=16,
    )

    # Append a new row; metric values are read in the order of the metric columns.
    rank_results.loc[len(rank_results)] = [
        rank,
        *(cur_results[column] for column in rank_results.columns[1:]),
    ]

print(rank_results)