Clone github repo to have access to data files.

In [1]:
!git clone https://github.com/Hananxx/SentimentAnalysisPromptExp.git

Cloning into 'SentimentAnalysisPromptExp'...
remote: Enumerating objects: 38, done.[K
remote: Counting objects: 100% (38/38), done.[K
remote: Compressing objects: 100% (33/33), done.[K
remote: Total 38 (delta 18), reused 9 (delta 2), pack-reused 0 (from 0)[K
Receiving objects: 100% (38/38), 51.56 KiB | 7.37 MiB/s, done.
Resolving deltas: 100% (18/18), done.


### Install and import needed packages

In [2]:
!pip install transformers torch pandas accelerate



In [15]:
import pandas as pd
import json
from transformers import pipeline, AutoTokenizer
import torch
from sklearn.metrics import classification_report



### Set filepath prefix

In [16]:
filepath_prefix = "SentimentAnalysisPromptExp/"

### Load testing dataset

In [17]:
df = pd.read_csv(filepath_prefix + 'data/app-test.csv')
print(df.head())  # Inspect the first few rows

                                            sentence  label
0                  its nice this apps is must lovely      1
1        this is really good this app is really good      1
2  ? freezes and force closes a lot on droid incr...      2
3  favorite i use this application every day is v...      1
4  ? probally the biggest flop ever. as soon as y...      2


### Load templates and set the used template

In [18]:
with open(filepath_prefix + 'prompts/zero-shot-prompt-template.json', 'r') as f:
    templates = json.load(f)
print(templates)
prompt = templates['vicuna-0']

{'vicuna-0': "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nUSER: Please perform Sentiment Classification task. Given the sentence from {}, assign a sentiment label from ['negative', 'neutral', 'positive']. Return label only without any other text.\nASSISTANT: Sure!</s>\nUSER: Sentence: {}\nASSISTANT:", 'vicuna-jira-0': "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nUSER: Please perform Sentiment Classification task. Given the sentence from {}, assign a sentiment label from ['negative', 'positive']. Return label only without any other text.\nASSISTANT: Sure!</s>\nUSER: Sentence: {}\nASSISTANT:", 'llama2-0': "<s>[INST] <<SYS>>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's q

### Load sentences

In [19]:
sentences = df['sentence'].tolist() # For test purposes only the first 50 sentences.

### Extract sentiment labels from different responses

In [20]:
def extract_sentiment_label(response):
    labels = ['positive', 'negative', 'neutral']
    for label in labels:
        if label in response:
            return label
    return None  # if no label found

### Load tokenizer and model

In [21]:
models = ['lmsys/vicuna-13b-v1.5', 'WizardLM/WizardLM-13B-V1.2', 'meta-llama/Llama-2-13b-chat-hf']
def load_model(model_name):
    model_output = []
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # create a text generation pipeline
    model_pipeline = pipeline(
        'text-generation',
        model=model_name,
        tokenizer=tokenizer,
        dtype=torch.float16,
        device_map='auto',
    )

    for sentence in sentences:
        full_prompt = prompt.format("APP reviews", sentence)
        output = model_pipeline(
            full_prompt,
            max_new_tokens=1024,
            # max_new_tokens= 512,
            temperature=0.7,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
            return_full_text=False
        )
        response = output[0]['generated_text'].strip()  # Extract just the label.
        model_output.append(extract_sentiment_label(response.lower()))

    print(model_output)
    return model_output

### Post process metrics

In [22]:
labels = { 0: 'neutral', 1: 'positive', 2: 'negative'}
def get_data_frame(model_output):
      return pd.DataFrame({
        'text': df['sentence'].tolist(),
        'true_label': [labels[label_num] for label_num in df['label'].tolist()],
        'pred_label': model_output
    })

#### Weighted metrics

In [23]:
print('=====( Vicuna model output )=====')
vicuna_output = load_model(models[0])
print('=====( Vicuna model metrics )=====')
vicuna_data_frame = get_data_frame(vicuna_output)
print(classification_report(vicuna_data_frame['true_label'], vicuna_data_frame['pred_label']))

=====( Vicuna model output )=====


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


['positive', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'neutral', 'negative', 'positive', 'negative', 'neutral', 'neutral', 'negative', 'negative', 'positive', 'positive', 'negative', 'positive', 'positive', 'positive', 'negative', 'negative', 'positive', 'positive']
=====( Vicuna model metrics )=====
              precision    recall  f1-score   support

    negative       0.92      0.92      0.92        13
     neutral       0.67      0.67      0.67         3
    positive       0.95      0.95      0.95        19

    accuracy                           0.91        35
   macro avg       0.85      0.85      0.85        35
weighted avg       0.91      0.91      0.91        35



### Analysis - WizardLM model

In [24]:
print('=====( WizardLM model output )=====')
wizardlm_output = load_model(models[1])
print('=====( WizardLM model metrics )=====')
wizardlm_data_frame = get_data_frame(wizardlm_output)
print(classification_report(wizardlm_data_frame['true_label'], wizardlm_data_frame['pred_label']))

=====( WizardLM model output )=====


tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/96.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/657 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/26.0G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/170 [00:00<?, ?B/s]

Device set to use cuda:0


['positive', 'positive', 'negative', 'positive', 'negative', 'positive', 'negative', 'negative', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'negative', 'neutral', 'negative', 'positive', 'positive', 'neutral', 'positive', 'positive', 'positive', 'positive', 'positive', 'negative', 'positive', 'positive', 'positive', 'negative', 'positive', 'positive', 'positive']
=====( WizardLM model metrics )=====
              precision    recall  f1-score   support

    negative       1.00      0.69      0.82        13
     neutral       1.00      0.67      0.80         3
    positive       0.79      1.00      0.88        19

    accuracy                           0.86        35
   macro avg       0.93      0.79      0.83        35
weighted avg       0.89      0.86      0.85        35

