Clone github repo to have access to data files.

In [1]:
!git clone https://github.com/Hananxx/SentimentAnalysisPromptExp.git

Cloning into 'SentimentAnalysisPromptExp'...
remote: Enumerating objects: 23, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (18/18), done.[K
remote: Total 23 (delta 8), reused 9 (delta 2), pack-reused 0 (from 0)[K
Receiving objects: 100% (23/23), 39.42 KiB | 4.38 MiB/s, done.
Resolving deltas: 100% (8/8), done.


### Install and import needed packages

In [2]:
!pip install transformers torch pandas accelerate



In [3]:
import pandas as pd
import json
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import torch

### Set root path for accessing repo files

In [11]:
file_path_prefix = "SentimentAnalysisPromptExp/"

### Load dataset

In [12]:
df = pd.read_csv(file_path_prefix + 'data/app.csv')
print(df.head())  # Inspect the first few rows

                                            sentence  label
0  package file invalid i had my phone on factory...      2
1  iffy nice clean app but sometimes it works and...      2
2                        cool just freezes everytime      2
3  network error! suddenly after downloading an u...      2
4  annoying it let me choose the pictures i want ...      2


### Load prompts

In [13]:
with open(file_path_prefix + 'prompts/zero-shot-prompt-template.json', 'r') as f:
    templates = json.load(f)
print(templates)

{'vicuna-0': "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nUSER: Please perform Sentiment Classification task. Given the sentence from {}, assign a sentiment label from ['negative', 'neutral', 'positive']. Return label only without any other text.\nASSISTANT: Sure!</s>\nUSER: Sentence: {}\nASSISTANT:", 'vicuna-jira-0': "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\nUSER: Please perform Sentiment Classification task. Given the sentence from {}, assign a sentiment label from ['negative', 'positive']. Return label only without any other text.\nASSISTANT: Sure!</s>\nUSER: Sentence: {}\nASSISTANT:", 'llama2-0': "<s>[INST] <<SYS>>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's q

### Use Vicuna

In [24]:
# Load tokenizer and model
model_name = 'lmsys/vicuna-13b-v1.5'
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create a text generation pipeline
model_pipeline = pipeline(
    'text-generation',
    model=model_name,
    tokenizer=tokenizer,
    dtype=torch.float16,
    device_map='auto',
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Device set to use cuda:0


In [25]:
prompt = templates['vicuna-0']
sentences = df['sentence'].tolist()[0:100] # For test purposes only first 100 sentences.

for sentence in sentences:
    full_prompt = prompt.format("APP reviews", sentence)
    output = model_pipeline(
          full_prompt,
          max_length=2048,
          eos_token_id=tokenizer.eos_token_id
    )

    response = output[0]['generated_text'].split("ASSISTANT:")[-1].strip()  # Extract just the label.
    print(response)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
positive
negative
Sentiment: negative
negative
positive
negative
negative
negative
negative
negative
negative
negative
positive
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
neutral
negative
negative
negative
negative
positive
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
negative
neutral
negative
negative
negative
negative
positive
positive
negative
positive
negative
negative
positive
negative
positive
positive
positive
positive
positive
neutral
