In [11]:
import pandas as pd

In [4]:
import torch

# Report how many CUDA devices PyTorch can see.
print("Number of GPU: ", torch.cuda.device_count())

# get_device_name() raises on a machine without CUDA, so only query it when a
# GPU is actually available; otherwise the cell would fail before the CPU
# fallback below is ever reached.
if torch.cuda.is_available():
    print("GPU Name: ", torch.cuda.get_device_name())
else:
    print("GPU Name: ", "none (CUDA not available)")

# Prefer the GPU when present and fall back to the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Number of GPU:  1
GPU Name:  NVIDIA GeForce RTX 3050 6GB Laptop GPU
Using device: cuda


In [5]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

import warnings
# Ignore every warning for the rest of the session (presumably to keep the
# cell outputs free of library warnings -- note this also hides deprecations).
warnings.filterwarnings('ignore')

## Aspect Term Extraction

In [6]:
# --- Prompt pieces for Aspect Term Extraction (ATE) ---------------------------
# The instruction holds the task definition plus six worked examples and ends
# with an open "input:" slot that the review sentence is appended to.
bos_instruction = """Definition: The output will be the aspects (both implicit and explicit) which have an associated opinion that are extracted from the input text. In cases where there are no aspects the output should be noaspectterm.
    Positive example 1-
    input: I charge it at night and skip taking the cord with me because of the good battery life.
    output: battery life
    Positive example 2-
    input: I even got my teenage son one, because of the features that it offers, like, iChat, Photobooth, garage band and more!.
    output: features, iChat, Photobooth, garage band
    Negative example 1-
    input: Speaking of the browser, it too has problems.
    output: browser
    Negative example 2-
    input: The keyboard is too slick.
    output: keyboard
    Neutral example 1-
    input: I took it back for an Asus and same thing- blue screen which required me to remove the battery to reset.
    output: battery
    Neutral example 2-
    input: Nightly my computer defrags itself and runs a virus scan.
    output: virus scan
    Now complete the following example-
    input: """
delim_instruct = ''
eos_instruct = ' \noutput:'
text = 'The cab ride was amazing but the service was pricey.'

# --- Load the ATE checkpoint --------------------------------------------------
ate_checkpoint = "kevinscaria/ate_tk-instruct-base-def-pos-neg-neut-combined"
tokenizer = AutoTokenizer.from_pretrained(ate_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(ate_checkpoint)

# --- Build the prompt, generate, and show the decoded answer -------------------
prompt = "".join([bos_instruction, text, delim_instruct, eos_instruct])
tokenized_text = tokenizer(prompt, return_tensors="pt")
output = model.generate(tokenized_text["input_ids"])
decoded = tokenizer.decode(output[0], skip_special_tokens=True)
print('Model output: ', decoded)

tokenizer_config.json:   0%|          | 0.00/2.57k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

Model output:  cab ride, service


## Aspect Term Sentiment Classification

In [7]:
# Load the Aspect Term Sentiment Classification (ATSC) checkpoint.
tokenizer = AutoTokenizer.from_pretrained("kevinscaria/atsc_tk-instruct-base-def-pos-neg-neut-combined")
model = AutoModelForSeq2SeqLM.from_pretrained("kevinscaria/atsc_tk-instruct-base-def-pos-neg-neut-combined")

# ATSC task definition plus worked examples; ends with an open "input:" slot.
bos_instruct = """Definition: The output will be 'positive' if the aspect identified in the sentence contains a positive sentiment. If the sentiment of the identified aspect in the input is negative the answer will be 'negative'. 
    Otherwise, the output should be 'neutral'. For aspects which are classified as noaspectterm, the sentiment is none.
    Positive example 1-
    input: With the great variety on the menu , I eat here often and never get bored. The aspect is menu.
    output: positive
    Positive example 2- 
    input: Great food, good size menu, great service and an unpretensious setting. The aspect is food.
    output: positive
    Negative example 1-
    input: They did not have mayonnaise, forgot our toast, left out ingredients (ie cheese in an omelet), below hot temperatures and the bacon was so over cooked it crumbled on the plate when you touched it. The aspect is toast.
    output: negative
    Negative example 2-
    input: The seats are uncomfortable if you are sitting against the wall on wooden benches. The aspect is seats.
    output: negative
    Neutral example 1-
    input: I asked for seltzer with lime, no ice. The aspect is seltzer with lime.
    output: neutral
    Neutral example 2-
    input: They wouldnt even let me finish my glass of wine before offering another. The aspect is glass of wine.
    output: neutral
    Now complete the following example-
    input: """
delim_instruct = ' The aspect is '
eos_instruct = '.\noutput:'
text = 'The cab ride was amazing but the driver was rude.'

# Classify the sentiment of each aspect in turn.
# The prompt must start with the ATSC definition defined in this cell
# (`bos_instruct`). Building it from `bos_instruction` would either pick up a
# different task's instruction left in the kernel by another cell or raise
# NameError on a fresh kernel.
for aspect_term in ('cab ride', 'driver'):
    tokenized_text = tokenizer(bos_instruct + text + delim_instruct + aspect_term + eos_instruct, return_tensors="pt")
    output = model.generate(tokenized_text.input_ids)
    print(f'Model output for {aspect_term}: ', tokenizer.decode(output[0], skip_special_tokens=True))

tokenizer_config.json:   0%|          | 0.00/2.57k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

Model output for cab ride:  positive
Model output for driver:  negative


## Joint Task - Aspect Term and Polarity Co-Extraction

In [8]:
# --- Prompt pieces for the joint aspect + polarity task ------------------------
# The instruction holds the task definition plus six worked examples and ends
# with an open "input:" slot that the review sentence is appended to.
bos_instruction = """Definition: The output will be the aspects (both implicit and explicit) and the aspects sentiment polarity. In cases where there are no aspects the output should be noaspectterm:none.
    Positive example 1-
    input: I charge it at night and skip taking the cord with me because of the good battery life.
    output: battery life:positive, 
    Positive example 2-
    input: I even got my teenage son one, because of the features that it offers, like, iChat, Photobooth, garage band and more!.
    output: features:positive, iChat:positive, Photobooth:positive, garage band:positive
    Negative example 1-
    input: Speaking of the browser, it too has problems.
    output: browser:negative
    Negative example 2-
    input: The keyboard is too slick.
    output: keyboard:negative
    Neutral example 1-
    input: I took it back for an Asus and same thing- blue screen which required me to remove the battery to reset.
    output: battery:neutral
    Neutral example 2-
    input: Nightly my computer defrags itself and runs a virus scan.
    output: virus scan:neutral
    Now complete the following example-
    input: """
delim_instruct = ''
eos_instruct = ' \noutput:'
text = 'The cab ride was amazing but the service was pricey.'

# --- Load the joint-task checkpoint --------------------------------------------
joint_checkpoint = "kevinscaria/joint_tk-instruct-base-def-pos-neg-neut-combined"
tokenizer = AutoTokenizer.from_pretrained(joint_checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(joint_checkpoint)

# --- Build the prompt, generate, and show the decoded answer -------------------
prompt = "".join([bos_instruction, text, delim_instruct, eos_instruct])
tokenized_text = tokenizer(prompt, return_tensors="pt")
output = model.generate(tokenized_text["input_ids"])
decoded = tokenizer.decode(output[0], skip_special_tokens=True)
print('Model output: ', decoded)

tokenizer_config.json:   0%|          | 0.00/2.57k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

Model output:  cab ride:positive, service:negative


## Custom Code

In [12]:

# Function to process text
def process_text(text):
    """Run the currently loaded seq2seq model on ``text`` and return its answer.

    The prompt is ``bos_instruction + text + delim_instruct + eos_instruct``.
    Those three strings, together with ``tokenizer`` and ``model``, are read
    from the notebook's global namespace, so the result depends on which cell
    assigned them last.

    Args:
        text: The sentence to insert into the prompt's open "input:" slot.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    prompt = "".join((bos_instruction, text, delim_instruct, eos_instruct))
    encoded = tokenizer(prompt, return_tensors="pt")
    generated = model.generate(encoded.input_ids)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

# Demo input: a handful of review sentences (the last one is an empty string).
reviews = [
    'The cab ride was amazing but the service was pricey.',
    'Realiable and comfortable. Order one size bigger.',
    'Nice product & quality also good i liked it 👍👌👌',
    '',
]
data = {'text': reviews}
df = pd.DataFrame(data)

# Run every review through process_text and store the answer next to its input.
processed = df['text'].apply(process_text)
df['processed_text'] = processed

print(df)


                                                text  \
0  The cab ride was amazing but the service was p...   
1                              Another example text.   

                        processed_text  
0  cab ride:positive, service:negative  
1                    noaspectterm:none  
