In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, BertTokenizer,AutoModelForSequenceClassification,BertForSequenceClassification
from transformers import pipeline

In [None]:
# Load the synthetic aspect-based sentiment dataset and preview its first rows.
csv_path = r"/content/synthetic_absa_dataset.csv"
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Summarise the DataFrame's columns, dtypes and non-null counts.
df.info()


In [None]:
# Build the model input by joining each review with its aspect.
# The separator is written as the upper-case special token "[SEP]": a lower-case
# "[sep]" is not a special token for the BERT tokenizer and would be split into
# ordinary word pieces instead of acting as a separator.
df['Input_text'] = df.apply(
    lambda row: f"what is the review {row['review']} [SEP] for the aspect {row['aspect']}",
    axis=1,
)
df.head()

In [None]:
# Encode the sentiment labels as integer class ids. The fitted encoder keeps the
# id -> original label mapping in `le.classes_`.
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
sentiment_ids = le.fit_transform(df['sentiment'])
df['sentiment'] = sentiment_ids
df.head()

In [None]:
# Keep only the model input text and its encoded label.
model_columns = ['Input_text', 'sentiment']
df1 = df[model_columns]
df1.head()

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
from sklearn.model_selection import train_test_split

features = df1['Input_text']
targets = df1['sentiment']
x_train, x_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

In [None]:
# Print the training inputs produced by the split.
print(x_train)

In [None]:
# Display the held-out test inputs.
x_test

In [None]:
# Display the training labels.
y_train

In [None]:
# Load the pretrained tokenizer for the 'bert-base-uncased' checkpoint.
from transformers import BertTokenizer

bert_checkpoint = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)

In [None]:
# Tokenize both splits with identical settings: truncate at 100 tokens and pad
# the sequences within each split to a common length.
tokenize_kwargs = dict(truncation=True, padding=True, max_length=100)
train_encoding = tokenizer(x_train.tolist(), **tokenize_kwargs)
test_encoding = tokenizer(x_test.tolist(), **tokenize_kwargs)

In [None]:
# Show which fields the tokenizer produced for the training split.
encoding_fields = [field for field in train_encoding.keys()]
print(encoding_fields)
# Other quick checks, left disabled:
#print(train_encoding.attention_mask)
#print(train_encoding.items())
#print(len(y_train.values))


In [None]:
import torch
#train_dataset=torch.utils.data.TensorDataset(torch.tensor(train_encoding['input_ids']),torch.tensor(train_encoding['attention_mask']),torch.tensor(y_train.values))
#test_encoding=torch.utils.data.TensorDataset(torch.tensor(test_encoding['input_ids']),torch.tensor(test_encoding['attention_mask']),torch.tensor(y_test.values))
# The commented-out TensorDataset approach above is one way to build the tensors.

# Alternatively, we can wrap the encodings and labels in a custom Dataset class:
class model_data(torch.utils.data.Dataset):
  """Map-style dataset pairing tokenizer encodings with their labels.

  Each item is a dict holding one tensor per encoding field plus a
  'labels' tensor for the same example.
  """

  def __init__(self,encodings,labels):
    """Store the encodings (field name -> per-example values) and the labels."""
    self.labels=labels
    self.encodings=encodings

  def __len__(self):
    """Return the number of examples, taken from the labels."""
    n_examples=len(self.labels)
    return n_examples

  def __getitem__(self,idx):
    """Return example `idx` as a dict of tensors, including its 'labels' entry."""
    sample={}
    for field,values in self.encodings.items():
      sample[field]=torch.tensor(values[idx])
    sample['labels']=torch.tensor(self.labels[idx])
    return sample

In [None]:
# Wrap each split's encodings together with its label array.
train_dataset = model_data(encodings=train_encoding, labels=y_train.values)
test_dataset = model_data(encodings=test_encoding, labels=y_test.values)

In [None]:
# Inspect the first training example: a dict of tensors, including its 'labels' entry.
train_dataset[0]

In [None]:
from transformers import BertForSequenceClassification, Trainer, TrainingArguments

# Derive the label set from the fitted LabelEncoder instead of hard-coding 3 classes,
# so the classification head always matches the classes present in the data and the
# model config carries readable label names. str() keeps the names JSON-serialisable.
label_names = [str(name) for name in le.classes_]

model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(label_names),
    id2label=dict(enumerate(label_names)),
    label2id={name: idx for idx, name in enumerate(label_names)},
)

# Training configuration: 3 epochs, batches of 16 for training and 64 for evaluation,
# 500 warmup steps, weight decay of 0.01, and a log entry every 10 steps.
args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=64,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
)

In [None]:
# Bundle the model, the training arguments and both datasets into a Trainer.
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [None]:
# Run training with the Trainer configured in the previous cell.
trainer.train()

In [None]:
# Evaluate on the dataset passed to the Trainer as eval_dataset. The result is stored
# as `eval_metrics` rather than `eval` so the built-in eval() is not shadowed for the
# rest of the notebook session.
eval_metrics = trainer.evaluate()
print(eval_metrics)

In [None]:
# Run the model over the test dataset and print the returned prediction output.
pred=trainer.predict(test_dataset)
print(pred)

In [None]:
# Predicted class id = index of the largest score along the last axis.
y_pred = np.argmax(pred.predictions, axis=-1)
print(y_pred)
# Reference label ids returned alongside the predictions.
pred_label = pred.label_ids
print(pred_label)

In [None]:
# Build the id -> name mapping from the fitted LabelEncoder so it always matches the
# encoding actually applied to df['sentiment'], instead of assuming a fixed
# negative/neutral/positive order.
id2label = dict(enumerate(le.classes_))
readable_pred_label = [id2label[i] for i in y_pred]
readable_actual_labels = [id2label[i] for i in y_test.values]


In [None]:
# Print the sizes of the predictions, the reference labels and the test inputs
# so they can be compared at a glance.
for collection in (readable_pred_label, readable_actual_labels, x_test):
    print(len(collection))

In [None]:
# Collect the test inputs with their actual and predicted labels in one frame.
# Note: the 'review' column holds the full Input_text prompt taken from x_test.
x_test_df = x_test.to_frame(name='review')
print(type(x_test_df))
print(x_test_df)
x_test_df = x_test_df.assign(
    actual_label=readable_actual_labels,
    predicted_label=readable_pred_label,
)
x_test_df.head()

In [None]:
# Show the first test input by position.
x_test.iloc[0]

In [None]:
# Show the first encoded test example (dict of tensors, including 'labels').
test_dataset[0]

Using the Hugging Face transformers pipeline

In [None]:
from transformers import pipeline

# Pretrained ABSA classifier; the aspect to score is supplied through text_pair.
absa_checkpoint = 'yangheng/deberta-v3-base-absa-v1.1'
classifier = pipeline('sentiment-analysis', model=absa_checkpoint, tokenizer=absa_checkpoint)

aspects = ['camera','price','design','screen']

# Classify the raw review text of the training rows. x_train holds the prompt built
# for BERT ("what is the review ... for the aspect ..."), which already names an
# aspect and would conflict with the aspect supplied through text_pair.
train_reviews = df.loc[x_train.index, 'review']

# For every training review, map each aspect to the top predicted label.
result = train_reviews.apply(
    lambda review: {aspect: classifier(review, text_pair=aspect)[0]['label'] for aspect in aspects}
)
print(result)

In [None]:
# `result` keeps the shuffled original row labels from the train/test split, so the
# label 435 is only present if that row landed in the training split. Fall back to
# the first row instead of raising KeyError.
print(result.get(435, result.iloc[0]))

In [None]:
# x_train keeps the shuffled original row labels after the split, so the label 3 is
# only present if that row landed in the training split. Fall back to the first
# training row instead of raising KeyError.
sample_text = x_train.get(3, x_train.iloc[0])
result1 = classifier(sample_text, text_pair='design')
print(result1)