In [None]:
# Mount Google Drive at /content/drive; the data and model paths used further
# down in this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install -q torch
!pip install -U -q accelerate transformers
!pip install -q sentencepiece
!pip install --upgrade -q simplet5
!pip install -q sentence-transformers

In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report
import torch
import pandas as pd
import numpy as np
from simplet5 import SimpleT5

In [None]:
# Path of the CSV file evaluated in this notebook.
# NOTE(review): the constant is called "validation" but the file is named
# testing_data_20.csv — presumably the 20% held-out split; confirm.
VALIDATION_FILE_PATH = '/content/drive/MyDrive/ASPECT ANALYSIS ALL/post split 80 20 ratio train test data/testing_data_20.csv'

In [None]:
# Read the CSV file at VALIDATION_FILE_PATH into a pandas DataFrame.
validation_data = pd.read_csv(VALIDATION_FILE_PATH)

In [None]:
# Keep a single row per distinct 'Review' text, then discard every remaining
# row that has a null value in any column.
validation_data = (
    validation_data
    .drop_duplicates(subset=['Review'])
    .dropna()
)

In [None]:
# Original label -> renamed (single-word) label.
label_replacements = dict([
    ('Ease of Use', 'Usability'),
    ('Ease of Reprocessing', 'Reprocessability'),
    ('Ease of Storage', 'Storability'),
])

# Renamed label -> original label, derived from the mapping above so the two
# dictionaries cannot drift apart.
label_replacements_reverse = {
    renamed: original for original, renamed in label_replacements.items()
}

In [None]:
# Aspect labels in their original wording.
original_labels = [
    'Adaptability',
    'Durability',
    'Ease of Use',
    'Ergonomics',
    'Interference',
    'Performance',
    'Use Efficiency',
    'Aesthetics',
    'Ease of Reprocessing',
    'Ease of Storage',
    'Price',
    'Safety',
]

# The same twelve aspects in the same order, with the three "Ease of ..."
# labels replaced by their single-word names.
modified_labels = [
    'Adaptability',
    'Durability',
    'Usability',
    'Ergonomics',
    'Interference',
    'Performance',
    'Use Efficiency',
    'Aesthetics',
    'Reprocessability',
    'Storability',
    'Price',
    'Safety',
]


In [None]:
# The data was loaded and cleaned under the name `validation_data`, while the
# cells below read it as `testing_data` with the model input in a
# 'source_text' column. Build that frame here from the 'Review' column so the
# notebook runs on a fresh kernel, and so re-running this cell does not stack
# the prefix a second time.
testing_data = validation_data.copy()
# SimpleT5 requires that we specify the use case before each review
testing_data['source_text'] = "predict Aspect: " + testing_data['Review']

In [None]:
# Instantiate the SimpleT5 wrapper that the trained checkpoint is loaded into below.
model = SimpleT5()

In [None]:
# load trained T5 model
# Request the GPU only when one is actually present, so the notebook also runs
# on a CPU-only runtime instead of failing at load time.
model.load_model("t5","/content/drive/MyDrive/ASPECT ANALYSIS ALL/T5_MODEL_FILES/simplet5-smallmodel", use_gpu=torch.cuda.is_available())
# to load the base model
# model.load_model("t5","/content/drive/MyDrive/ASPECT ANALYSIS ALL/T5_MODEL_FILES/simplet5-basemodel", use_gpu=torch.cuda.is_available())

In [None]:
# Run the model on every prefixed review and keep the first element of each
# predict() result.
predictions = [
    model.predict(source_text)[0]
    for source_text in testing_data.source_text.values
]

In [None]:
# importing sentence transformers, to map the new labels to original labels based on cosine similarity
from sentence_transformers import SentenceTransformer, util

In [None]:
# Load the 'all-MiniLM-L6-v2' sentence-embedding model used to compare label texts.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

In [None]:
# Embed all original labels once, as a tensor, so each out-of-vocabulary
# prediction can later be compared against them by cosine similarity.
original_labels_embeddings = embedder.encode(original_labels, convert_to_tensor=True)

In [None]:
# Normalise the raw T5 outputs to known labels:
#   * a prediction that is already one of `modified_labels` is kept unchanged;
#   * any other text is replaced by the entry of `original_labels` whose
#     embedding has the highest cosine similarity to the prediction's embedding.
# The resulting list can therefore contain both modified and original names.
mapped_predictions_new_labels = []
# Unknown prediction text -> closest original label, so a repeated
# out-of-vocabulary output is embedded only once.
nearest_label_cache = {}
for predicted_value in predictions:
    if predicted_value in modified_labels:
        mapped_predictions_new_labels.append(predicted_value)
        continue
    if predicted_value not in nearest_label_cache:
        pred_embedding = embedder.encode(predicted_value, convert_to_tensor=True)
        cos_scores = util.cos_sim(pred_embedding, original_labels_embeddings)[0]
        # Only the single best match is needed; convert it to a plain int
        # before using it as a list index.
        best_idx = int(torch.argmax(cos_scores))
        nearest_label_cache[predicted_value] = original_labels[best_idx]
    mapped_predictions_new_labels.append(nearest_label_cache[predicted_value])

In [None]:
def compute_metrics(labels, preds):
    """Score predictions against the true labels.

    Args:
        labels: True labels, passed to scikit-learn as ``y_true``.
        preds: Predicted labels, passed to scikit-learn as ``y_pred``.

    Returns:
        dict with the keys ``accuracy``, ``precision``, ``recall`` and
        ``f1_score``. Precision, recall and F1 are computed with
        ``average='weighted'``.
    """
    # Metrics that share the same weighted-average call signature.
    weighted_scorers = {
        "precision": precision_score,
        "recall": recall_score,
        "f1_score": f1_score,
    }
    results = {"accuracy": accuracy_score(y_true=labels, y_pred=preds)}
    for metric_name, scorer in weighted_scorers.items():
        results[metric_name] = scorer(y_true=labels, y_pred=preds, average='weighted')
    return results

In [None]:
# Translate renamed labels back to their original wording; any label without an
# entry in label_replacements_reverse passes through unchanged.
predictions_mapped_to_original = [
    label_replacements_reverse.get(label, label)
    for label in mapped_predictions_new_labels
]

In [None]:
# Overall scores: the 'Aspect' column is passed as the true labels and the
# mapped predictions as the predicted labels.
compute_metrics(testing_data.Aspect.values, predictions_mapped_to_original)

In [None]:
# Print scikit-learn's classification report for the same true labels and predictions.
print(classification_report(testing_data.Aspect.values, predictions_mapped_to_original))