### Load Libraries

In [2]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.15.0-py3-none-any.whl (3.4 MB)
[K     |████████████████████████████████| 3.4 MB 14.0 MB/s 
[?25hCollecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 76.0 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.4.0-py3-none-any.whl (67 kB)
[K     |████████████████████████████████| 67 kB 6.9 MB/s 
[?25hCollecting sacremoses
  Downloading sacremoses-0.0.47-py2.py3-none-any.whl (895 kB)
[K     |████████████████████████████████| 895 kB 69.8 MB/s 
Collecting tokenizers<0.11,>=0.10.1
  Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)
[K     |████████████████████████████████| 3.3 MB 59.6 MB/s 
Installing collected packages: pyyaml, tokenizers, sacremoses, huggingface-hub, transform

In [3]:
import csv
import urllib.request

import numpy as np
import pandas as pd
import torch
from scipy.special import softmax
from sklearn.metrics import classification_report
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import TFAutoModelForSequenceClassification

### Pre-process

In [4]:
def preprocess(text):
  """Mask user mentions and links in a tweet-like string.

  The text is split on single spaces. Every token that starts with '@' and
  is longer than one character becomes '@user'; every other token that
  starts with 'http' becomes 'http'. All remaining tokens are kept as-is
  and the tokens are re-joined with single spaces.
  """
  def _mask(token):
    # A lone '@' is not treated as a mention.
    if len(token) > 1 and token.startswith('@'):
      return '@user'
    if token.startswith('http'):
      return 'http'
    return token

  return " ".join(_mask(token) for token in text.split(" "))

### Load model

In [5]:
# Task name; interpolated into the model identifier below.
task = 'sentiment'
# Model identifier handed to from_pretrained() when loading the tokenizer and the model.
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

In [6]:
# Load the tokenizer associated with MODEL (the progress bars below are its file downloads).
tokenizer = AutoTokenizer.from_pretrained(MODEL)

Downloading:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

In [7]:
# Download the id -> label mapping published with the TweetEval dataset.
# The raw-file endpoint is used instead of the GitHub "blob" page: the blob page
# returns HTML, which forced the parser to strip markup from every label.
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    mapping_lines = f.read().decode('utf-8').split("\n")

# Each record is "<id>\t<label>"; rows with fewer than two fields (e.g. the
# trailing empty line) are skipped.
labels = [row[1] for row in csv.reader(mapping_lines, delimiter='\t') if len(row) > 1]
assert labels, f"no labels parsed from {mapping_link}"

In [8]:
# Load the pre-trained sequence-classification model identified by MODEL.
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

# Save the model AND the tokenizer into the local directory named MODEL.
# from_pretrained() resolves an existing local directory before the Hub id, so
# saving only the model would leave a directory without tokenizer files and make
# AutoTokenizer.from_pretrained(MODEL) fail the next time the notebook is run.
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)

Downloading:   0%|          | 0.00/476M [00:00<?, ?B/s]

### Load data

In [16]:
# Load the evaluation data; later cells read its 'body' and 'sentiment_summary' columns.
# NOTE(review): absolute Google Drive path — presumably needs Drive mounted in Colab; no mount cell is visible here.
df = pd.read_csv('/content/drive/MyDrive/WT/data.csv')

In [17]:
# Column holding the text that will be classified.
texts = df.loc[:, 'body']

### Inference

In [18]:
# Upper bound on tokens per input (special tokens included). Longer inputs are
# truncated; without this, over-long texts overflow the model's position
# embeddings and the forward pass raises an error.
MAX_TOKENS = 512

# One softmax probability vector per input text, in the same order as `texts`.
pred_scores = []
# Inference only: disabling autograd avoids building a graph for every sample.
with torch.no_grad():
  for text in texts:
    encoded_input = tokenizer(
        preprocess(text),
        return_tensors='pt',
        truncation=True,
        max_length=MAX_TOKENS,
    )
    output = model(**encoded_input)
    # The batch holds a single text, so take row 0 of the logits.
    logits = output.logits[0].numpy()
    pred_scores.append(softmax(logits))

In [19]:
# For every score vector, pick the label at the index of the highest score.
y_pred = [labels[np.argmax(class_scores)] for class_scores in pred_scores]

### Evaluate Model

In [20]:
# Reference labels as a plain Python list, in row order.
sentiment_column = df['sentiment_summary']
y_true = sentiment_column.tolist()

In [21]:
# Per-class precision / recall / F1 of the predictions against the reference labels.
report = classification_report(y_true, y_pred)
print(report)

              precision    recall  f1-score   support

    negative       0.63      0.92      0.74       569
     neutral       0.26      0.15      0.19       230
    positive       0.58      0.09      0.16       201

    accuracy                           0.57      1000
   macro avg       0.49      0.39      0.36      1000
weighted avg       0.53      0.57      0.50      1000



In [23]:
#            precision    recall  f1-score   support

#     negative       0.63      0.92      0.74       569
#      neutral       0.26      0.15      0.19       230
#     positive       0.58      0.09      0.16       201

#     accuracy                           0.57      1000
#    macro avg       0.49      0.39      0.36      1000
# weighted avg       0.53      0.57      0.50      1000

In [24]:
# The pre-trained RoBERTa model performs well on negative tweets, with an F1 score of 0.74. For positive and neutral tweets the model needs further improvement.

# Things that might improve this are:
# 1. Fine-tuning the model
# 2. Using weighted losses to accommodate the lower representation of neutral and positive sentiment