In [19]:
import pandas as pd

import tqdm

from tqdm.notebook import tqdm

In [1]:
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax

In [2]:
MODEL = f"cardiffnlp/twitter-roberta-base-sentiment"
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

Downloading:   0%|          | 0.00/747 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/499M [00:00<?, ?B/s]

In [7]:
amazon_data = pd.read_csv('../amazon-reviews/data/Reviews.csv')
print(amazon_data.shape)



(568454, 10)


In [8]:
amazon_data = amazon_data.head(500)
print(amazon_data.shape)

(500, 10)


In [10]:
sample = amazon_data['Text'][75]
print(sample)

No tea flavor at all. Just whole brunch of artifial flavors. It is not returnable. I wasted 20+ bucks.


In [15]:
def scores_roberta(row):
    encoded_text = tokenizer(row, return_tensors='pt')
    output = model(**encoded_text)
    scores = output[0][0].detach().numpy()
    scores = softmax(scores)
    scores_dict = {
        'score_neg' : scores[0],
        'score_neu' : scores[1],
        'score_pos' : scores[2]
    }
    return scores_dict

In [20]:
res = {}
for i, row in tqdm(amazon_data.iterrows(), total=len(amazon_data)):
    try:
        text = row['Text']
        id = row['Id']
        roberta_result = scores_roberta(text)
        res[id] = roberta_result
    except RuntimeError:
        print(f'Error in id {id}')

  0%|          | 0/500 [00:00<?, ?it/s]

Error in id 83
Error in id 187


In [22]:
polarity_scores = pd.DataFrame(res).T

polarity_scores

Unnamed: 0,score_neg,score_neu,score_pos
1,0.009624,0.049980,0.940396
2,0.508986,0.452414,0.038600
3,0.003229,0.098067,0.898704
4,0.002295,0.090219,0.907486
5,0.001635,0.010302,0.988063
...,...,...,...
496,0.001906,0.009862,0.988232
497,0.004415,0.034215,0.961369
498,0.006427,0.074537,0.919036
499,0.865614,0.119366,0.015020


In [23]:
polarity_scores = polarity_scores.reset_index().rename(columns = {'index': 'Id'})

polarity_scores

Unnamed: 0,Id,score_neg,score_neu,score_pos
0,1,0.009624,0.049980,0.940396
1,2,0.508986,0.452414,0.038600
2,3,0.003229,0.098067,0.898704
3,4,0.002295,0.090219,0.907486
4,5,0.001635,0.010302,0.988063
...,...,...,...,...
493,496,0.001906,0.009862,0.988232
494,497,0.004415,0.034215,0.961369
495,498,0.006427,0.074537,0.919036
496,499,0.865614,0.119366,0.015020


In [24]:
polarity_scores = polarity_scores.merge(amazon_data, how = 'left')

polarity_scores

Unnamed: 0,Id,score_neg,score_neu,score_pos,ProductId,UserId,ProfileName,HelpfulnessNumerator,HelpfulnessDenominator,Score,Time,Summary,Text
0,1,0.009624,0.049980,0.940396,B001E4KFG0,A3SGXH7AUHU8GW,delmartian,1,1,5,1303862400,Good Quality Dog Food,I have bought several of the Vitality canned d...
1,2,0.508986,0.452414,0.038600,B00813GRG4,A1D87F6ZCVE5NK,dll pa,0,0,1,1346976000,Not as Advertised,Product arrived labeled as Jumbo Salted Peanut...
2,3,0.003229,0.098067,0.898704,B000LQOCH0,ABXLMWJIXXAIN,"Natalia Corres ""Natalia Corres""",1,1,4,1219017600,"""Delight"" says it all",This is a confection that has been around a fe...
3,4,0.002295,0.090219,0.907486,B000UA0QIQ,A395BORC6FGVXV,Karl,3,3,2,1307923200,Cough Medicine,If you are looking for the secret ingredient i...
4,5,0.001635,0.010302,0.988063,B006K2ZZ7K,A1UQRSCLF8GW1T,"Michael D. Bigham ""M. Wassir""",0,0,5,1350777600,Great taffy,Great taffy at a great price. There was a wid...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
493,496,0.001906,0.009862,0.988232,B000G6RYNE,APGAA43E3WPN7,Darren,0,0,5,1201392000,amazing chips,i rarely eat chips but i saw these and tried t...
494,497,0.004415,0.034215,0.961369,B000G6RYNE,ABR7HU5H1KNE,Keith,0,0,5,1196726400,Best Chip Ever,This is easily the best potato chip that I hav...
495,498,0.006427,0.074537,0.919036,B000G6RYNE,AJQD2WWJYOYFQ,bubbles,0,0,4,1186617600,"Tangy, spicy, and sweet- oh my!",Kettle Chips Spicy Thai potato chips have the ...
496,499,0.865614,0.119366,0.015020,B000G6RYNE,A16YH487W9ZYO0,Bruce G. Lindsay,0,0,4,1184198400,An indulgence with a bite,"Okay, I should not eat potato chips, nor shoul..."


## Pipeline

In [25]:
from transformers import pipeline

In [26]:
sentiment_pipeline = pipeline("sentiment-analysis")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [29]:
sentiment_pipeline('I hate it so much that I love it')

[{'label': 'POSITIVE', 'score': 0.9792561531066895}]