# Hugging Face Transformers

## 0. Read in Data

In [1]:
import pandas as pd

# Show the full review text in displayed frames instead of truncating it
# (pandas' default column width is 50 characters; None removes the limit).
pd.set_option('display.max_colwidth', None)

# Look at a subset of the reviews.
# `nrows` stops parsing after the first N data rows, so the whole workbook is
# not loaded just to be discarded by a trailing .head(N).
N_REVIEWS = 30
df = pd.read_excel('Data/Popchip_Reviews_Sentiment.xlsx', nrows=N_REVIEWS)
df.head(2)

Unnamed: 0,Id,UserId,Rating,Priority,Title,Text,Sentiment_VADER
0,23689,A21SYVGVNG8RAS,5,Low,Yummy snacks!,Popchips are the bomb!! I use the parmesan garlic to scoop up cottage cheese as a healthy alternative to chips and dip. My healthy eating program is saved.,0.9244
1,23690,AQJYXC0MPRQJL,5,Low,Great chip that is different from the rest,"I like the puffed nature of this chip that makes it more unique in the chip market. I ordered the Salt and Vinegar and absolutely love that flavor, hands down my favorite chip ever. I have tried the cheddar and regular flavors as well. The cheddar is about a 4/5 and the regular is about a 3/5 because I prefer strong flavors and obviously that would not be the case for the regular. The Salt and Vinegar is kind of weak compared to some regular S&V chips, but is quite flavorful and makes you wanting to come back for more.",0.7269


In [3]:
# Confirm the size of the loaded subset as (rows, columns)
df.shape

(30, 7)

## 1. Sentiment Analysis

In [4]:
from transformers import pipeline

In [6]:
# Build a sentiment classifier from the DistilBERT checkpoint fine-tuned on SST-2 (English).
sentiment_analyzer = pipeline(
    task='sentiment-analysis',
    model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
    device=-1,  # -1 runs inference on the CPU
)

Device set to use cpu


In [7]:
# Three short example sentences to probe the classifier.
# text1 ends with the "slightly smiling face" emoji (U+1F642); it had been
# stored as mis-decoded bytes, which fed garbage characters to the model.
text1 = 'When life gives you lemons, make lemonade! 🙂'
text2 = 'A dozen lemons will make a gallon of lemonade.'
text3 = "I didn't like the taste of that lemonade at all."

In [8]:
# Score the first example; the result is a list holding one {'label', 'score'} dict
sentiment_analyzer(text1)

[{'label': 'POSITIVE', 'score': 0.996239423751831}]

In [9]:
# Score the second example sentence
sentiment_analyzer(text2)

[{'label': 'POSITIVE', 'score': 0.7781572341918945}]

In [10]:
# Score the third example sentence
sentiment_analyzer(text3)

[{'label': 'NEGATIVE', 'score': 0.9955589771270752}]

In [None]:
## Practical Example

In [17]:
# Rebuild the classifier for the review data, this time with truncation enabled.
sentiment_analyzer = pipeline(
    task='sentiment-analysis',
    model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
    device=-1,        # -1 runs inference on the CPU
    truncation=True,  # shorten over-long review text before it is analyzed
)

Device set to use cpu


In [18]:
# Classify every review; each result is the pipeline's one-item list
# containing a {'label', 'score'} dict.
df['Text'].apply(sentiment_analyzer)

0     [{'label': 'POSITIVE', 'score': 0.9935213923454285}]
1      [{'label': 'POSITIVE', 'score': 0.999605119228363}]
2     [{'label': 'NEGATIVE', 'score': 0.6984866261482239}]
3     [{'label': 'NEGATIVE', 'score': 0.9996308088302612}]
4     [{'label': 'POSITIVE', 'score': 0.9991814494132996}]
5     [{'label': 'POSITIVE', 'score': 0.9994196891784668}]
6     [{'label': 'POSITIVE', 'score': 0.9992188215255737}]
7     [{'label': 'POSITIVE', 'score': 0.9969040751457214}]
8     [{'label': 'POSITIVE', 'score': 0.9894027709960938}]
9     [{'label': 'POSITIVE', 'score': 0.9991832375526428}]
10    [{'label': 'POSITIVE', 'score': 0.9994851350784302}]
11    [{'label': 'NEGATIVE', 'score': 0.7255946397781372}]
12    [{'label': 'POSITIVE', 'score': 0.9966173768043518}]
13    [{'label': 'POSITIVE', 'score': 0.9997195601463318}]
14    [{'label': 'POSITIVE', 'score': 0.8944363594055176}]
15    [{'label': 'POSITIVE', 'score': 0.9989368319511414}]
16    [{'label': 'POSITIVE', 'score': 0.9998534917831421

In [19]:
## Sentiment: round 2

In [24]:
%%time
# ^ Specific to jupyter notebook. It puts a magic function to see how long the cell took to run

from transformers import logging

logging.set_verbosity_error() # removes confusing errors when running pipeline

sentiment_analyzer = pipeline('sentiment-analysis', 
                              model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
                              device=-1, # -1 to use CPU
                              truncation=True # Truncates text to make it shorter (Text we want to analyze)
                             )

df.Text.apply(sentiment_analyzer)

CPU times: total: 10.5 s
Wall time: 812 ms


0     [{'label': 'POSITIVE', 'score': 0.9935213923454285}]
1      [{'label': 'POSITIVE', 'score': 0.999605119228363}]
2     [{'label': 'NEGATIVE', 'score': 0.6984866261482239}]
3     [{'label': 'NEGATIVE', 'score': 0.9996308088302612}]
4     [{'label': 'POSITIVE', 'score': 0.9991814494132996}]
5     [{'label': 'POSITIVE', 'score': 0.9994196891784668}]
6     [{'label': 'POSITIVE', 'score': 0.9992188215255737}]
7     [{'label': 'POSITIVE', 'score': 0.9969040751457214}]
8     [{'label': 'POSITIVE', 'score': 0.9894027709960938}]
9     [{'label': 'POSITIVE', 'score': 0.9991832375526428}]
10    [{'label': 'POSITIVE', 'score': 0.9994851350784302}]
11    [{'label': 'NEGATIVE', 'score': 0.7255946397781372}]
12    [{'label': 'POSITIVE', 'score': 0.9966173768043518}]
13    [{'label': 'POSITIVE', 'score': 0.9997195601463318}]
14    [{'label': 'POSITIVE', 'score': 0.8944363594055176}]
15    [{'label': 'POSITIVE', 'score': 0.9989368319511414}]
16    [{'label': 'POSITIVE', 'score': 0.9998534917831421

In [25]:
%%time
# ^ Specific to jupyter notebook. It puts a magic function to see how long the cell took to run

from transformers import logging

logging.set_verbosity_error() # removes confusing errors when running pipeline

sentiment_analyzer = pipeline('sentiment-analysis', 
                              model='distilbert/distilbert-base-uncased-finetuned-sst-2-english',
                              device='cuda:0', # -1 to use CPU, 'mps' to use apple GPU. Windows NVIDIA GPU use 'cuda' or 'cuda:0' or 0.
                              truncation=True # Truncates text to make it shorter (Text we want to analyze)
                             )

df.Text.apply(sentiment_analyzer)

CPU times: total: 46.9 ms
Wall time: 175 ms


RuntimeError: PyTorch is not linked with support for mps devices