In [3]:
# Use the %pip magic (not a `!` shell call) so the package is installed into the
# environment of the running kernel rather than whichever `pip` is first on PATH.
%pip install transformers


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


# Sentiment Analysis using BERT

## Importing Dependencies

In [4]:
import torch
import pandas as pd
import numpy as np
from transformers import BertTokenizer, BertForSequenceClassification

## Downloading Dataset

In [5]:
# Movie-review CSV hosted as a GitHub gist; the raw URL includes a revision hash in its path.
IMDB_REVIEWS_URL = 'https://gist.githubusercontent.com/Mukilan-Krishnakumar/e998ecf27d11b84fe6225db11c239bc6/raw/74dbac2b992235e555df9a0a4e4d7271680e7e45/imdb_movie_reviews.csv'

# Read the remote CSV into a DataFrame and preview its first rows.
df = pd.read_csv(IMDB_REVIEWS_URL)
df.head()

Unnamed: 0,text,sentiment
0,"My daughter liked it but I was aghast, that a ...",neg
1,I... No words. No words can describe this. I w...,neg
2,this film is basically a poor take on the old ...,neg
3,"This is a terrible movie, and I'm not even sur...",neg
4,First of all this movie is a piece of reality ...,pos


We drop the `sentiment` column that ships with the dataset and predict our own sentiment for each review using BERT.

In [6]:
# Discard the label column provided with the data; it is re-created from model predictions below.
df = df.drop(columns=['sentiment'])

## Model Building and Evaluation

In [None]:
# The tokenizer and the classification head are both loaded from the same Hugging Face checkpoint.
SENTIMENT_CHECKPOINT = 'nlptown/bert-base-multilingual-uncased-sentiment'

tokenizer = BertTokenizer.from_pretrained(SENTIMENT_CHECKPOINT)
model = BertForSequenceClassification.from_pretrained(SENTIMENT_CHECKPOINT)

tokenizer_config.json:   0%|          | 0.00/39.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/953 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

In [None]:
def sentiment_score(movie_review):
    """Score a piece of text with the notebook-level ``tokenizer`` and ``model``.

    Args:
        movie_review: The text to classify.

    Returns:
        int: Index of the highest logit plus one. For the nlptown checkpoint
        loaded above this is presumably a 1-5 star rating -- confirm against
        the model card.
    """
    # Truncate at the token level so a long text passed directly to this function
    # cannot exceed the 512-position limit of BERT-base models.
    token = tokenizer.encode(
        movie_review,
        return_tensors='pt',
        truncation=True,
        max_length=512,
    )
    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        result = model(token)
    return int(torch.argmax(result.logits)) + 1

In [None]:
# Only the first 512 characters (not tokens) of each review are passed to the model.
MAX_REVIEW_CHARS = 512
df['sentiment'] = df['text'].map(lambda review: sentiment_score(review[:MAX_REVIEW_CHARS]))

In [None]:
# Preview the frame, which now carries the model-predicted `sentiment` column.
df.head()

In [None]:
# Explicit %pip magic: a bare `pip install` line only works while IPython automagic is enabled.
# NOTE(review): emoji is presumably required by the tweet tokenizer loaded below -- confirm
# against the model card before changing the pinned version.
%pip install emoji==0.6.0

## Pre-trained BERT on Twitter US Election 2020 for Stance Detection towards Donald Trump (KE-MLM)

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np

In [None]:
# Prefer CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [None]:
# Select the model path here: the checkpoint identifier passed to `from_pretrained`
# for both the tokenizer and the classifier.
pretrained_LM_path = "kornosk/bert-election2020-twitter-stance-trump-KE-MLM"

In [None]:
# Class index -> stance label, used to turn the arg-max class into readable text.
id2label = dict(enumerate(("AGAINST", "FAVOR", "NONE")))

# Load the tokenizer and the sequence classifier from the selected checkpoint.
# NOTE: this rebinds the notebook-wide `tokenizer` and `model` names that
# `sentiment_score` reads, so that function uses these objects from here on.
tokenizer = AutoTokenizer.from_pretrained(pretrained_LM_path)
model = AutoModelForSequenceClassification.from_pretrained(pretrained_LM_path)


In [None]:
##### Prediction Neutral #####
sentence = "Hello World."
inputs = tokenizer(sentence.lower(), return_tensors="pt")
# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
# Softmax over the class dimension, then take the single row of the batch as a plain list.
predicted_probability = torch.softmax(outputs[0], dim=1)[0].tolist()

print("Sentence:", sentence)
print("Prediction:", id2label[np.argmax(predicted_probability)])
print("Against:", predicted_probability[0])
print("Favor:", predicted_probability[1])
print("Neutral:", predicted_probability[2])

In [None]:
##### Prediction Favor #####
sentence = "Go Go Trump!!!"
inputs = tokenizer(sentence.lower(), return_tensors="pt")
# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
# Softmax over the class dimension, then take the single row of the batch as a plain list.
predicted_probability = torch.softmax(outputs[0], dim=1)[0].tolist()

print("Sentence:", sentence)
print("Prediction:", id2label[np.argmax(predicted_probability)])
print("Against:", predicted_probability[0])
print("Favor:", predicted_probability[1])
print("Neutral:", predicted_probability[2])

In [None]:
##### Prediction Against #####
sentence = "Trump is the worst."
inputs = tokenizer(sentence.lower(), return_tensors="pt")
# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
# Softmax over the class dimension, then take the single row of the batch as a plain list.
predicted_probability = torch.softmax(outputs[0], dim=1)[0].tolist()

print("Sentence:", sentence)
print("Prediction:", id2label[np.argmax(predicted_probability)])
print("Against:", predicted_probability[0])
print("Favor:", predicted_probability[1])
print("Neutral:", predicted_probability[2])

In [None]:
##### Prediction for a sentence that does not mention the target #####
sentence = "Science is true."
inputs = tokenizer(sentence.lower(), return_tensors="pt")
# Inference only: skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)
# Softmax over the class dimension, then take the single row of the batch as a plain list.
predicted_probability = torch.softmax(outputs[0], dim=1)[0].tolist()

print("Sentence:", sentence)
print("Prediction:", id2label[np.argmax(predicted_probability)])
print("Against:", predicted_probability[0])
print("Favor:", predicted_probability[1])
print("Neutral:", predicted_probability[2])

##  🙊 Detoxify Toxic Comment Classification with ⚡ Pytorch Lightning and 🤗 Transformers

In [None]:
# Install detoxify with the %pip magic so it lands in the environment the running kernel uses.
%pip install detoxify


In [None]:
# `Detoxify` is used from here on but is not imported anywhere in the visible notebook,
# so a fresh kernel would raise NameError. Import it at first use, after the install cell.
from detoxify import Detoxify

# Score one English sentence with the 'original' checkpoint.
results = Detoxify('original').predict('I joked to my friend and said shut up you are a liar ')

In [None]:
# Show what the 'original' checkpoint returned for the English sentence.
print(results)

In [None]:
# NOTE(review): duplicate of the previous cell -- prints the same `results` object again.
print(results)

In [None]:
# Score a shorter English sentence with the 'unbiased' checkpoint.
# NOTE(review): the input text differs from the one given to the 'original' checkpoint,
# so the two results are not a like-for-like comparison.
results_unbiased=Detoxify('unbiased').predict('shut up you are a liar')

In [None]:
# Score a Turkish sentence (roughly "shut up, you are a liar") with the 'multilingual' checkpoint.
results_turkish = Detoxify('multilingual').predict('kapa çeneni sen bir yalancısın')

In [None]:
# `results` (from the 'original' checkpoint) printed once more, ahead of the other models' scores.
print(results)

In [None]:
# Scores from the 'unbiased' checkpoint.
print(results_unbiased)

In [None]:
# Scores from the 'multilingual' checkpoint for the Turkish sentence.
print(results_turkish)

In [None]:
# Score a German sentence (roughly "shut your mouth, you are a liar") with the 'multilingual' checkpoint.
# NOTE(review): this constructs a second Detoxify('multilingual') instance; the Turkish cell
# above already built one that could be kept in a variable and reused.
results_german = Detoxify('multilingual').predict('Halt die fresse du bist ein Lügner!')

In [None]:
# Scores from the 'multilingual' checkpoint for the German sentence.
print(results_german)

In [None]:
# pd.DataFrame's second and third positional parameters are `index` and `columns`, so the
# original call treated two of the result dicts as axis labels instead of data.
# Pass the three dicts together as a list of records: one row per model, one column per label.
# Labels that a given checkpoint does not report appear as NaN in that row.
comparison = pd.DataFrame(
    [results, results_unbiased, results_german],
    index=['original', 'unbiased', 'multilingual (German)'],
)
print(comparison.round(5))

## Sources

https://huggingface.co/kornosk/bert-election2020-twitter-stance-trump-KE-MLM

https://huggingface.co/unitary/toxic-bert

https://github.com/unitaryai/detoxify

https://huggingface.co/docs/transformers/model_doc/bert

https://huggingface.co/models?other=politics&sort=trending

https://wandb.ai/mukilan/BERT_Sentiment_Analysis/reports/An-Introduction-to-BERT-And-How-To-Use-It--VmlldzoyNTIyOTA1#bert-based-models



