In [1]:
model_name = 'nlptown/bert-base-multilingual-uncased-sentiment'

In [2]:
pip install Transformers

Note: you may need to restart the kernel to use updated packages.


In [3]:
from transformers import BertTokenizer, TFBertForSequenceClassification

tokenizer = BertTokenizer.from_pretrained(model_name)

model = TFBertForSequenceClassification.from_pretrained(model_name)

All model checkpoint layers were used when initializing TFBertForSequenceClassification.

All the layers of TFBertForSequenceClassification were initialized from the model checkpoint at nlptown/bert-base-multilingual-uncased-sentiment.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [5]:
text = "I like my job"

inputs = tokenizer(text)
print(inputs)

input_ids = inputs['input_ids']

predictions = model.predict([input_ids])

logits = predictions.logits

import numpy as np

predicted_class = np.argmax(logits)
predicted_class

{'input_ids': [101, 151, 11531, 11153, 19594, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1]}


3

In [6]:
sentiment_mapping = {
    0: 'very negative',
    1: 'negative',
    2: 'neutral',
    3: 'positive',
    4: 'very positive'
}

predicted_sentiment = sentiment_mapping[predicted_class]
print("The text is predicted to have a sentiment: {}".format(predicted_sentiment))

The text is predicted to have a sentiment: positive


In [8]:
text = "I hate my job"

inputs = tokenizer(text, return_tensors='tf')

input_ids = inputs['input_ids']

predictions = model.predict([input_ids])

logits = predictions.logits

predicted_class = np.argmax(logits)
print(logits)

predicted_sentiment = sentiment_mapping[predicted_class]
print("The text is predicted to have a sentiment: {}".format(predicted_sentiment))

[[ 2.9380844   1.6215168  -0.16542505 -1.9236571  -1.9499079 ]]
The text is predicted to have a sentiment: very negative


In [9]:
import pandas as pd

csv_file = '../IMDB Dataset.csv'

df = pd.read_csv(csv_file)

df

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive
...,...,...
49995,I thought this movie did a down right good job...,positive
49996,"Bad plot, bad dialogue, bad acting, idiotic di...",negative
49997,I am a Catholic taught in parochial elementary...,negative
49998,I'm going to have to disagree with the previou...,negative


In [10]:
first_5_rows = df.head(5)

first_5_rows

Unnamed: 0,review,sentiment
0,One of the other reviewers has mentioned that ...,positive
1,A wonderful little production. <br /><br />The...,positive
2,I thought this was a wonderful way to spend ti...,positive
3,Basically there's a family where a little boy ...,negative
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive


In [12]:
import numpy as np

sentiment_mapping = {
    0: 'very negative',
    1: 'negative',
    2: 'neutral',
    3: 'positive',
    4: 'very positive'
}


first_5_rows_copy = first_5_rows.copy()


predicted_sentiments = []


for title in first_5_rows_copy['review']:

    inputs = tokenizer(title,)
    

    input_ids = inputs['input_ids']
    

    predictions = model.predict([input_ids])
    

    logits = predictions.logits
    

    predicted_class = np.argmax(logits)
    print("predicted id: ", predicted_class)

    predicted_sentiment = sentiment_mapping[predicted_class]
    

    predicted_sentiments.append(predicted_sentiment)


first_5_rows_copy['predicted_sentiment'] = predicted_sentiments


first_5_rows_copy

predicted id:  2
predicted id:  4
predicted id:  3
predicted id:  2
predicted id:  3


Unnamed: 0,review,sentiment,predicted_sentiment
0,One of the other reviewers has mentioned that ...,positive,neutral
1,A wonderful little production. <br /><br />The...,positive,very positive
2,I thought this was a wonderful way to spend ti...,positive,positive
3,Basically there's a family where a little boy ...,negative,neutral
4,"Petter Mattei's ""Love in the Time of Money"" is...",positive,positive


In [13]:
import pandas as pd

csv_file = 'arabic_review.csv'

df = pd.read_csv(csv_file)

df

Unnamed: 0,ID,Feed,Sentiment
0,1,الحلو انكم بتحكوا على اساس انو الاردن ما فيه ف...,Negative
1,2,كله رائع بجد ربنا يكرمك,Positive
2,3,ابشرك فيه تحسن ولله الحمد باذن الله يرجع قريبا,Positive
3,4,ابو الشباب راعي العود ليش ماوزنه في البيت غباء,Negative
4,5,ابو معيتق قطع اوتار العود وقال السلام عليكم,Negative
5,6,اترك ما تهوى لاجل من تخشى,Positive
6,7,اتصور لو ظليت ما اتعلق احسن لانه تعليقاتك مقرفه,Negative
7,8,اتفه على هيك برنامج عالمي,Negative
8,9,اتقوا الله فينا بكفي رفع اسعار الرواتب بالحضيض,Negative
9,10,اجتماع حواء اكيد في خرفنه,Negative


In [14]:
first_5_rows = df.head(5)

first_5_rows

Unnamed: 0,ID,Feed,Sentiment
0,1,الحلو انكم بتحكوا على اساس انو الاردن ما فيه ف...,Negative
1,2,كله رائع بجد ربنا يكرمك,Positive
2,3,ابشرك فيه تحسن ولله الحمد باذن الله يرجع قريبا,Positive
3,4,ابو الشباب راعي العود ليش ماوزنه في البيت غباء,Negative
4,5,ابو معيتق قطع اوتار العود وقال السلام عليكم,Negative


In [15]:
import numpy as np

sentiment_mapping = {
    0: 'very negative',
    1: 'negative',
    2: 'neutral',
    3: 'positive',
    4: 'very positive'
}

first_5_rows_copy = first_5_rows.copy()

predicted_sentiments = []

for title in first_5_rows_copy['Feed']:

    inputs = tokenizer(title,)
    
    input_ids = inputs['input_ids']
    
    predictions = model.predict([input_ids])
    
    logits = predictions.logits
    
    predicted_class = np.argmax(logits)
    print("predicted id: ", predicted_class)

    predicted_sentiment = sentiment_mapping[predicted_class]
    
    predicted_sentiments.append(predicted_sentiment)

first_5_rows_copy['predicted_sentiment'] = predicted_sentiments

first_5_rows_copy

predicted id:  0
predicted id:  4
predicted id:  4
predicted id:  4
predicted id:  4


Unnamed: 0,ID,Feed,Sentiment,predicted_sentiment
0,1,الحلو انكم بتحكوا على اساس انو الاردن ما فيه ف...,Negative,very negative
1,2,كله رائع بجد ربنا يكرمك,Positive,very positive
2,3,ابشرك فيه تحسن ولله الحمد باذن الله يرجع قريبا,Positive,very positive
3,4,ابو الشباب راعي العود ليش ماوزنه في البيت غباء,Negative,very positive
4,5,ابو معيتق قطع اوتار العود وقال السلام عليكم,Negative,very positive


In [16]:
pip install camel-tools

Note: you may need to restart the kernel to use updated packages.


In [17]:
from camel_tools.sentiment import SentimentAnalyzer

model_name = SentimentAnalyzer("CAMeL-Lab/bert-base-arabic-camelbert-da-sentiment")
sentences = ['أنا أكره عملي']
model_name.predict(sentences)

['negative']

In [18]:
sentences = ['أنا أحب عملي']
model_name.predict(sentences)

['positive']

In [19]:
import numpy as np

first_5_rows_copy = first_5_rows.copy()

predicted_sentiments = []

for title in first_5_rows_copy['Feed']:

    predictions = model_name.predict([title])
    
    predicted_sentiments.append(predictions)

first_5_rows_copy['predicted_sentiment'] = predicted_sentiments

first_5_rows_copy

Unnamed: 0,ID,Feed,Sentiment,predicted_sentiment
0,1,الحلو انكم بتحكوا على اساس انو الاردن ما فيه ف...,Negative,[negative]
1,2,كله رائع بجد ربنا يكرمك,Positive,[positive]
2,3,ابشرك فيه تحسن ولله الحمد باذن الله يرجع قريبا,Positive,[positive]
3,4,ابو الشباب راعي العود ليش ماوزنه في البيت غباء,Negative,[negative]
4,5,ابو معيتق قطع اوتار العود وقال السلام عليكم,Negative,[negative]
