# Sentiment Analysis - RoBERTa method

In [1]:
# import modules
import transformers
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from scipy.special import softmax
from tqdm.notebook import tqdm # makes loops show a progress bar

In [2]:
# Create the tokenizer and sequence-classification model from the same
# pretrained checkpoint, so the token ids match what the model expects.
MODEL = "cardiffnlp/twitter-roberta-base-sentiment"  # plain literal: there is nothing to interpolate
tokenizer = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForSequenceClassification.from_pretrained(MODEL)

In [3]:
def polarity_scores_roberta(example):
    """Score one piece of text with the notebook-level RoBERTa sentiment model.

    Parameters
    ----------
    example
        Text handed to the notebook-level ``tokenizer`` (the scoring loop
        passes one sentence string per call).

    Returns
    -------
    dict
        Keys ``"roberta_neg"``, ``"roberta_neu"`` and ``"roberta_pos"``,
        holding the softmax of the first, second and third output score of
        the first (only) item in the batch.

    Notes
    -----
    No truncation is requested from the tokenizer, so any error the model
    raises for an input it cannot handle propagates to the caller.
    """
    import torch  # local import: torch is not imported at the top of this notebook

    encoded_text = tokenizer(example, return_tensors='pt')
    # Inference only: skip autograd bookkeeping during the forward pass.
    with torch.no_grad():
        output = model(**encoded_text)
    # output[0] holds the raw scores; [0] selects the single example in the batch.
    scores = softmax(output[0][0].detach().numpy())  # numpy array
    return {
        "roberta_neg": scores[0],
        "roberta_neu": scores[1],
        "roberta_pos": scores[2],
    }

In [6]:
import pandas as pd
# Load the input data from a pickle file located relative to the working directory.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
df = pd.read_pickle("speech_topic_similarity.pkl")

In [9]:
# Move the current row labels into an 'index' column, which later cells use as the row id.
# Guarded so that re-running this cell does not stack an extra 'level_0' column
# (and eventually raise once 'level_0' already exists).
if 'index' not in df.columns:
    df = df.reset_index()

In [11]:
# Preview the first rows to check the frame after the index reset.
df.head()

Unnamed: 0,index,sentence,speech,president,speech_date,Regime change,Military Support,Fighting Al Qaeda,Democracy Promotion,Bolstering Iraqi Security Forces,Al Qaeda Downturn,Middle East Security,Freedom,Al Qaeda,Middle East Terrorism Threat,Religious Liberty
0,0,Good afternoon.,Speech on the Drawdown of US Forces in Afghani...,Joe Biden,2021-07-01,0.091866,0.020608,-0.013549,0.012241,0.129675,0.121716,-0.019216,0.057875,-0.051857,0.021145,0.035125
1,1,"Earlier today, I was briefed by our senior mil...",Speech on the Drawdown of US Forces in Afghani...,Joe Biden,2021-07-01,0.462341,0.170909,0.502308,0.706292,0.296541,0.224687,-0.120064,0.599147,-0.025133,0.397244,0.183778
2,2,"When I announced our drawdown in April, I said...",Speech on the Drawdown of US Forces in Afghani...,Joe Biden,2021-07-01,0.164941,0.084289,0.384271,0.428225,0.433138,0.283634,0.486977,0.124019,0.236934,0.134657,0.774891
3,3,Our military mission in Afghanistan will concl...,Speech on the Drawdown of US Forces in Afghani...,Joe Biden,2021-07-01,0.294849,0.466644,0.720763,0.41306,0.481794,-0.027502,0.32805,0.036519,0.448192,0.367726,0.666796
4,4,The drawdown is proceeding in a secure and ord...,Speech on the Drawdown of US Forces in Afghani...,Joe Biden,2021-07-01,0.166568,0.59265,0.154344,-0.28015,0.251566,0.446406,-0.209066,0.491736,-0.196509,0.079829,-0.286824


In [12]:
# Score every sentence; rows whose scoring raises RuntimeError are reported and skipped.
results = {}
for _, record in tqdm(df.iterrows(), total=len(df)):
    try:
        sentence = record['sentence']
        rowid = record['index']  # relies on the 'index' column created by reset_index()
        scores_for_row = polarity_scores_roberta(sentence)
        results[rowid] = scores_for_row  # keyed by row id
    except RuntimeError:
        print(f"Broke for id: {rowid}")  # report which row id failed

# Dict-of-dicts gives one column per row id; transpose to one row per row id.
results_df = pd.DataFrame(results).T

  0%|          | 0/2293 [00:00<?, ?it/s]

In [16]:
# Attach the sentiment scores to the source rows.
# results_df is keyed by the values of df's 'index' column, so match on that
# column explicitly instead of relying on df's row labels happening to equal it.
# Rows that failed scoring keep NaN in the roberta_* columns.
final_df = df.join(results_df, on='index')

### Assess Results

In [41]:
# Let text columns display up to 250 characters so long sentences stay readable.
pd.set_option('display.max_colwidth', 250)
# Order rows from the highest to the lowest positive score, keeping only the columns of interest.
most_pos = final_df.sort_values('roberta_pos', ascending=False)[['index', 'sentence', 'speech', 'roberta_pos']]
# Show the ten most positive sentences.
most_pos.loc[:, ['index', 'sentence', 'roberta_pos']].head(10)

Unnamed: 0,index,sentence,roberta_pos
2127,2127,"Well, thank you all very much for that warm welcome.",0.990416
2130,2130,I want to thank you for your warm welcome and thank you for inviting me to one of America's great institutions.,0.988278
449,449,Last night was a great night for the United States and for the World.,0.987943
1482,1482,"Thanks for the warm welcome, and thanks for inviting me to join you in this 20th anniversary of the National Endowment for Democracy.",0.986391
1843,1843,I'm pleased to be back here with the men and women of the Defense Department.\r,0.984813
447,447,You are the very best there is anywhere in the world.,0.984545
527,527,"It's an extraordinary honor for me to do so here at West Point -- where so many men and women have prepared to stand up for our security, and to represent what is finest about our country.",0.984264
816,816,"Thanks to our extraordinary men and women in uniform, our civilian personnel, and our many coalition partners, we are meeting our goals.",0.98358
445,445,Thank you as well to the great intelligence professionals who helped make this very successful journey possible.,0.981257
2146,2146,America is fortunate and I am proud to have ROTC graduate Colin Powell serving our country.,0.978353


In [42]:
# Let text columns display up to 250 characters so long sentences stay readable.
pd.set_option('display.max_colwidth', 250)
# Order rows from the highest to the lowest negative score, keeping only the columns of interest.
most_neg = final_df.sort_values('roberta_neg', ascending=False)[['index', 'sentence', 'speech', 'roberta_neg']]
# Show the ten most negative sentences.
most_neg.loc[:, ['index', 'sentence', 'roberta_neg']].head(10)

Unnamed: 0,index,sentence,roberta_neg
450,450,"A brutal killer, one who has caused so much hardship and death, was violently eliminated – he will never again harm another innocent man, woman or child.",0.975585
433,433,"Terrorists who oppress and murder innocent people should never sleep soundly, knowing that we will completely destroy them.",0.973434
1692,1692,The situation in Iraq is unacceptable to the American people -- and it is unacceptable to me.,0.973283
438,438,Baghdadi and the losers who worked with him – in some cases people who had no idea what they were getting into and how dangerous and unglamorous it was – killed many people.,0.971123
298,298,"As the head of the Quds Force, Soleimani was personally responsible for some of the absolutely worst atrocities.",0.971071
364,364,"As the head of the Quds Force, Soleimani was personally responsible for some of the absolutely worst atrocities.",0.971071
301,301,"He viciously wounded and murdered thousands of U.S. troops, including the planting of roadside bombs that maim and dismember their victims.",0.969413
367,367,"He viciously wounded and murdered thousands of U.S. troops, including the planting of roadside bombs that maim and dismember their victims.",0.969413
440,440,"The shocking publicized murder of a Jordanian pilot who was burned alive in a cage for all to see, and the execution of Christians in Libya and Egypt, as well as the genocidal mass murder of Yazidis, rank ISIS among the most depraved organization...",0.968638
1964,1964,"To allow this to happen would be to ignore the lessons of September the 11th and make it more likely that America would suffer another attack like the one we experienced that day; a day in which 19 armed men with box cutters killed nearly 3,000 p...",0.968636


Both the roberta_pos and roberta_neg columns look sensible!

### Final Amendments

We might want to use various date formats for our speech date in the application. So let's go ahead and add those columns now, to avoid editing our DataFrame in the application itself (as much as possible, at least).

In [43]:
# NOTE(review): no cell shown here uses this alias — the `.dt` in the following
# cells is the pandas Series datetime accessor, not this module.
import datetime as dt

In [56]:
# Human-readable label built from the full month name and four-digit year, e.g. 'July 2021'.
# assumes speech_date is a datetime column, as the `.dt` accessor requires — TODO confirm
final_df['YearMonth'] = final_df['speech_date'].dt.strftime('%B %Y')

In [58]:
# List the distinct month labels as a quick sanity check.
final_df['YearMonth'].unique()

array(['July 2021', 'August 2021', 'December 2017', 'January 2020',
       'October 2019', 'August 2014', 'December 2009', 'October 2011',
       'June 2011', 'May 2011', 'November 2006', 'March 2003',
       'November 2003', 'January 2007', 'March 2008', 'December 2005',
       'July 2004'], dtype=object)

In [60]:
# Integer year-month key in YYYYMM form (year * 100 + month), e.g. July 2021 -> 202107,
# so values sort chronologically.
final_df['YRMNTH'] = final_df['speech_date'].dt.year * 100 + final_df['speech_date'].dt.month

In [61]:
# List the distinct YYYYMM keys as a quick sanity check.
final_df['YRMNTH'].unique()

array([202107, 202108, 201712, 202001, 201910, 201408, 200912, 201110,
       201106, 201105, 200611, 200303, 200311, 200701, 200803, 200512,
       200407])

Happy days!

In [63]:
# Write the scored DataFrame to disk as a pickle.
# NOTE(review): absolute, machine-specific Windows path tied to one user's directory
# layout — consider a configurable or relative path so the notebook runs elsewhere.
final_df.to_pickle("c:\\Users\\User\\OneDrive\\Documents\\GitHub\\NLP\\US Presedential Speeches\\final_df.pkl")

---