### Importing Required libraries

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score
from gensim.models import Word2Vec
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier

### Reading the dataset

In [2]:
data=pd.read_csv("Political_Speeches")

In [3]:
data

Unnamed: 0,Country,Date,Speaker,Headline,Text_of_Speech,Designation,Running President/PM,Speech Link,Emotion,Context
0,Russia,16/07/2021,Vladimir Putin,Meeting of APEC Economic Leaders,"Madam Chair,\n\nColleagues,\n\nFirst of all, ...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,OPTIMISM,DEVELOPMENT
1,Russia,2021-09-05 00:00:00,Vladimir Putin,Victory Parade on Red Square,"Citizens of Russia,\n\nDear veterans,\n\nComra...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,JOY,NATIONALISM
2,Russia,2021-08-04 00:00:00,Vladimir Putin,Meeting on the results of implementing Preside...,"Good afternoon, colleagues.\n\nLet’s start.\n\...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,NEUTRAL,DEVELOPMENT
3,Russia,21-11-2020,Vladimir Putin,G20 Summit,"Colleagues,\n\nThe scope of problems humanity ...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,NEUTRAL,DEVELOPMENT
4,Russia,20-11-2020,Vladimir Putin,Address to participants in Nuremberg Lessons f...,"Colleagues, friends,\n\nFirst of all, I would ...",President,Vladimir Putin,http://en.kremlin.ru/events/president/transcri...,UPSET,EXTREMISM
...,...,...,...,...,...,...,...,...,...,...
2005,USA,05/02/2013,Barack Obama,\nRemarks by the President,"Good afternoon, everybody. \n\nI wanted to sa...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,NEUTRAL,DEVELOPMENT
2006,USA,04/02/2013,Barack Obama,\nRemarks by the President on Preventing Gun V...,"Hello, everybody. Please have a seat. Have a...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,NEUTRAL,OTHERS
2007,USA,02/02/2013,Barack Obama,\nWeekly Address: A Balanced Approach to Growi...,"Hi, everybody. \n\nIn the coming weeks, we fa...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,OPTIMISM,DEVELOPMENT
2008,USA,28/01/2013,Barack Obama,\nRemarks by the President Before Meeting with...,"Well, Vice President Biden and I just want to...",President,Barack Obama,https://obamawhitehouse.archives.gov/the-press...,OPTIMISM,OTHERS


In [4]:
data = data[['Text_of_Speech', 'Emotion']]

In [5]:
data

Unnamed: 0,Text_of_Speech,Emotion
0,"Madam Chair,\n\nColleagues,\n\nFirst of all, ...",OPTIMISM
1,"Citizens of Russia,\n\nDear veterans,\n\nComra...",JOY
2,"Good afternoon, colleagues.\n\nLet’s start.\n\...",NEUTRAL
3,"Colleagues,\n\nThe scope of problems humanity ...",NEUTRAL
4,"Colleagues, friends,\n\nFirst of all, I would ...",UPSET
...,...,...
2005,"Good afternoon, everybody. \n\nI wanted to sa...",NEUTRAL
2006,"Hello, everybody. Please have a seat. Have a...",NEUTRAL
2007,"Hi, everybody. \n\nIn the coming weeks, we fa...",OPTIMISM
2008,"Well, Vice President Biden and I just want to...",OPTIMISM


In [6]:
data.isnull().sum()

Text_of_Speech    0
Emotion           0
dtype: int64

In [7]:
# Handle missing or duplicate data
data.dropna(inplace=True)
data.drop_duplicates(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop_duplicates(inplace=True)


In [8]:
data.isnull().sum()

Text_of_Speech    0
Emotion           0
dtype: int64

In [9]:
data

Unnamed: 0,Text_of_Speech,Emotion
0,"Madam Chair,\n\nColleagues,\n\nFirst of all, ...",OPTIMISM
1,"Citizens of Russia,\n\nDear veterans,\n\nComra...",JOY
2,"Good afternoon, colleagues.\n\nLet’s start.\n\...",NEUTRAL
3,"Colleagues,\n\nThe scope of problems humanity ...",NEUTRAL
4,"Colleagues, friends,\n\nFirst of all, I would ...",UPSET
...,...,...
2005,"Good afternoon, everybody. \n\nI wanted to sa...",NEUTRAL
2006,"Hello, everybody. Please have a seat. Have a...",NEUTRAL
2007,"Hi, everybody. \n\nIn the coming weeks, we fa...",OPTIMISM
2008,"Well, Vice President Biden and I just want to...",OPTIMISM


In [10]:
X = data['Text_of_Speech'].apply(lambda x: x.split()) 
y = data['Emotion'].apply(lambda x: x.split(','))

In [11]:
X

0       [Madam, Chair,, Colleagues,, First, of, all,, ...
1       [Citizens, of, Russia,, Dear, veterans,, Comra...
2       [Good, afternoon,, colleagues., Let’s, start.,...
3       [Colleagues,, The, scope, of, problems, humani...
4       [Colleagues,, friends,, First, of, all,, I, wo...
                              ...                        
2005    [Good, afternoon,, everybody., I, wanted, to, ...
2006    [Hello,, everybody., Please, have, a, seat., H...
2007    [Hi,, everybody., In, the, coming, weeks,, we,...
2008    [Well,, Vice, President, Biden, and, I, just, ...
2009    [Vice, President, Biden,, Mr., Chief, Justice,...
Name: Text_of_Speech, Length: 2009, dtype: object

In [12]:
y

0       [OPTIMISM]
1            [JOY]
2        [NEUTRAL]
3        [NEUTRAL]
4          [UPSET]
           ...    
2005     [NEUTRAL]
2006     [NEUTRAL]
2007    [OPTIMISM]
2008    [OPTIMISM]
2009         [JOY]
Name: Emotion, Length: 2009, dtype: object

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
X_test

1594    [Hello,, everybody., This, past, week,, we, lo...
526     [Good, afternoon., The, Southern, District, is...
393     [Good, afternoon, respected, colleagues,, We, ...
1404    [THE, PRESIDENT:, My, fellow, Americans:, Toni...
433     [Esteemed, members, of, the, Board,, Comrade, ...
                              ...                        
941     [When, I, stood, in, Downing, Street, as, Prim...
733     [Mr., President, and, Mrs., Obama,, Ladies, an...
1576    [I, just, had, a, wonderful, conversation, wit...
692     [Excellencies,, Distinguished, Guests,, Good, ...
1256    [THE, PRESIDENT:, Thank, you, very, much,, Jul...
Name: Text_of_Speech, Length: 402, dtype: object

In [15]:
y_test

1594     [NEUTRAL]
526        [UPSET]
393      [NEUTRAL]
1404       [UPSET]
433      [NEUTRAL]
           ...    
941      [NEUTRAL]
733     [OPTIMISM]
1576    [OPTIMISM]
692      [NEUTRAL]
1256    [OPTIMISM]
Name: Emotion, Length: 402, dtype: object

### Word Embedding Technique: WORD2VEC

In [16]:
# Train Word2Vec model
embedding_model = Word2Vec(sentences=X_train, vector_size=100, window=5, min_count=1, workers=4)

In [17]:
# Generate word embeddings for the text data
X_train_embeddings = np.array([np.mean([embedding_model.wv[word] for word in words if word in embedding_model.wv], axis=0) for words in X_train])
X_test_embeddings = np.array([np.mean([embedding_model.wv[word] for word in words if word in embedding_model.wv], axis=0) for words in X_test])

In [18]:
X_train_embeddings

array([[-0.5766086 , -0.22433317,  0.2907299 , ..., -0.20981255,
         0.39520153,  0.14648587],
       [-0.66872126,  0.09184001,  0.5833525 , ...,  0.1675789 ,
         0.21608777,  0.3133535 ],
       [-0.95901495,  0.23587811,  0.45198905, ...,  0.15333517,
         0.24818788,  0.08313505],
       ...,
       [-0.67046136, -0.08968383,  0.37571955, ...,  0.04302053,
         0.24313715,  0.13679695],
       [-0.6435419 , -0.08907568,  0.47549444, ..., -0.01581487,
         0.66008633,  0.24055694],
       [-0.5392378 , -0.09541202,  0.27990514, ..., -0.03263106,
         0.2800381 ,  0.22827052]], dtype=float32)

In [19]:
# Transform multi-labels into binary format
mlb = MultiLabelBinarizer()
y_train_encoded = mlb.fit_transform(y_train)
y_test_encoded = mlb.transform(y_test)

In [20]:
y_train_encoded

array([[0, 1, 0, 0],
       [0, 1, 0, 0],
       [0, 1, 0, 0],
       ...,
       [0, 0, 1, 0],
       [1, 0, 0, 0],
       [0, 0, 0, 1]])

In [21]:
# Train a multi-label classification model (e.g., RandomForestClassifier)
classifier = MultiOutputClassifier(RandomForestClassifier(n_estimators=100, random_state=42))
classifier.fit(X_train_embeddings, y_train_encoded)

In [22]:
# Predict emotions for the test set
y_pred = classifier.predict(X_test_embeddings)

In [23]:
# Evaluate the model
f1_micro = f1_score(y_test_encoded, y_pred, average='micro')
f1_macro = f1_score(y_test_encoded, y_pred, average='macro')

In [24]:
print(f'F1 Score (Micro): {f1_micro}')
print(f'F1 Score (Macro): {f1_macro}')

F1 Score (Micro): 0.29197080291970806
F1 Score (Macro): 0.28331520725635706


#### LSTM

In [27]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense




In [None]:
# Tokenize and pad sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)

max_sequence_length = max([len(seq) for seq in X_train_seq])
X_train_padded = pad_sequences(X_train_seq, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_seq, maxlen=max_sequence_length)

# Transform multi-labels into binary format
mlb = MultiLabelBinarizer()
y_train_encoded = mlb.fit_transform(y_train)
y_test_encoded = mlb.transform(y_test)

# Build LSTM model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=100, input_length=max_sequence_length))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(len(mlb.classes_), activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the LSTM model
model.fit(X_train_padded, y_train_encoded, epochs=10, batch_size=64, validation_split=0.2)

# Evaluate the model
loss, accuracy = model.evaluate(X_test_padded, y_test_encoded)
print(f'Loss: {loss}')
print(f'Accuracy: {accuracy}')



Epoch 1/10


