# Sentiment Analysis on The Social Dilemma [Recurrent Neural Network]

### Importing the libraries

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.utils import to_categorical


from tensorflow.keras.preprocessing.text import Tokenizer  #for tokenization
from sklearn.metrics import accuracy_score,confusion_matrix,precision_score,recall_score  #metrics
import seaborn as sns
from tensorflow.keras.preprocessing.sequence import pad_sequences
plt.style.use('ggplot') #template/style for background for plots as ggplot

### Loading the Data

In [2]:
train_df=pd.read_csv(r"C:\Users\HELLO\Desktop\New folder (2)\DL + AI Exam Paper\Dataset\RNN\tweets_train.csv")
test_df=pd.read_csv(r"C:\Users\HELLO\Desktop\New folder (2)\DL + AI Exam Paper\Dataset\RNN\tweets_test.csv")

In [3]:
train_df.head()

Unnamed: 0,user_name,user_location,user_description,user_created,user_followers,user_friends,user_favourites,user_verified,date,hashtags,source,is_retweet,clean_text,Sentiment
0,Romy 👑,Bolton - England,"Tables turn, bridges burn, you live and learn.",2009-06-15 09:00:39,525,896,3854,False,2020-09-19 15:19:32,['TheSocialDilemma'],Twitter for Android,False,TheSocialDilemma is an eye opener isn t it ple...,Neutral
1,TLynn Peterson,"Black Canyon City, Arizona",Acquired disability ♿ after an accident. Livin...,2013-05-29 00:17:46,5045,5374,48152,False,2020-09-13 00:31:46,['TheSocialDilemma'],Twitter for Android,False,TheSocialDilemma If we don t agree on what is ...,Positive
2,Rutger Kosters,Netherlands,Cloud Solution Architect @NetApp | VCDX #209 |...,2014-10-07 09:34:10,574,251,1070,False,2020-09-09 21:10:33,['TheSocialDilemma'],Twitter for Android,False,Watching TheSocialDilemma scary to see social ...,Negative
3,IDFWU🗯🇿🇼🇿🇦,"East London, South Africa","YOU HAVE OPTIONS, YOU CAN’T JUST TAKE WHAT LIF...",2012-08-22 11:09:06,706,618,1320,False,2020-09-11 04:33:08,,Twitter for Android,False,You check your social media before you pee in ...,Positive
4,Treebel,,,2010-09-25 07:25:31,9,13,55,False,2020-09-17 16:27:38,['thesocialdilemma'],Twitter for iPhone,False,watch thesocialdilemma and see what s actually...,Negative


In [4]:
test_df.head()

Unnamed: 0,user_name,user_location,user_description,user_created,user_followers,user_friends,user_favourites,user_verified,date,hashtags,source,is_retweet,clean_text
0,Doug Webb,Probably Europe,"I develop methods for effective cooperation, w...",2015-12-12 15:22:58,157,228,496,False,2020-09-15 22:53:36,"['theSocialDilemma', 'joinMastodon']",Twitter Web App,False,Watch theSocialDilemma then joinMastodon
1,Dan Reynish,"Medicine Hat, Alberta",News Anchor at Noon and 5 pm on CHAT TV. Canad...,2011-09-10 12:17:10,1722,2500,13912,False,2020-09-12 04:43:03,['WeTheNorth'],Twitter Web App,False,With the 2019 2020 NBA season officially over ...
2,🌤,,,2011-06-01 02:23:57,880,585,205,False,2020-09-19 07:06:53,['TheSocialDilemma'],Twitter for iPhone,False,if you want to really know about people go tal...
3,“IT IS WHAT IT IS.”🇰🇪🇳🇿🇺🇸,WA,Heliophile🌞☀️• #Rugby • Wildlifer • #HIV • #So...,2009-04-17 13:35:02,1143,2692,11999,False,2020-09-12 18:28:18,"['TheSocialDilemma', 'Netflix']",Twitter for iPhone,False,Have you watched TheSocialDilemma on Netflix
4,Mahima Sood,,Amy Dunne X Holly Golightly || Data Scientist ...,2018-01-26 21:57:22,18,65,263,False,2020-09-12 18:48:53,['TheSocialDilemma'],Twitter for iPhone,False,Highly recommend TheSocialDilemma on


### Mapped sentiment labels to numerical values.

In [5]:
label_map = {'positive': 0, 'neutral': 1, 'negative': 2}


unexpected_values = train_df[~train_df['Sentiment'].isin(label_map.keys())]
print("Rows with unexpected sentiment labels:\n", unexpected_values)


Rows with unexpected sentiment labels:
                          user_name               user_location  \
0                           Romy 👑            Bolton - England   
1                   TLynn Peterson  Black Canyon City, Arizona   
2                   Rutger Kosters                 Netherlands   
3                       IDFWU🗯🇿🇼🇿🇦   East London, South Africa   
4                          Treebel                         NaN   
...                            ...                         ...   
16995             Ra'Chelle Rogers       Philadelphia/New York   
16996                   Mike Lynch             Boston, MA, USA   
16997  James Baldwin was a genius.            Terra-Belle, USA   
16998   Johan - SocialMediaBreakup        Dublin City, Ireland   
16999                attilacsordas               Cambridge, UK   

                                        user_description         user_created  \
0         Tables turn, bridges burn, you live and learn.  2009-06-15 09:00:39   
1    

In [6]:
label_map.update({
    'Positive': 0,
    'Neutral': 1,
    'Negative': 2
})
# Defining the label map i.e. classes
# Map sentiment labels to integers
train_df['Sentiment'] = train_df['Sentiment'].map(label_map)


### Handled any NaN values.

In [7]:
print("Number of NaNs after mapping:", train_df['Sentiment'].isnull().sum())

# Print rows with NaN sentiment labels to understand the issue
nan_sentiment_rows = train_df[train_df['Sentiment'].isnull()]
print("Rows with NaN sentiment labels after mapping:\n", nan_sentiment_rows)


Number of NaNs after mapping: 0
Rows with NaN sentiment labels after mapping:
 Empty DataFrame
Columns: [user_name, user_location, user_description, user_created, user_followers, user_friends, user_favourites, user_verified, date, hashtags, source, is_retweet, clean_text, Sentiment]
Index: []


#### There are no NaN values in Sentiment in training df

In [9]:
train_labels = to_categorical(train_df['Sentiment'])

### Tokenized and padded the text data for input into the RNN model.

In [10]:
train_df['clean_text'] = train_df['clean_text'].astype(str) # ensuring all entries are strings
test_df['clean_text'] = test_df['clean_text'].astype(str)

print(train_df['clean_text'].apply(type).unique())  # Printing the types
print(test_df['clean_text'].apply(type).unique())


[<class 'str'>]
[<class 'str'>]


In [11]:
max_words = 20000
max_len = 100

tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_df['clean_text'])


In [12]:
train_sequences = tokenizer.texts_to_sequences(train_df['clean_text'])
test_sequences = tokenizer.texts_to_sequences(test_df['clean_text'])


In [13]:
train_padded = pad_sequences(train_sequences, maxlen=max_len)
test_padded = pad_sequences(test_sequences, maxlen=max_len)


## Creating and Compiling the model

In [14]:

model = Sequential()
model.add(Embedding(input_dim=max_words, output_dim=128, input_length=max_len))
model.add(SimpleRNN(64))
model.add(Dense(3, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])




In [16]:
history = model.fit(train_padded, train_labels, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m425/425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 82ms/step - accuracy: 0.6117 - loss: 0.8660 - val_accuracy: 0.8400 - val_loss: 0.4328
Epoch 2/10
[1m425/425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 79ms/step - accuracy: 0.9100 - loss: 0.2597 - val_accuracy: 0.8691 - val_loss: 0.3759
Epoch 3/10
[1m425/425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 90ms/step - accuracy: 0.9770 - loss: 0.0769 - val_accuracy: 0.8468 - val_loss: 0.4892
Epoch 4/10
[1m425/425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 90ms/step - accuracy: 0.9894 - loss: 0.0384 - val_accuracy: 0.8724 - val_loss: 0.4678
Epoch 5/10
[1m425/425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 81ms/step - accuracy: 0.9959 - loss: 0.0168 - val_accuracy: 0.8518 - val_loss: 0.6022
Epoch 6/10
[1m425/425[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 76ms/step - accuracy: 0.9930 - loss: 0.0252 - val_accuracy: 0.8503 - val_loss: 0.6038
Epoch 7/10
[1m4

In [18]:
test_predictions = model.predict(test_padded)
test_pred_labels = test_predictions.argmax(axis=1)
reverse_label_map = {0: 'positive', 1: 'neutral', 2: 'negative'}
test_pred_sentiments = [reverse_label_map[label] for label in test_pred_labels]
test_df['Predicted_Sentiment'] = test_pred_sentiments

[1m96/96[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step


### Validation Accuracy

In [19]:
val_accuracy = history.history['val_accuracy'][-1]
print(f"Validation Accuracy: {val_accuracy:.4f}")

Validation Accuracy: 0.8629


In [20]:
Sentiment_mapping = {'Positive': 0, 'Neutral': 1, 'Negative': 2}
Y_train = train_df['Sentiment'].map(Sentiment_mapping)

In [21]:
test_pred_sentiments

['neutral',
 'neutral',
 'positive',
 'neutral',
 'positive',
 'positive',
 'positive',
 'neutral',
 'positive',
 'positive',
 'positive',
 'neutral',
 'positive',
 'positive',
 'negative',
 'negative',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'negative',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'positive',
 'neutral',
 'positive',
 'positive',
 'negative',
 'negative',
 'positive',
 'positive',
 'positive',
 'neutral',
 'negative',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'positive',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'neutral',
 'neutral',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'positive',
 'positive',
 'positive',
 'positive',
 'neutral',
 'neutral',
 'positive',
 'positive',
 'positive',
 'neutral',
 'neutral',
 'neutral',
 'positive',
 'positive',
 'negative',
 'positive',
 'positive',
 'negative',
 'neutral',
 'neutral',
 'positive',
 'positive',
 'negative',
 'neutral',
 'pos

In [22]:
test_df['Predicted_Sentiment'] = test_pred_sentiments
test_pred_df = test_df.copy()
print(test_pred_df.head())

                   user_name          user_location  \
0                  Doug Webb        Probably Europe   
1                Dan Reynish  Medicine Hat, Alberta   
2                          🌤                    NaN   
3  “IT IS WHAT IT IS.”🇰🇪🇳🇿🇺🇸                     WA   
4                Mahima Sood                    NaN   

                                    user_description         user_created  \
0  I develop methods for effective cooperation, w...  2015-12-12 15:22:58   
1  News Anchor at Noon and 5 pm on CHAT TV. Canad...  2011-09-10 12:17:10   
2                                                NaN  2011-06-01 02:23:57   
3  Heliophile🌞☀️• #Rugby • Wildlifer • #HIV • #So...  2009-04-17 13:35:02   
4  Amy Dunne X Holly Golightly || Data Scientist ...  2018-01-26 21:57:22   

   user_followers  user_friends  user_favourites  user_verified  \
0             157           228              496          False   
1            1722          2500            13912          False   
2    

In [23]:
test_pred_df 

Unnamed: 0,user_name,user_location,user_description,user_created,user_followers,user_friends,user_favourites,user_verified,date,hashtags,source,is_retweet,clean_text,Predicted_Sentiment
0,Doug Webb,Probably Europe,"I develop methods for effective cooperation, w...",2015-12-12 15:22:58,157,228,496,False,2020-09-15 22:53:36,"['theSocialDilemma', 'joinMastodon']",Twitter Web App,False,Watch theSocialDilemma then joinMastodon,neutral
1,Dan Reynish,"Medicine Hat, Alberta",News Anchor at Noon and 5 pm on CHAT TV. Canad...,2011-09-10 12:17:10,1722,2500,13912,False,2020-09-12 04:43:03,['WeTheNorth'],Twitter Web App,False,With the 2019 2020 NBA season officially over ...,neutral
2,🌤,,,2011-06-01 02:23:57,880,585,205,False,2020-09-19 07:06:53,['TheSocialDilemma'],Twitter for iPhone,False,if you want to really know about people go tal...,positive
3,“IT IS WHAT IT IS.”🇰🇪🇳🇿🇺🇸,WA,Heliophile🌞☀️• #Rugby • Wildlifer • #HIV • #So...,2009-04-17 13:35:02,1143,2692,11999,False,2020-09-12 18:28:18,"['TheSocialDilemma', 'Netflix']",Twitter for iPhone,False,Have you watched TheSocialDilemma on Netflix,neutral
4,Mahima Sood,,Amy Dunne X Holly Golightly || Data Scientist ...,2018-01-26 21:57:22,18,65,263,False,2020-09-12 18:48:53,['TheSocialDilemma'],Twitter for iPhone,False,Highly recommend TheSocialDilemma on,positive
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3063,𝘔𝘶𝘳𝘪;🦋,𝘶𝘯𝘥𝘦𝘳 𝘵𝘩𝘦 𝘱𝘢𝘭𝘮𝘴🌴,𝘸𝘪𝘧𝘦 // 𝘤𝘢𝘵 𝘮𝘰𝘮 // 𝘱𝘴𝘺𝘤𝘩𝘰𝘭𝘰𝘨𝘪𝘴𝘵; 🧠 𝘣𝘦𝘢𝘤𝘩𝘣𝘶𝘮;🌴,2009-07-29 03:18:16,942,357,9917,False,2020-09-14 12:27:35,['TheSocialDilemma'],Twitter for iPhone,False,Everybody should watch TheSocialDilemma,neutral
3064,Didi Crawford,"Birmingham, England",Traveller | Blogger | Currently growing a huma...,2011-12-06 09:24:20,468,1095,3440,False,2020-09-22 19:25:48,['TheSocialDilemma'],Twitter for iPhone,False,Enjoyed TheSocialDilemma Then read this,neutral
3065,Ritwik Arora,,"Spent the last 30 minutes writing this bio, ch...",2020-10-13 01:12:06,10,137,106,False,2020-10-13 02:41:26,['TheSocialDilemma'],Twitter for Android,False,Couldn t stop thinking about docu TheSocialDil...,negative
3066,Nishant Mody,Mumbai,"I love music, dogs, nature, politics & marketi...",2012-12-30 04:58:15,174,541,5482,False,2020-09-10 17:19:44,['TheSocialDilemma'],Twitter for iPhone,False,This was the best one by on TheSocialDilemma on,positive


### This project showcases the application of an RNN for sentiment analysis on Twitter data, achieving meaningful insights and predictions from social media text.