# Sentiment Analysis Score

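Predict the sentiment polarity of the texts collected for each query with a deep learning model, then summarize the scores per query and per emotion, using the query-to-emotion mapping stored in the relations file.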

In [27]:
# Add project path to the PYTHONPATH

import os
import sys
from pathlib import Path

# Resolve the parent of the current working directory and register it on
# sys.path so that project-local modules can be imported from this notebook.
# The membership check keeps the cell idempotent: re-running it does not pile
# up duplicate entries.
project_root = Path(os.path.join(os.path.abspath(''), '../')).resolve().as_posix()
if project_root not in sys.path:
    sys.path.append(project_root)

# Load Tokenizer

Import and load the tokenizer from a `.pickle` file

In [28]:
import pickle
from pathlib import Path

In [29]:
# Locate the pickled tokenizer relative to the working directory and
# deserialize it in a single step.
# NOTE: unpickling can execute arbitrary code; only load tokenizer files that
# come from a trusted source.
tokenizer_file = Path('../datasets/sentiment140/tokenizer.pickle').resolve()
tokenizer = pickle.loads(tokenizer_file.read_bytes())

# Load Model

Rebuild the architecture of the sentiment analysis model and load its saved weights

In [30]:
from tensorflow.keras.layers import Input, Embedding, GRU
from tensorflow.keras.layers import Dropout, GlobalMaxPooling1D
from tensorflow.keras.layers import Bidirectional, Dense
from tensorflow.keras.models import Sequential

In [31]:
# Number of rows in the embedding matrix.
# `len(word_index) + 1` covers every indexed word plus index 0 (presumably a
# Keras Tokenizer, which never assigns index 0 to a word -- TODO confirm).
# `num_words` is None when the tokenizer has no vocabulary cap; comparing None
# with an int would raise a TypeError, so the cap is applied only when it is set.
vocabulary_size = len(tokenizer.word_index) + 1
if tokenizer.num_words is None:
    input_dim = vocabulary_size
else:
    input_dim = min(tokenizer.num_words, vocabulary_size)

# Architecture hyperparameters. The saved weights are loaded into a model built
# from these values, so they have to describe the same layer shapes as the
# weight file.
embedding_dim = 200
input_length = 100
gru_units = 128
gru_dropout = 0.1
recurrent_dropout = 0.1
dropout = 0.1

In [32]:
# Network layout, in order:
#   token ids -> embedding -> bidirectional GRU (full sequence output)
#   -> max over the time axis -> small dense layer with dropout
#   -> single sigmoid unit producing the polarity score.
model = Sequential([
    Embedding(
        input_dim=input_dim,
        output_dim=embedding_dim,
        input_shape=(input_length,),
    ),
    Bidirectional(
        GRU(
            gru_units,
            return_sequences=True,
            dropout=gru_dropout,
            recurrent_dropout=recurrent_dropout,
        )
    ),
    GlobalMaxPooling1D(),
    Dense(32, activation='relu'),
    Dropout(dropout),
    Dense(1, activation='sigmoid'),
])

In [33]:
# Resolve the weight file relative to the working directory and load it into
# the model built above; the path is passed to Keras as a POSIX-style string.
weights_path = Path('../models/sentiment_analysis/gru_model.h5').resolve()
model.load_weights(weights_path.as_posix())

# Load Query Relations

Load the relations between queries and emotions from a `.json` file

In [34]:
import json

In [35]:
# Mapping from query (hashtag or emoji alias) to emotion label.
# The path is resolved like the other paths in this notebook, and the file is
# decoded explicitly as UTF-8 so the result does not depend on the platform's
# default encoding.
relations_path = Path('../query_relations.json').resolve()
with relations_path.open('r', encoding='utf-8') as file:
    relations = json.load(file)

# Predict polarity

Predict the polarity of the texts, using the sentiment analysis model

In [36]:
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nlp import preprocess
from tqdm import tqdm
import pandas as pd
import numpy as np
import re

In [37]:
# Directory holding one CSV file per query; each file is read and scored below.
dataset_dir = Path('../datasets/tweepy').resolve()

In [38]:
# Per-query summary statistics: one entry is appended to every list for each
# dataset file, and the keys become the columns of the results table.
query_dict = {
    'query': [],
    'mean': [],
    'max': [],
    'min': [],
    'std': [],
    'count': [],
    'emotion': []
}

# Raw predictions grouped by emotion. The per-file arrays are gathered in lists
# and merged once after the loop, instead of re-copying a growing array on
# every file.
emotion_chunks = {}

# os.listdir returns entries in arbitrary order and also lists sub-directories
# (e.g. `.ipynb_checkpoints`), so keep regular files only and sort them to make
# the row order of the results reproducible.
dir_files = sorted(
    name for name in os.listdir(dataset_dir)
    if os.path.isfile(os.path.join(dataset_dir, name))
)

# The query is encoded in the file name: either a hashtag (`#` up to the first
# dot) or an emoji alias wrapped in `@...@`, which stands in for `:...:`.
query_pattern = re.compile(r'(#[^.]+|@.+@)')

for filename in tqdm(dir_files):
    dataset = pd.read_csv(os.path.join(dataset_dir, filename))
    cleaned_texts = preprocess(dataset.text, quiet=True)

    query = query_pattern.findall(filename)[0].replace('@', ':')

    # The texts are handed to the tokenizer as lists of tokens, then padded or
    # truncated to the same length the model was built with.
    predict_sequences = [text.split() for text in cleaned_texts]
    list_tokenized_predict = tokenizer.texts_to_sequences(predict_sequences)
    x_predict = pad_sequences(list_tokenized_predict, maxlen=input_length)

    result = model.predict(x_predict)

    emotion = relations[query]
    query_dict['query'].append(query)
    query_dict['mean'].append(np.mean(result))
    query_dict['max'].append(np.amax(result))
    query_dict['min'].append(np.amin(result))
    query_dict['std'].append(np.std(result))
    query_dict['count'].append(len(dataset))
    query_dict['emotion'].append(emotion)

    emotion_chunks.setdefault(emotion, []).append(result)

# All predictions per emotion as a single array, in order of first appearance.
data_dict = {
    emotion_label: np.concatenate(chunks)
    for emotion_label, chunks in emotion_chunks.items()
}

100%|██████████| 17/17 [08:23<00:00, 39.10s/it]


# Print Results

Display the polarity statistics of each query, grouped by emotion, followed by the statistics aggregated over all queries of each emotion

In [39]:
# Turn the per-query statistics into a table and show one sub-table per
# emotion, in the order in which the emotions first appear.
df = pd.DataFrame(data=query_dict)
emotion_labels = df['emotion'].unique()
for emotion in emotion_labels:
    same_emotion = df['emotion'] == emotion
    display(df[same_emotion])

Unnamed: 0,query,mean,max,min,std,count,emotion
0,#angry,0.321497,0.952827,0.0012,0.235699,493,anger
6,#mad,0.386649,0.979122,0.00046,0.259777,414,anger
9,:face_with_steam_from_nose:,0.526053,0.996217,0.000765,0.270318,18000,anger
10,:face_with_symbols_on_mouth:,0.437529,0.995513,0.000696,0.259906,17999,anger
16,:pouting_face:,0.45432,0.992831,0.00144,0.260314,5200,anger


Unnamed: 0,query,mean,max,min,std,count,emotion
1,#anxious,0.46728,0.980756,0.001258,0.268581,806,fear
4,#fear,0.469079,0.972688,0.003774,0.244103,5358,fear
8,:face_screaming_in_fear:,0.534089,0.995192,0.00147,0.262059,11890,fear
12,:fearful_face:,0.491772,0.995513,0.000551,0.262854,17993,fear


Unnamed: 0,query,mean,max,min,std,count,emotion
2,#depressed,0.16111,0.770609,0.000267,0.18624,699,sadness
7,#sad,0.107017,0.855292,0.000269,0.16411,7092,sadness
14,:loudly_crying_face:,0.515492,0.996391,0.000596,0.284174,18000,sadness
15,:pensive_face:,0.458807,0.995513,0.000381,0.284172,18000,sadness


Unnamed: 0,query,mean,max,min,std,count,emotion
3,#excited,0.912459,0.99308,0.102013,0.111222,1162,joy
5,#joy,0.798238,0.993685,0.012623,0.164588,5602,joy
11,:face_with_tears_of_joy:,0.567992,0.99634,0.000534,0.258006,10900,joy
13,:grinning_face_with_smiling_eyes:,0.688579,0.996391,0.001183,0.242627,17996,joy


In [40]:
# Summary statistics over all predictions of each emotion. Every column is
# derived from `data_dict` in its iteration order, so the rows line up.
grouped_scores = list(data_dict.values())

emotion_dict = {
    'emotion': list(data_dict.keys()),
    'mean': [np.mean(scores) for scores in grouped_scores],
    'max': [np.amax(scores) for scores in grouped_scores],
    'min': [np.amin(scores) for scores in grouped_scores],
    'std': [np.std(scores) for scores in grouped_scores],
    'count': [len(scores) for scores in grouped_scores]
}

emotion_df = pd.DataFrame(data=emotion_dict)
display(emotion_df)

Unnamed: 0,emotion,mean,max,min,std,count
0,anger,0.475587,0.996217,0.00046,0.268171,42106
1,fear,0.501809,0.995513,0.000551,0.261131,36047
2,sadness,0.420382,0.996391,0.000267,0.304309,43791
3,joy,0.676242,0.996391,0.000534,0.250057,35660
