In [5]:
import pandas as pd
import numpy as np
from pathlib import Path
from glob import glob
import re
import emoji
import pickle
from scipy.stats import pearsonr,spearmanr

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer, PorterStemmer
from nltk.tokenize import word_tokenize

from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.model_selection import train_test_split

import statsmodels.api as sm

from gensim.models.word2vec import Word2Vec
import gensim.downloader as downloader_api

import tensorflow as tf
from tensorflow.keras.backend import clear_session
from tensorflow.keras.models import load_model, save_model
from tensorflow.keras import Sequential, Input #type:ignore
from tensorflow.keras.layers import Dense, SimpleRNN, LSTM, GRU, TextVectorization, Embedding # type:ignore

# word2vec_model = downloader_api.load('word2vec-google-news-300')
# glove_model = downloader_api.load('glove-wiki-gigaword-100')

In [22]:
from gensim.models import KeyedVectors


word2vec_model = KeyedVectors.load(r'models\artifacts\word2vec.kv')
glove_model = KeyedVectors.load(r'models\artifacts\glove.kv')

In [21]:
# word2vec_model.save(r'models\artifacts\word2vec.kv')
# glove_model.save(r'models\artifacts\glove.kv')

NameError: name 'word2vec_model' is not defined

## Fetch the data paths for different types of data provided

In [8]:
anger_path = glob('Emotion Intensity Data\Anger\*')
anger_path

['Emotion Intensity Data\\Anger\\anger_dev.txt',
 'Emotion Intensity Data\\Anger\\anger_test.txt',
 'Emotion Intensity Data\\Anger\\anger_train.txt']

In [9]:
fear_path = glob('Emotion Intensity Data\Fear\*')
fear_path

['Emotion Intensity Data\\Fear\\fear_dev.txt',
 'Emotion Intensity Data\\Fear\\fear_test.txt',
 'Emotion Intensity Data\\Fear\\fear_train.txt']

In [10]:
joy_path = glob('Emotion Intensity Data\Joy\*')
joy_path

['Emotion Intensity Data\\Joy\\joy_dev.txt',
 'Emotion Intensity Data\\Joy\\joy_test.txt',
 'Emotion Intensity Data\\Joy\\joy_train.txt']

In [11]:
sadness_path = glob('Emotion Intensity Data\Sadness\*')
sadness_path

['Emotion Intensity Data\\Sadness\\sadness_dev.txt',
 'Emotion Intensity Data\\Sadness\\sadness_test.txt',
 'Emotion Intensity Data\\Sadness\\sadness_train.txt']

## Load the data

In [12]:
def data_loader(file_paths: list):
    train_dataframe_0 = pd.read_csv(file_paths[0], sep='\t', header=None, names=['ID','Comment','Emotion','Intensity'])
    train_dataframe_1 = pd.read_csv(file_paths[2], sep='\t', header=None, names=['ID','Comment','Emotion','Intensity'])
    train_df = pd.concat([train_dataframe_1,train_dataframe_0]).reset_index(drop=True)
    test_df = pd.read_csv(file_paths[1], sep='\t', header=None, names=['ID','Comment','Emotion','Intensity'])
    return (train_df, test_df)

### Angry Data

In [13]:
angry_train_df, angry_test_df = data_loader(anger_path)

### Fear Data

In [14]:
fear_train_df, fear_test_df = data_loader(fear_path)

### Joy Data

In [15]:
joy_train_df, joy_test_df = data_loader(joy_path)

### Sadness Data

In [16]:
sadness_train_df, sadness_test_df = data_loader(sadness_path)

## Text Preprocessing
- Convertion to lowercase
- Removal of:
    - Whitespace charecters
    - All other Special Charecters
    - Stopwords
    - Emojis
- Tokenization, Stemming, Lemmatization
- Feature Extraction of stemmed and lemmatized text using:
    - Bag of Words
    - TF/IDF
    - Word2Vec
    - GloVe

We will create a class that does this job overall and returns the stemmed and lemmatized data that has undergone all the feature extraction techniques mentioned above. Therefore there will be a total of 8 types of data.

In [17]:
class text_preprocess():
    def __init__(self) -> None:
        pass
    
    def preprocess(self, df: pd.DataFrame, return_df: bool = False):
        stem_bow_vectorizer = CountVectorizer()
        lemmatize_bow_vectorizer = CountVectorizer()
        stemmed_tf_idf_vectorizer = TfidfVectorizer()
        lemmatized_tf_idf_vectorizer = TfidfVectorizer()
        
        df['Cleaned_text'] = df['Comment'].str.lower()                              # convert to lowercase
        df['Cleaned_text'] = df['Cleaned_text'].apply(self.remove_whitespace)       # removes whitespace charecters
        df['Cleaned_text'] = df['Cleaned_text'].apply(self.remove_special_chars)    # removes all other special charecters
        df['Cleaned_text'] = df['Cleaned_text'].apply(self.remove_stopwords)        # removes stopwords        
        df['Cleaned_text'] = df['Cleaned_text'].apply(self.remove_emoji)            # removes emojis

        df['Stemmed_text'] = df['Cleaned_text'].apply(self.tokenize_stem)           # applies tokenization & stemming on cleaned text
        df['Lemmatized_text'] = df['Cleaned_text'].apply(self.tokenize_lemmatize)   # applies tokenization & lemmatization on cleaned text
        
        stemmed_bow = pd.DataFrame(stem_bow_vectorizer.fit_transform(df['Stemmed_text']).toarray())             # Stemmed data - BOW
        lemmatized_bow = pd.DataFrame(lemmatize_bow_vectorizer.fit_transform(df['Lemmatized_text']).toarray())  # Lemmatized data - BOW
        
        stemmed_tf_idf = pd.DataFrame(stemmed_tf_idf_vectorizer.fit_transform(df['Stemmed_text']).toarray())    # Stemmed data - TF/IDF
        lemmatized_tf_idf = pd.DataFrame(lemmatized_tf_idf_vectorizer.fit_transform(df['Lemmatized_text']).toarray()) # Lemmatized data - TF/IDF

        # load the word2vec model & vectorize the sentence
        stemmed_word2vec = pd.DataFrame(np.vstack(df['Stemmed_text'].apply(lambda x: self.vec_converter(sentence = x, 
                                                                                                           keyedvector = word2vec_model))))
        lemmatized_word2vec = pd.DataFrame(np.vstack(df['Lemmatized_text'].apply(lambda x: self.vec_converter(sentence = x, 
                                                                                                                 keyedvector = word2vec_model))))
        
        # load the glove model & vectorize the sentence
        stemmed_glove = pd.DataFrame(np.vstack(df['Stemmed_text'].apply(lambda x: self.vec_converter(sentence = x, 
                                                                                                           keyedvector = glove_model))))
        lemmatized_glove = pd.DataFrame(np.vstack(df['Lemmatized_text'].apply(lambda x: self.vec_converter(sentence = x, 
                                                                                                                 keyedvector = glove_model))))
        
        if return_df is True:
            return (df)
        else:
            self.save(stem_bow_vectorizer,r'models\artifacts\stem_bow_vectorizer.pkl')
            self.save(lemmatize_bow_vectorizer,r'models\artifacts\lemmatize_bow_vectorizer.pkl')
            self.save(stemmed_tf_idf_vectorizer,r'models\artifacts\stem_tf_idf_vectorizer.pkl')
            self.save(lemmatized_tf_idf_vectorizer,r'models\artifacts\lemmatize_tf_idf_vectorizer.pkl')
            return((stemmed_bow, lemmatized_bow, stemmed_tf_idf, lemmatized_tf_idf, 
                   stemmed_word2vec, lemmatized_word2vec, stemmed_glove, lemmatized_glove))
    
    def remove_whitespace(self, text:str):
        pattern = r'\\[tnr\x0b\x0c]'
        text = re.sub(pattern, ' ', text)
        return text
    
    def remove_special_chars(self, text:str):
        text = re.sub(pattern="[^a-zA-Z0-9]",
                    repl=" ",
                    string=text)
        text = re.sub(pattern="\s+",
                    repl=" ",
                    string=text)
        return text
    
    def remove_stopwords(self, text:str):
        stopwords_ = stopwords.words('english')
        stopwords_.extend(["i'm","im","u"])
        return(" ".join([word for word in text.split() if word not in stopwords_]))
    
    def remove_emoji(self, text:str):
        return(emoji.replace_emoji(text,""))
    
    def tokenize_stem(self, sentence:str):
        stemmer = PorterStemmer()
        tokenized_sentence = word_tokenize(sentence)
        stemmed_tokens = [stemmer.stem(word) for word in tokenized_sentence]
        stemmed_sentence = " ".join(stemmed_tokens)
        return stemmed_sentence
    
    def tokenize_lemmatize(self, sentence:str):
        lemmatizer = WordNetLemmatizer()
        tokenized_sentence = word_tokenize(sentence)
        lemmatized_tokens = [lemmatizer.lemmatize(word) for word in tokenized_sentence]
        lemmatized_sentence = " ".join(lemmatized_tokens)
        return lemmatized_sentence
    
    def vec_converter(self, sentence:str, keyedvector):
        tokenized_sentence = word_tokenize(sentence)
        vector_token = [keyedvector[word] for word in tokenized_sentence if word in keyedvector] #this is where we convert the tokens into vectors
        vector = np.mean(vector_token,axis=0) if vector_token else np.zeros(keyedvector.vector_size)
        return vector
    
    def save(self, object, filepath):
        with open(filepath, 'wb') as file:
            pickle.dump(object, file)

    def load(self, filepath):
        with open(filepath, 'rb') as file:
            object = pickle.load(file)
        return (object)

In [18]:
preprocessor_obj = text_preprocess()

## Evaluation

In [19]:
class evaluate():
    def __init__(self) -> None:
        pass

    def eval(self, y_true, y_pred):
        pearsonr_ = pearsonr(y_true, y_pred)
        spearmanr_ = spearmanr(y_true, y_pred)
        return ({"Pearson":pearsonr_.statistic,"Spearman":spearmanr_.statistic})
    
eval_obj = evaluate()

## Modelling
We will use two types of models:
- Statistical models
- Deep Learning models

### Statistical Models
We will use the following statistical models from the statsmodels library
- OLS (Ordinary Least Squares)
- GLS (Generalized Least Squares)
- WLS (Weighted Least Squares)
- GLM (Generalized Linear Model)

In [16]:
class statmodels():
    def __init__(self) -> None:
        pass

    # common function for all the selected statsmodels
    def stats_models(self, X, y, model_name:str, data_name:str, df_data_name:str):
        X = sm.add_constant(X)
        x_train,x_test,y_train,y_test = train_test_split(X, y, train_size=0.75, random_state=42)

        # if model_name in 'OLS':
        #     model = sm.OLS(endog=y_train,
        #                 exog=x_train)
        # elif model_name in "GLS":
        #     model = sm.GLS(endog=y_train,
        #                 exog=x_train)
        # elif model_name == "WLS":
        #     model = sm.WLS(endog=y_train,
        #                 exog=x_train)
        # elif model_name == "GLM":
        #     model = sm.GLM(endog=y_train,
        #                    exog=x_train)

        model = sm.OLS(endog=y_train, exog=x_train)
        result = model.fit()
        y_pred = result.predict(x_test)
        result.save(f'models\statsmodels\{model_name}_{data_name}_{df_data_name}.pkl') # saving the instance of model for later usage
        result = eval_obj.eval(y_test,y_pred)
        return result
    
    def stats_models_predict(self, X: pd.DataFrame, model,):
        X = sm.add_constant(X)
        y_pred = model.predict(X)
        return y_pred
    
    def model_report(self,df: pd.DataFrame, df_data_name_: str):
        processed_data = (stemmed_bow, lemmatized_bow, stemmed_tf_idf, lemmatized_tf_idf, 
                          stemmed_word2vec, lemmatized_word2vec, stemmed_glove, lemmatized_glove) = preprocessor_obj.preprocess(df)
        data_names = ['Stem_BOW','Lemmatize_BOW','Stem_TF_IDF','Lemmatize_TF_IDF',
                    'Stem_Word2Vec','Lemmatize_Word2Vec','Stem_GloVe','Lemmatize_GloVe']
        stats_models_names = ['OLS']#,'GLS','WLS','GLM']
        stats_report = {}
        for model_name in stats_models_names:
            stats_report[model_name] = {}
            for i in range(len(processed_data)):  
                stats_report[model_name][data_names[i]] = self.stats_models(processed_data[i], df['Intensity'], 
                                                                            model_name=model_name,data_name=data_names[i],
                                                                            df_data_name = df_data_name_)
        return stats_report['OLS']
    
stat_model = statmodels()

In [17]:
def statsmodel_test_prediction(df: pd.DataFrame, model_name: str, stem_or_lemma: str,
                               feature_extraction_name: str, data_name: str):
    with open(f'models\statsmodels\{model_name}_{stem_or_lemma}_{feature_extraction_name}_{data_name}.pkl','rb') as file:
        model_result = pickle.load(file)
    processed_df = preprocessor_obj.preprocess(df, return_df=True)
    if stem_or_lemma in 'Stem':
        if feature_extraction_name in 'BOW':
            vectorizer = preprocessor_obj.load(r'models\artifacts\stem_bow_vectorizer.pkl')
            stemmed_df = pd.DataFrame(vectorizer.transform(processed_df['Stemmed_text']).toarray())
        elif feature_extraction_name in 'TF_IDF':
            vectorizer = preprocessor_obj.load(r'models\artifacts\stem_tf_idf_vectorizer.pkl')
            stemmed_df = pd.DataFrame(vectorizer.transform(processed_df['Stemmed_text']).toarray())
        elif feature_extraction_name in 'Word2Vec':
            stemmed_df = pd.DataFrame(np.vstack(processed_df['Stemmed_text'].apply(lambda x: preprocessor_obj.vec_converter(sentence = x, 
                                                                                                   keyedvector = word2vec_model))))
        elif feature_extraction_name in 'GloVe':
            stemmed_df = pd.DataFrame(np.vstack(processed_df['Stemmed_text'].apply(lambda x: preprocessor_obj.vec_converter(sentence = x, 
                                                                                                   keyedvector = glove_model))))
        else:
            raise ValueError(f"Unknown feature extraction name: {feature_extraction_name}")
        y_pred = stat_model.stats_models_predict(stemmed_df,model_result)

    elif stem_or_lemma in 'Lemmatize':
        if feature_extraction_name in 'BOW':
            vectorizer = preprocessor_obj.load(r'models\artifacts\lemmatize_bow_vectorizer.pkl')
            lemmatized_df = pd.DataFrame(vectorizer.transform(processed_df['Lemmatized_text']).toarray())
        elif feature_extraction_name in 'TF_IDF':
            vectorizer = preprocessor_obj.load(r'models\artifacts\lemmatize_tf_idf_vectorizer.pkl')
            lemmatized_df = pd.DataFrame(vectorizer.transform(processed_df['Lemmatized_text']).toarray())
        elif feature_extraction_name in 'Word2Vec':
            lemmatized_df = pd.DataFrame(np.vstack(processed_df['Lemmatized_text'].apply(lambda x: preprocessor_obj.vec_converter(sentence = x, 
                                                                                                   keyedvector = word2vec_model))))
        elif feature_extraction_name in 'GloVe':
            lemmatized_df = pd.DataFrame(np.vstack(processed_df['Lemmatized_text'].apply(lambda x: preprocessor_obj.vec_converter(sentence = x, 
                                                                                                   keyedvector = glove_model))))
        else:
            raise ValueError(f"Unknown feature extraction name: {feature_extraction_name}")
        y_pred = stat_model.stats_models_predict(lemmatized_df,model_result)

    return (y_pred)

In [18]:
fear_stat_report = pd.DataFrame(stat_model.model_report(fear_train_df.copy(),'Fear'))

In [20]:
fear_stat_report.T

Unnamed: 0,Pearson,Spearman
Stem_BOW,0.479338,0.543866
Lemmatize_BOW,0.468752,0.521532
Stem_TF_IDF,0.471711,0.510255
Lemmatize_TF_IDF,0.535501,0.590652
Stem_Word2Vec,0.314629,0.329086
Lemmatize_Word2Vec,0.534867,0.538858
Stem_GloVe,0.340748,0.35514
Lemmatize_GloVe,0.469166,0.481395


In [21]:
y_pred_ = statsmodel_test_prediction(fear_test_df,
                                    'OLS',
                                    'Lemmatize',
                                    'GloVe',
                                    'Fear')
result = eval_obj.eval(fear_test_df['Intensity'],y_pred_)

In [1]:
data_type = input("Test on Stem/Lemmatized test data: ")

In [2]:
data_type

'stem'

In [22]:
result

{'Pearson': 0.5224373248449986, 'Spearman': 0.5064857798464922}

### Deep Learning Models
We will use the following models:
- RNN (Recurrent Neural Networks)
- LSTM (Long Short Term Memory)
- GRU (Gated Recurrent Unit)

In [7]:
class nn_models():
    def __init__(self) -> None:
        # The maximum length of a sentence that can be accepted by the model:
        #<<<< max(len(data) for data in train_df['Lemmatized_text']) = 127 >>>>#
        # Running the above code on Lemmatized data tells us that the maximum length of a sentence in lemmatized text is 127. 
        # Hence we will round it to 130. 
        self.sequence_length = 130
        self.nn_models = [SimpleRNN, LSTM, GRU]
        self.nn_model_names = ['RNN','LSTM','GRU']
        self.data_type_names = ['Stemmed','Lemmatized']

    def data_splitter(self,X,y):
        x_train,x_test,y_train,y_test = train_test_split(X, y, train_size=0.75, random_state=42)
        return x_train,x_test,y_train,y_test
    
    def stem_data(self, df: pd.DataFrame):
        #split the data into train and test to avoid data leakage
        x_train_stemmed,x_test_stemmed,y_train_stemmed,y_test_stemmed = self.data_splitter(df['Stemmed_text'],df['Intensity'])

        #perform tokenization using TextVectorization
        text_vectorizer_stemmed = TextVectorization(output_sequence_length=self.sequence_length)
        text_vectorizer_stemmed.adapt(df['Stemmed_text'])
        # vocab_size = text_vectorizer_stemmed.vocabulary_size()

        # #Get the data in tensor format
        # x_train_stemmed = text_vectorizer_stemmed(x_train)
        # x_test_stemmed = text_vectorizer_stemmed(x_test)
        # y_train_stemmed = tf.convert_to_tensor(y_train)
        # y_test_stemmed = tf.convert_to_tensor(y_test)
        
        # save(text_vectorizer_stemmed, r"models\artifacts\text_vectorizer_stemmed")

        return (x_train_stemmed, x_test_stemmed, y_train_stemmed, y_test_stemmed, text_vectorizer_stemmed)
    
    def lemmatize_data(self, df: pd.DataFrame):
        #split the data into train and test to avoid data leakage
        x_train_lemmatized,x_test_lemmatized,y_train_lemmatized,y_test_lemmatized = self.data_splitter(df['Lemmatized_text'],df['Intensity'])

        #perform tokenization using TextVectorization
        text_vectorizer_lemmatized = TextVectorization(output_sequence_length=self.sequence_length)
        text_vectorizer_lemmatized.adapt(df['Lemmatized_text'])
        # vocab_size = text_vectorizer_lemmatized.vocabulary_size()

        # #Get the data in tensor format
        # x_train_lemmatized = text_vectorizer_lemmatized(x_train)
        # x_test_lemmatized = text_vectorizer_lemmatized(x_test)
        # y_train_lemmatized = tf.convert_to_tensor(y_train)
        # y_test_lemmatized = tf.convert_to_tensor(y_test)

        # save(text_vectorizer_lemmatized, r"models\artifacts\text_vectorizer_lemmatized")

        return (x_train_lemmatized, x_test_lemmatized, y_train_lemmatized, y_test_lemmatized, text_vectorizer_lemmatized)
    

    def model_builder(self, model_, text_vectorizer):
        model = Sequential()
        model.add(Input(shape=(1,), dtype=tf.string))
        model.add(text_vectorizer)
        model.add(Embedding(input_dim=text_vectorizer.vocabulary_size(),
                        output_dim=64,
                        input_length=self.sequence_length))
        model.add(model_(64,return_sequences=True))
        model.add(model_(32))
        model.add(Dense(1))
        
        model.compile(optimizer='adam',loss='mean_squared_error')
        return (model)
    
    def load_nn_model(self,file_path:str):
        return load_model(file_path)
    
    def nn_model_report(self, df: pd.DataFrame, df_data_name_: str):
        df = preprocessor_obj.preprocess(df, return_df=True)
        (x_train_stemmed, x_test_stemmed, y_train_stemmed, y_test_stemmed, text_vectorizer_stemmed) = self.stem_data(df)
        (x_train_lemmatized, x_test_lemmatized, y_train_lemmatized, y_test_lemmatized, text_vectorizer_lemmatized) = self.lemmatize_data(df)
        text_vectorizers = [text_vectorizer_stemmed, text_vectorizer_lemmatized]
        train_data = [[x_train_stemmed,y_train_stemmed],[x_train_lemmatized,y_train_lemmatized]]
        test_data = [[x_test_stemmed,y_test_stemmed],[x_test_lemmatized,y_test_lemmatized]]

        stats_report = {}
        for x in range(len(self.nn_models)):
            clear_session()
            model = self.model_builder(self.nn_models[x], text_vectorizers[0])
            stats_report[self.nn_model_names[x]] = {}
            for k,(i,j) in enumerate(zip(train_data,test_data)):
                print(model.summary(),'\n')
                model.fit(x=tf.convert_to_tensor(i[0]), 
                        y=tf.convert_to_tensor(i[1]),
                        epochs=20,
                        validation_data=(tf.convert_to_tensor(j[0]),tf.convert_to_tensor(j[1])))
                model.save(f'models\deep_learning_models\{self.nn_model_names[x]}_{self.data_type_names[k]}_{df_data_name_}.tf')
                # rmse = pow(model.get_metrics_result()['loss'].numpy(),0.5)
                y_pred = model.predict(tf.convert_to_tensor(j[0]))
                result = eval_obj.eval(j[1],y_pred.flatten())
                stats_report[self.nn_model_names[x]][self.data_type_names[k]] = result
                clear_session()
                model = self.model_builder(self.nn_models[x],text_vectorizers[1])  
        return stats_report
    
    def nn_models_test_prediction(self, model_filepath:str, df: pd.DataFrame, stem_or_lemma:str):
        df = preprocessor_obj.preprocess(df, return_df=True)
        model = self.load_nn_model(model_filepath)
        if stem_or_lemma == 'Stem':
            y_pred = model.predict(tf.convert_to_tensor(df['Stemmed_text']))
        elif stem_or_lemma == 'Lemmatize':
            y_pred = model.predict(tf.convert_to_tensor(df['Lemmatized_text']))
        else:
            raise ValueError(f"Unknown data name: {stem_or_lemma}")
        return y_pred

nn_models_obj = nn_models()

In [29]:
nn_report = nn_models_obj.nn_model_report(fear_train_df.copy(),'Fear')

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, 130)               0         
 ctorization)                                                    
                                                                 
 embedding (Embedding)       (None, 130, 64)           249472    
                                                                 
 simple_rnn (SimpleRNN)      (None, 130, 64)           8256      
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 32)                3104      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 260865 (1019.00 KB)
Trainable params: 260865 (1019.00 KB)
Non-trainable params: 0 (0.00 Byte)
______________

INFO:tensorflow:Assets written to: models\deep_learning_models\RNN_Stemmed_Fear.tf\assets


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization_1 (Text  (None, 130)               0         
 Vectorization)                                                  
                                                                 
 embedding (Embedding)       (None, 130, 64)           272832    
                                                                 
 simple_rnn (SimpleRNN)      (None, 130, 64)           8256      
                                                                 
 simple_rnn_1 (SimpleRNN)    (None, 32)                3104      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 284225 (1.08 MB)
Trainable params: 284225 (1.08 MB)
Non-trainable params: 0 (0.00 Byte)
____________________

INFO:tensorflow:Assets written to: models\deep_learning_models\RNN_Lemmatized_Fear.tf\assets


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, 130)               0         
 ctorization)                                                    
                                                                 
 embedding (Embedding)       (None, 130, 64)           249472    
                                                                 
 lstm (LSTM)                 (None, 130, 64)           33024     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 294945 (1.13 MB)
Trainable params: 294945 (1.13 MB)
Non-trainable params: 0 (0.00 Byte)
____________________

INFO:tensorflow:Assets written to: models\deep_learning_models\LSTM_Stemmed_Fear.tf\assets


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization_1 (Text  (None, 130)               0         
 Vectorization)                                                  
                                                                 
 embedding (Embedding)       (None, 130, 64)           272832    
                                                                 
 lstm (LSTM)                 (None, 130, 64)           33024     
                                                                 
 lstm_1 (LSTM)               (None, 32)                12416     
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 318305 (1.21 MB)
Trainable params: 318305 (1.21 MB)
Non-trainable params: 0 (0.00 Byte)
____________________

INFO:tensorflow:Assets written to: models\deep_learning_models\LSTM_Lemmatized_Fear.tf\assets


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVe  (None, 130)               0         
 ctorization)                                                    
                                                                 
 embedding (Embedding)       (None, 130, 64)           249472    
                                                                 
 gru (GRU)                   (None, 130, 64)           24960     
                                                                 
 gru_1 (GRU)                 (None, 32)                9408      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 283873 (1.08 MB)
Trainable params: 283873 (1.08 MB)
Non-trainable params: 0 (0.00 Byte)
____________________

INFO:tensorflow:Assets written to: models\deep_learning_models\GRU_Stemmed_Fear.tf\assets


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization_1 (Text  (None, 130)               0         
 Vectorization)                                                  
                                                                 
 embedding (Embedding)       (None, 130, 64)           272832    
                                                                 
 gru (GRU)                   (None, 130, 64)           24960     
                                                                 
 gru_1 (GRU)                 (None, 32)                9408      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 307233 (1.17 MB)
Trainable params: 307233 (1.17 MB)
Non-trainable params: 0 (0.00 Byte)
____________________

INFO:tensorflow:Assets written to: models\deep_learning_models\GRU_Lemmatized_Fear.tf\assets




In [32]:
pd.DataFrame(nn_report['RNN'])

Unnamed: 0,Stemmed,Lemmatized
Pearson,0.22745,0.226925
Spearman,0.202881,0.222821


In [33]:
pd.DataFrame(nn_report['LSTM'])

Unnamed: 0,Stemmed,Lemmatized
Pearson,-0.066921,0.007375
Spearman,-0.065432,-0.001174


In [34]:
pd.DataFrame(nn_report['GRU'])

Unnamed: 0,Stemmed,Lemmatized
Pearson,0.101004,-0.058105
Spearman,0.102477,-0.059346


In [23]:
y_pred = nn_models_obj.nn_models_test_prediction('models\deep_learning_models\LSTM_Lemmatized_Sadness.tf',
                                        sadness_test_df.copy(),
                                        'Lemmatize')



In [39]:
eval_obj.eval(sadness_test_df['Intensity'],y_pred.flatten())

{'Pearson': 0.07895379476103466, 'Spearman': 0.07247222681235509}

In [24]:
eval_obj.eval(sadness_test_df['Intensity'],y_pred.flatten())

{'Pearson': 0.03210738354025374, 'Spearman': 0.030989273118014263}