# *Aya Shibbi -- Senior Project - 2024*

# **Voice-To-Text Emotion Detection**


In [None]:
# mount Google Drive into the Colab runtime so the dataset paths below resolve;
# force_remount=True asks Colab to mount again even if the drive is already mounted
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# dataset selection: keep exactly one `my_csv` assignment active.
# The save cells further down carry the same unbalanced/balanced toggle and
# must be switched together with this one.
# unbalanced
my_csv = "/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Original/unbalanced_onlyTesting.csv"

# balanced
# my_csv = "/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Original/balanced_combinedTrainingTestingValidation.csv"

Mounted at /content/drive


In [None]:
# install toolkit
!pip install nltk



In [None]:
import pandas as pd
import numpy as np
import string

import nltk

In [None]:
# load the dataset from the CSV file into a pandas DataFrame
# (pd.read_csv already returns a DataFrame, so no pd.DataFrame(...) wrapper is needed,
# and no intermediate variable shadows the standard-library `csv` module name)
df = pd.read_csv(my_csv)

## **2. Emotion Detection**

### Simple Exploratory Data Analysis (EDA)

In [None]:
# view columns: the rest of the notebook reads the 'text' and 'label' columns,
# so confirm both are present
print(df.columns)

Index(['text', 'label'], dtype='object')


In [None]:
# number of records and features, printed as a (rows, columns) tuple
print(df.shape)

(16000, 2)


In [None]:
# number of null values per column: isnull() flags missing cells, sum() counts them column-wise
print(df.isnull().sum())

text     0
label    0
dtype: int64


In [None]:
# create a column with the emotion names
# the position of each name in the list is its numeric label id (0 -> 'sad', ..., 5 -> 'surprise')
labels_dict = dict(enumerate(['sad', 'joy', 'love', 'anger', 'fear', 'surprise']))
# translate every numeric label into its emotion name
df['description'] = df['label'].map(labels_dict)

In [None]:
# view first 5 rows (head() returns 5 rows by default), including the new 'description' column
print(df.head())

                                                text  label description
0                            i didnt feel humiliated      0         sad
1  i can go from feeling so hopeless to so damned...      0         sad
2   im grabbing a minute to post i feel greedy wrong      3       anger
3  i am ever feeling nostalgic about the fireplac...      2        love
4                               i am feeling grouchy      3       anger


In [None]:
# view last 5 rows (tail() returns 5 rows by default), including the new 'description' column
print(df.tail())

                                                    text  label description
15995  i just had a very brief time in the beanbag an...      0         sad
15996  i am now turning and i feel pathetic that i am...      0         sad
15997                     i feel strong and good overall      1         joy
15998  i feel like this was such a rude comment and i...      3       anger
15999  i know a lot but i feel so stupid because i ca...      0         sad


### Processing Text Data Using NLTK (NLP)

In [None]:
# the raw 'text' column is the input to the preprocessing steps below
text = df['text']

#### Remove Punctuation

In [None]:
# remove punctuation (ex. '(', ')' ...)
# NOTE: string.punctuation also contains ':', '/', '.', '<' and '>', so any URL or HTML tag
# in the text loses its delimiters at this step
punctuations = string.punctuation
# translation table that maps every punctuation character to "delete"
strip_punctuation = str.maketrans('', '', punctuations)
cleaned_text = [sentence.translate(strip_punctuation) for sentence in text]

# preview the first five cleaned sentences
print(cleaned_text[:5])

['i didnt feel humiliated', 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake', 'im grabbing a minute to post i feel greedy wrong', 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property', 'i am feeling grouchy']


#### Remove URLs and HTML Tags

In [None]:
import re
from bs4 import BeautifulSoup

# remove URL
# The URL pattern needs "://" and "." and HTML tags need "<" / ">" -- all punctuation.
# Applied to the punctuation-stripped `cleaned_text`, neither step could ever match,
# so both are run on the raw `text` first and punctuation is stripped again afterwards.
pattern = r"(http|ftp|https)://([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?"
url_free_text = [re.sub(pattern, "", sentence) for sentence in text]

# remove HTML tags (get_text() keeps only the text content of whatever markup is present)
tag_free_text = [BeautifulSoup(sentence, "html.parser").get_text() for sentence in url_free_text]

# strip punctuation from the URL- and tag-free sentences (same rule as the punctuation step)
cleaned_text = [''.join([char for char in sentence if char not in punctuations]) for sentence in tag_free_text]

# remove 'href http www'
def remove_extras(text):
  """Delete the stand-alone tokens 'href', 'http', 'https' and 'www' from `text`.

  Matching ignores letter case and is limited to whole words (the pattern is wrapped
  in word boundaries), so e.g. 'httpx' is left alone. Only the token itself is
  removed; the spaces around it stay in place.

  Returns the resulting string.
  """
  leftover_tokens = re.compile(r"\b(href|http|https?|www)\b", flags=re.IGNORECASE)
  return leftover_tokens.sub("", text)

# run the leftover-token clean-up over every sentence
cleaned_text = list(map(remove_extras, cleaned_text))

# preview the first five results
print(cleaned_text[:5])

['i didnt feel humiliated', 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake', 'im grabbing a minute to post i feel greedy wrong', 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property', 'i am feeling grouchy']


#### Remove Extra Whitespace

In [None]:
# split() with no arguments ignores leading/trailing whitespace and treats any run of
# whitespace as a single separator, so re-joining the pieces with one space both trims
# each sentence and collapses repeated whitespace
cleaned_text_from_white_spaces = [" ".join(sentence.split()) for sentence in cleaned_text]

# preview the first five normalised sentences
print(cleaned_text_from_white_spaces[:5])

['i didnt feel humiliated', 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake', 'im grabbing a minute to post i feel greedy wrong', 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property', 'i am feeling grouchy']


#### Save Cleaned Text as CSV File

In [None]:
# save to df: store the cleaned sentences in a new 'cleaned_text' column next to the original 'text'
df['cleaned_text'] = cleaned_text_from_white_spaces

# preview the first rows with the new column
print(df.head())

                                                text  label description  \
0                            i didnt feel humiliated      0         sad   
1  i can go from feeling so hopeless to so damned...      0         sad   
2   im grabbing a minute to post i feel greedy wrong      3       anger   
3  i am ever feeling nostalgic about the fireplac...      2        love   
4                               i am feeling grouchy      3       anger   

                                        cleaned_text  
0                            i didnt feel humiliated  
1  i can go from feeling so hopeless to so damned...  
2   im grabbing a minute to post i feel greedy wrong  
3  i am ever feeling nostalgic about the fireplac...  
4                               i am feeling grouchy  


In [None]:
# write the DataFrame (every column present at this point) to Drive; index=False leaves out the row index.
# Keep only the target that matches the dataset selected in `my_csv` active.
# unbalanced
df.to_csv('/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Unbalanced/Preprocessed_Text/unbalanced_cleaned_text_ds.csv', index=False)

# balanced
# df.to_csv('/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Balanced/Preprocessed_Text/balanced_cleaned_text_ds.csv', index=False)

#### Lowercase Conversion

In [None]:
# lowercasing: build a lower-cased copy of every cleaned sentence
lower_case = [sentence.lower() for sentence in df['cleaned_text']]

# preview the first five lower-cased sentences
print(lower_case[:5])

['i didnt feel humiliated', 'i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake', 'im grabbing a minute to post i feel greedy wrong', 'i am ever feeling nostalgic about the fireplace i will know that it is still on the property', 'i am feeling grouchy']


#### Remove Stop Words

In [None]:
# download the NLTK 'stopwords' corpus, which the stop-word removal cell below reads
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [None]:
# remove stopwords (ex. 'a', 'the', 'is', 'are' ...)
from nltk.corpus import stopwords
stopwords = stopwords.words("english")  # NOTE: rebinds the imported corpus reader to a plain list of words
# membership tests against a set are constant-time; scanning the list for every token is needlessly slow
stopword_set = set(stopwords)

# Tokens are compared verbatim against the stop-word set.
# NOTE(review): apostrophes were stripped earlier, so contractions such as 'didnt' presumably no
# longer match NLTK's apostrophe-containing entries and are kept -- confirm this is intended.
remove_stop_words = [
    " ".join(word for word in sentence.split() if word not in stopword_set)
    for sentence in lower_case
]

# preview the first five filtered sentences
print(remove_stop_words[:5])

['didnt feel humiliated', 'go feeling hopeless damned hopeful around someone cares awake', 'im grabbing minute post feel greedy wrong', 'ever feeling nostalgic fireplace know still property', 'feeling grouchy']


#### Save Filtered Text as CSV File

In [None]:
# save to df: store the stop-word-free sentences in a new 'filtered_text' column
df["filtered_text"] = remove_stop_words

# preview the first rows with the new column
print(df.head())

                                                text  label description  \
0                            i didnt feel humiliated      0         sad   
1  i can go from feeling so hopeless to so damned...      0         sad   
2   im grabbing a minute to post i feel greedy wrong      3       anger   
3  i am ever feeling nostalgic about the fireplac...      2        love   
4                               i am feeling grouchy      3       anger   

                                        cleaned_text  \
0                            i didnt feel humiliated   
1  i can go from feeling so hopeless to so damned...   
2   im grabbing a minute to post i feel greedy wrong   
3  i am ever feeling nostalgic about the fireplac...   
4                               i am feeling grouchy   

                                       filtered_text  
0                              didnt feel humiliated  
1  go feeling hopeless damned hopeful around some...  
2          im grabbing minute post feel greedy 

In [None]:
# write the DataFrame (now including 'filtered_text') to Drive; index=False leaves out the row index.
# Keep only the target that matches the dataset selected in `my_csv` active.
# unbalanced
df.to_csv('/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Unbalanced/Preprocessed_Text/unbalanced_filtered_text_ds.csv', index=False)

# balanced
# df.to_csv('/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Balanced/Preprocessed_Text/balanced_filtered_text_ds.csv', index=False)

#### Lemmatization

In [None]:
# download the NLTK 'wordnet' corpus, which the lemmatization cell below queries
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [None]:
# ex. running --> run (reduces words to their base form)
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
lemmatizer = WordNetLemmatizer()

# word -> lemma; the same words recur across many sentences, so each distinct word
# is looked up in WordNet only once
_lemma_cache = {}


def _lemmatize_word(word):
    """Return the lemma of `word`, or `word` itself when WordNet has no synset for it.

    The part of speech passed to the lemmatizer is the one of the word's first
    WordNet synset. (The previous check that this POS occurs among the word's synsets
    was always true, so its fallback branch could never run and has been removed.)
    """
    if word not in _lemma_cache:
        synsets = wordnet.synsets(word)
        if synsets:
            _lemma_cache[word] = lemmatizer.lemmatize(word, synsets[0].pos())
        else:
            # unknown to WordNet: keep the token unchanged
            _lemma_cache[word] = word
    return _lemma_cache[word]


# lemmatize token by token and rebuild each sentence with single spaces
lemmatized_text = [
    ' '.join(_lemmatize_word(word) for word in sentence.split())
    for sentence in remove_stop_words
]

# preview the first five lemmatized sentences
print(lemmatized_text[:5])

['didnt feel humiliate', 'go feeling hopeless damned hopeful around someone care awake', 'im grab minute post feel greedy wrong', 'ever feeling nostalgic fireplace know still property', 'feeling grouchy']


#### Save Lemmatized Text as CSV File

In [None]:
# add lemmatized_text to the dataset as a new 'lemmatized_text' column
df['lemmatized_text'] = lemmatized_text

# preview the first rows with the new column
print(df.head())

                                                text  label description  \
0                            i didnt feel humiliated      0         sad   
1  i can go from feeling so hopeless to so damned...      0         sad   
2   im grabbing a minute to post i feel greedy wrong      3       anger   
3  i am ever feeling nostalgic about the fireplac...      2        love   
4                               i am feeling grouchy      3       anger   

                                        cleaned_text  \
0                            i didnt feel humiliated   
1  i can go from feeling so hopeless to so damned...   
2   im grabbing a minute to post i feel greedy wrong   
3  i am ever feeling nostalgic about the fireplac...   
4                               i am feeling grouchy   

                                       filtered_text  \
0                              didnt feel humiliated   
1  go feeling hopeless damned hopeful around some...   
2          im grabbing minute post feel gree

In [None]:
# write the DataFrame (now including 'lemmatized_text') to Drive; index=False leaves out the row index.
# Keep only the target that matches the dataset selected in `my_csv` active.
# unbalanced
df.to_csv('/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Unbalanced/Preprocessed_Text/unbalanced_lemmatized_text_ds.csv', index=False)

# balanced
# df.to_csv('/content/drive/MyDrive/AyaShibbi_SeniorProject_2024_LIU/Part2_Emotion_Detection/Datasets/Balanced/Preprocessed_Text/balanced_lemmatized_text_ds.csv', index=False)