In [7]:
# Basic packages
import pandas as pd 
import numpy as np
import re
import collections
import matplotlib.pyplot as plt
from pathlib import Path
# Packages for data preparation
from sklearn.model_selection import train_test_split
from nltk.corpus import stopwords
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
# Packages for modeling
from keras import models
from keras import layers
from keras import regularizers

In [17]:
df = pd.read_csv('data/sentiment_analysis_dataset.csv')

In [18]:
df

Unnamed: 0,line,sentiment
0,I AM REALLY FRUSTRATED BECAUSE YOU CONSTANTLY ...,Angry
1,IT MAKES ME UPSET THAT YOU NEVER TAKE RESPONSI...,Angry
2,I CANNOT BELIEVE YOU MISSED ANOTHER DEADLINE A...,Angry
3,IT ANNOYS ME WHEN YOU INTERRUPT DURING MEETING...,Angry
4,I AM TIRED OF YOUR EXCUSES EVERY TIME SOMETHIN...,Angry
5,IT UPSETS ME THAT YOU ALWAYS ARRIVE LATE AND Y...,Angry
6,I AM REALLY ANGRY BECAUSE YOU NEVER PREPARE FO...,Angry
7,IT INFURIATES ME THAT YOU NEVER APPRECIATE THE...,Angry
8,I CANNOT STAND HOW YOU ALWAYS TALK DOWN TO PEO...,Angry
9,IT IS VERY DISAPPOINTING THAT YOU NEVER FOLLOW...,Angry


In [31]:
from collections import Counter
from math import log

def sentiment_analysis(text, word_counts, class_priors, class_probs):
  """
  This function performs sentiment analysis on a text using word counts, class priors, and class probabilities for three sentiment classes (Angry, Happy, Neutral).

  Args:
      text (str): The text to analyze for sentiment.
      word_counts (dict): A dictionary mapping words to their counts in each sentiment class document.
      class_priors (dict): A dictionary mapping sentiment classes (Angry, Happy, Neutral) to their prior probabilities.
      class_probs (dict): A dictionary mapping sentiment classes to dictionaries mapping words to their conditional probabilities.

  Returns:
      str: The predicted sentiment class (Angry, Happy, or Neutral).
  """

  # Preprocess the text (optional)
  # - Convert to lowercase
  # - Remove punctuation
  text = text.lower()

  # Split the text into words
  words = text.split()

  # Calculate probabilities for each sentiment class
  angry_prob = class_priors["Angry"]
  happy_prob = class_priors["Happy"]
  neutral_prob = class_priors["Neutral"]
  for word in words:
    if word in word_counts:
      angry_prob += log(class_probs["Angry"].get(word, 0.001))  # Add Laplace smoothing
      happy_prob += log(class_probs["Happy"].get(word, 0.001))
      neutral_prob += log(class_probs["Neutral"].get(word, 0.001))
  print(f'angry_prob is {angry_prob}')
  print(f'happy_prob is {happy_prob}') 
  print(f'neutral_prob is {neutral_prob}')  
  # Classify based on the highest probability
  max_prob = max(angry_prob, happy_prob, neutral_prob)
  if max_prob == angry_prob:
    return "Angry"
  elif max_prob == happy_prob:
    return "Happy"
  else:
    return "Neutral"

def train(angry_df, happy_df, neutral_df):
    """
    This function trains the sentiment analysis model for three sentiment classes by building word counts, class priors, and class probabilities.

    Args:
        angry_df (pandas.DataFrame): A DataFrame containing text documents labeled as Angry in a 'text' column.
        happy_df (pandas.DataFrame): A DataFrame containing text documents labeled as Happy in a 'text' column.
        neutral_df (pandas.DataFrame): A DataFrame containing text documents labeled as Neutral in a 'text' column.

    Returns:
        dict: A dictionary containing the trained model (word_counts, class_priors, class_probs).
    """

    # Extract text from DataFrames (assuming 'text' column contains the documents)
    angry_text = angry_df['line'].tolist()
    happy_text = happy_df['line'].tolist()
    neutral_text = neutral_df['line'].tolist()

    # Count word occurrences in each sentiment class
    angry_word_counts = Counter()
    happy_word_counts = Counter()
    neutral_word_counts = Counter()
    for doc in angry_text:
        angry_word_counts.update(doc.lower().split())
    for doc in happy_text:
        happy_word_counts.update(doc.lower().split())
    for doc in neutral_text:
        neutral_word_counts.update(doc.lower().split())

    # Combine word counts from all classes
    all_word_counts = dict(angry_word_counts + happy_word_counts + neutral_word_counts)

    # Calculate total word counts for each class
    total_angry_words = sum(angry_word_counts.values())
    total_happy_words = sum(happy_word_counts.values())
    total_neutral_words = sum(neutral_word_counts.values())

    # Calculate class priors
    class_priors = {
        "Angry": len(angry_text) / (len(angry_text) + len(happy_text) + len(neutral_text)),
        "Happy": len(happy_text) / (len(angry_text) + len(happy_text) + len(neutral_text)),
        "Neutral": len(neutral_text) / (len(angry_text) + len(happy_text) + len(neutral_text))
    }

    # Calculate class conditional probabilities with Laplace smoothing
    class_probs = {
        "Angry": {word: (count + 1) / (total_angry_words + len(all_word_counts)) for word, count in angry_word_counts.items()},
        "Happy": {word: (count + 1) / (total_happy_words + len(all_word_counts)) for word, count in happy_word_counts.items()},
        "Neutral": {word: (count + 1) / (total_neutral_words + len(all_word_counts)) for word, count in neutral_word_counts.items()}
    }

    # Return the trained model data
    return {"word_counts": all_word_counts, "class_priors": class_priors, "class_probs": class_probs}


In [23]:
filt_1 =  df['sentiment'] == 'Angry'
Angry_df = df[filt_1]

filt_2 =  df['sentiment'] == 'Happy'
Happy_df = df[filt_2]

filt_3 =  df['sentiment'] == 'Neutral'
Neutral_df = df[filt_3]

In [24]:
naive_bayes_dict = train(Angry_df,Happy_df,Neutral_df)

In [25]:
line_1 = "I AM REALLY ANNOYED BY YOUR CONSTANT COMPLAINING AND YOU NEVER OFFER ANY SOLUTIONS WHICH IS VERY UNHELPFUL AND NEGATIVE"

In [32]:
#text, word_counts, class_priors, class_probs
#{"word_counts": all_word_counts, "class_priors": class_priors, "class_probs": class_probs}
line_1_class = sentiment_analysis(line_1,naive_bayes_dict.get('word_counts'),naive_bayes_dict.get('class_priors'),naive_bayes_dict.get('class_probs'))

angry_prob is -59.156608349332785
happy_prob is -58.95556569147118
neutral_prob is -74.30199204573346


In [33]:
line_1_class

'Happy'

In [28]:
line_2 = "IT IS FRUSTRATING THAT YOU NEVER PAY ATTENTION DURING DISCUSSIONS AND YOUR LACK OF ATTENTION IS REALLY AFFECTING OUR PROGRESS"

In [34]:
line_2_class = sentiment_analysis(line_2,naive_bayes_dict.get('word_counts'),naive_bayes_dict.get('class_priors'),naive_bayes_dict.get('class_probs'))

angry_prob is -69.30714396213065
happy_prob is -77.7889879525025
neutral_prob is -87.05193816907583


In [30]:
line_2_class

'Angry'

In [35]:
line_3 = 'I AM DELIGHTED BY YOUR FRIENDLINESS AND YOU ALWAYS MAKE EVERYONE FEEL WELCOME WHICH FOSTERS A SENSE OF COMMUNITY'

In [36]:
line_3_class = sentiment_analysis(line_3,naive_bayes_dict.get('word_counts'),naive_bayes_dict.get('class_priors'),naive_bayes_dict.get('class_probs'))

angry_prob is -65.18889489096102
happy_prob is -59.022055432740004
neutral_prob is -73.35421064676792


In [37]:
line_3_class

'Happy'

In [38]:
line_4 = 'IT IS WONDERFULL THAT YOU ALWAYS SHOW KINDNESS AND YOUR EMPATHY TOWARDS OTHERS IS TRULY HEARTWARMING AND APPRECIATED'

In [39]:
line_4_class = sentiment_analysis(line_4,naive_bayes_dict.get('word_counts'),naive_bayes_dict.get('class_priors'),naive_bayes_dict.get('class_probs'))

angry_prob is -47.62306359715982
happy_prob is -45.00072806828581
neutral_prob is -60.08101637966102


In [40]:
line_5 = 'YOUR ANALYSIS OF THE DATA WAS ACCURATE AND WELL PRESENTED PROVIDING A CLEAR UNDERSTANDING OF THE TRENDS AND PATTERNS'

In [41]:
line_5_class = sentiment_analysis(line_5,naive_bayes_dict.get('word_counts'),naive_bayes_dict.get('class_priors'),naive_bayes_dict.get('class_probs'))

angry_prob is -55.19966146773303
happy_prob is -56.33727781640785
neutral_prob is -44.469507346411596


In [42]:
line_5_class

'Neutral'

In [43]:
line_6 = 'THE MEETING MINUTES YOU PREPARED WAS DETAILED AND WELL ORGANISED ACCURATELY REFLECTING THE DISCUSSIONS AND DECESIONS MADE'

In [44]:
line_6_class = sentiment_analysis(line_6,naive_bayes_dict.get('word_counts'),naive_bayes_dict.get('class_priors'),naive_bayes_dict.get('class_probs'))

angry_prob is -47.080365294797126
happy_prob is -46.00307837302877
neutral_prob is -39.224392295272764


In [45]:
line_6_class

'Neutral'