# Imports

In [42]:
import os
import gzip
import json
import numpy as np
import matplotlib.pyplot as plt
import csv
import gensim.downloader as api

from random import randint
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV, train_test_split
from matplotlib.backends.backend_pdf import PdfPages
from gensim.models import Word2Vec, KeyedVectors
from nltk.tokenize import word_tokenize

# 1. Dataset Preparation & Analysis

## 1.2 - Loading the dataset

In [43]:
### goemotions.json.gz must be placed in the parent of current directory

path = os.getcwd()
f = gzip.open(os.path.abspath(os.path.join(path, os.pardir)) + '/goemotions.json.gz', 'rb')
file_content = f.read()
data_list = json.loads(file_content)

## 1.3.1 - Extracting the posts and 2 sets of labels (emotion and sentiment)

In [44]:
posts = []
emotions = []
sentiments = []

for entry in data_list:
    posts.append(entry[0])
    emotions.append(entry[1])
    sentiments.append(entry[2])

## 1.3.2 - Plotting the distribution of the posts in each category and saving the graphics

In [45]:
emotion_counts = []
emotion_labels = []
sentiment_counts = []
sentiment_labels = []

# Get n different colors
def getColors(n):
    colors = []
    for i in range(n):
        color = '#%06X' % randint(0, 0xFFFFFF)
        while color in colors:
            color = '#%06X' % randint(0, 0xFFFFFF)
        colors.append(color)
    return colors

# Breakdown emotions list into counts and labels
for emotion in emotions:
    if emotion in emotion_labels:
        index = emotion_labels.index(emotion)
        emotion_counts[index] = emotion_counts[index] + 1
    else:
        emotion_labels.append(emotion)
        emotion_counts.append(1)

# Breakdown sentiments list into counts and labels
for sentiment in sentiments:
    if sentiment in sentiment_labels:
        index = sentiment_labels.index(sentiment)
        sentiment_counts[index] = sentiment_counts[index] + 1
    else:
        sentiment_labels.append(sentiment)
        sentiment_counts.append(1)

# Calculating frequencies for each classification
emotion_freq = [round((count/sum(emotion_counts)), 3) for count in emotion_counts]
sentiment_freq = [round((count/sum(sentiment_counts)), 3) for count in sentiment_counts]

with PdfPages('frequency_charts.pdf') as pdf:
    # Plotting emotion frequencies
    plt.pie(emotion_counts, labels=emotion_labels, 
            startangle=90, colors=getColors(len(emotion_labels)),
            rotatelabels=True, counterclock=False, 
            explode=[0.1 for emotion in emotion_labels], shadow=True)
    plt.title('Emotion Frequencies', y=1.25)
    # Displaying distribution in legend because hard to see on the chart    
    plt.legend(title='Emotions:', labels=[f'{l}: {s:0.1f}%' for l, s in zip(emotion_labels, [freq * 100 for freq in emotion_freq])],
            bbox_to_anchor=(1.2,0.5), loc='center right', 
            bbox_transform=plt.gcf().transFigure)
    pdf.savefig(bbox_inches='tight')
    plt.close()

    # Plotting sentiment frequencies
    plt.pie(sentiment_counts, labels=sentiment_labels, 
            startangle=90, colors=getColors(len(sentiment_labels)),
            counterclock=False, autopct='%1.1f%%',
            explode=[0.1 for sentiment in sentiment_labels], shadow=True)
    plt.title('Sentiment Frequencies')
    plt.legend(title='Sentiments:', labels=sentiment_labels, bbox_to_anchor=(1,0.5), loc='center right', bbox_transform=plt.gcf().transFigure)
    pdf.savefig(bbox_inches='tight')
    plt.close()

# 3. Embeddings as Features

## 3.1 - Load embedding model

In [46]:
GoogleNews = api.load("word2vec-google-news-300", return_path = False)
print(GoogleNews)

KeyedVectors<vector_size=300, 3000000 keys>


## 3.2 - Tokenizer

In [47]:
tokens = []
for i in word_tokenize(GoogleNews):
    tokens.append(i)
print(len(tokens))

print('Size of vocabulary: ', len(GoogleNews), 'tokens')

TypeError: the JSON object must be str, bytes or bytearray, not KeyedVectors

## 3.3 - Average Embedding

## 3.4 - Hit Rates

## 3.5 - Base MLP

## 3.6 - Top MLP

## 3.7 - Classification Performance

## 3.8 - Rerun Best Performing Model

# 2. Words as Features

## 2.1 - Processing the dataset: Extracting tokens/words and their frequencies

In [None]:
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(posts)

# Vectorizing emotions and sentiments
emotion_to_index = {}
sentiment_to_index = {}
index = 0

for emotion in emotions:
    if emotion in emotion_to_index:
        # already seen
        continue
    emotion_to_index[emotion] = index
    index += 1
y_emotions = [emotion_to_index[emotion] for emotion in emotions]

index = 0
for sentiment in sentiments:
    if sentiment in sentiment_to_index:
        continue
    sentiment_to_index[sentiment] = index
    index += 1
y_sentiments = [sentiment_to_index[sentiment] for sentiment in sentiments]

print('Size of vocabulary: ', len(vectorizer.vocabulary_), 'tokens')

Size of vocabulary:  30449 tokens


## 2.2 - Splitting dataset

In [None]:
X_emotions_train, X_emotions_test, y_emotions_train, y_emotions_test = train_test_split(X, y_emotions, test_size=0.2)
X_sentiments_train, X_sentiments_test, y_sentiments_train, y_sentiments_test = train_test_split(X, y_sentiments, test_size=0.2)

## 2.3 - Training and testing classifiers for both classifications, using word frequency as features

### 2.3.1 - Base-MNB

In [None]:
base_MNB = MultinomialNB()

base_MNB_emotions_model = base_MNB.fit(X_emotions_train, y_emotions_train)
y_base_MNB_emotions_pred = base_MNB_emotions_model.predict(X_emotions_test)

base_MNB_sentiments_model = base_MNB.fit(X_sentiments_train, y_sentiments_train)
y_base_MNB_sentiments_pred = base_MNB_sentiments_model.predict(X_sentiments_test)

# print(classification_report(y_emotions_test, y_base_MNB_emotions_pred, target_names=emotion_labels))
# print(classification_report(y_sentiments_test, y_base_MNB_sentiments_pred, target_names=sentiment_labels))

### 2.3.2 - Base-DT

In [None]:
base_DT = DecisionTreeClassifier()

base_DT_emotions_model = base_DT.fit(X_emotions_train, y_emotions_train)
y_base_DT_emotions_pred = base_DT_emotions_model.predict(X_emotions_test)

base_DT_sentiments_model = base_DT.fit(X_sentiments_train, y_sentiments_train)
y_base_DT_sentiments_pred = base_DT_sentiments_model.predict(X_sentiments_test)

In [None]:
print("\nBase-DT with the default parameters")

# evaluate classifier
print("\nBase-DT Emotions Classification Report:\n", classification_report(y_emotions_test, y_base_DT_emotions_pred, target_names=emotion_labels))
print("\nBase-DT Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_base_DT_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-DT Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_base_DT_emotions_pred))
print("\nBase-DT Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_base_DT_sentiments_pred))


Base-DT with the default parameters

Base-DT Emotions Classification Report:
                 precision    recall  f1-score   support

       sadness       0.21      0.35      0.27       772
       neutral       0.46      0.66      0.55     10980
          love       0.49      0.58      0.53      1016
     gratitude       0.71      0.74      0.72      1434
   disapproval       0.23      0.23      0.23      1506
     amusement       0.44      0.47      0.45      1191
disappointment       0.15      0.13      0.14       942
   realization       0.14      0.11      0.12       938
    admiration       0.48      0.47      0.47      2117
     annoyance       0.20      0.14      0.17      1704
     confusion       0.23      0.18      0.20       990
      optimism       0.32      0.24      0.27       889
    excitement       0.24      0.19      0.21       585
        caring       0.27      0.19      0.22       676
       remorse       0.35      0.23      0.28       291
           joy       0.3

### 2.3.3 - Base-MLP

In [None]:
base_MLP = MLPClassifier(max_iter=5) ### talk about low epochs in analysis

base_MLP_emotions_model = base_MLP.fit(X_emotions_train, y_emotions_train)
y_base_MLP_emotions_pred = base_MLP_emotions_model.predict(X_emotions_test)

base_MLP_sentiments_model = base_MLP.fit(X_sentiments_train, y_sentiments_train)
y_base_MLP_sentiments_pred = base_MLP_sentiments_model.predict(X_sentiments_test)



In [None]:
print("\nBase-MLP with the default parameters")

# evaluate classifier
print("\nBase-MLP Emotions Classification Report:\n", classification_report(y_emotions_test, y_base_MLP_emotions_pred, target_names=emotion_labels))
print("\nBase-MLP Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_base_MLP_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-MLP Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_base_MLP_emotions_pred))
print("\nBase-MLP Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_base_MLP_sentiments_pred))


Base-MLP with the default parameters

Base-MLP Emotions Classification Report:
                 precision    recall  f1-score   support

       sadness       0.35      0.30      0.33       783
       neutral       0.47      0.62      0.53     11094
          love       0.52      0.62      0.57      1015
     gratitude       0.78      0.76      0.77      1409
   disapproval       0.22      0.22      0.22      1505
     amusement       0.51      0.57      0.54      1217
disappointment       0.21      0.12      0.16       930
   realization       0.19      0.10      0.13       999
    admiration       0.47      0.53      0.50      2065
     annoyance       0.20      0.15      0.17      1656
     confusion       0.26      0.18      0.21      1004
      optimism       0.40      0.31      0.35       891
    excitement       0.25      0.21      0.23       575
        caring       0.26      0.19      0.22       713
       remorse       0.39      0.46      0.42       296
           joy       0

### 2.3.4 - Top-MNB

In [None]:
top_MNB_hyper_params = {
    # Because an alpha too small will result in numeric errors, 0 is set as 1.0e-10
    'alpha': [1.0e-10, 0.5, 1.5, 3.0]
}

top_MNB_emotions = MultinomialNB(class_prior=emotion_freq)
top_MNB_sentiments = MultinomialNB(class_prior=sentiment_freq)

top_MNB_emotions_grid_search = GridSearchCV(estimator=top_MNB_emotions, param_grid=top_MNB_hyper_params)
top_MNB_sentiments_grid_search = GridSearchCV(estimator=top_MNB_sentiments, param_grid=top_MNB_hyper_params)

top_MNB_emotions_model = top_MNB_emotions_grid_search.fit(X_emotions_train, y_emotions_train)
y_top_MNB_emotions_pred = top_MNB_emotions_model.predict(X_emotions_test)
# print('best params for emotions MNB: ', top_MNB_emotions_model.best_params_)

top_MNB_sentiments_model = top_MNB_sentiments_grid_search.fit(X_sentiments_train, y_sentiments_train)
y_top_MNB_sentiments_pred = top_MNB_sentiments_model.predict(X_sentiments_test)
# print('best params for sentiments MNB: ', top_MNB_sentiments_model.best_params_)

In [None]:
print("\nTop-MNB using GridSearchCV to find best hyper parameters with alphafloat")

# evaluate classifier
print("\nTop-MNB Emotions Classification Report:\n", classification_report(y_emotions_test, y_top_MNB_emotions_pred, target_names=emotion_labels))
print("\nTop-MNB Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_top_MNB_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_top_MNB_emotions_pred))
print("Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_top_MNB_sentiments_pred))


Top-MNB using GridSearchCV to find best hyper parameters with alphafloat

Top-MNB Emotions Classification Report:
                 precision    recall  f1-score   support

       sadness       0.38      0.20      0.26       783
       neutral       0.40      0.72      0.51     11094
          love       0.56      0.46      0.50      1015
     gratitude       0.70      0.73      0.72      1409
   disapproval       0.21      0.13      0.16      1505
     amusement       0.50      0.44      0.47      1217
disappointment       0.21      0.07      0.11       930
   realization       0.20      0.07      0.10       999
    admiration       0.46      0.50      0.48      2065
     annoyance       0.19      0.12      0.15      1656
     confusion       0.25      0.12      0.16      1004
      optimism       0.40      0.26      0.31       891
    excitement       0.24      0.08      0.12       575
        caring       0.26      0.15      0.19       713
       remorse       0.45      0.15      0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### 2.3.5 - Top-DT

In [None]:
top_DT_hyper_params = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [5, 10],
    'min_samples_split': [2, 3, 4]
}

top_DT = DecisionTreeClassifier()
top_DT_grid_search = GridSearchCV(estimator=top_DT, param_grid=top_DT_hyper_params)

top_DT_emotions_model = top_DT_grid_search.fit(X_emotions_train, y_emotions_train)
y_top_DT_emotions_pred = top_DT_emotions_model.predict(X_emotions_test)

top_DT_sentiments_model = top_DT_grid_search.fit(X_sentiments_train, y_sentiments_train)
y_top_DT_sentiments_pred = top_DT_sentiments_model.predict(X_sentiments_test)

In [None]:
print("\nTop-DT using GridSearchCV with criterion, max_depth and min_samples_split")

# evaluate classifier
print("\nTop-DT Emotions Classification Report:\n", classification_report(y_emotions_test, y_top_DT_emotions_pred, target_names=emotion_labels))
print("\nTop-DT Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_top_DT_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nTop-DT Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_top_DT_emotions_pred))
print("\nTop-DT Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_top_DT_sentiments_pred))


Top-DT using GridSearchCV with criterion, max_depth and min_samples_split

Top-DT Emotions Classification Report:
                 precision    recall  f1-score   support

       sadness       0.07      0.00      0.00       783
       neutral       0.36      0.94      0.52     11094
          love       0.57      0.58      0.58      1015
     gratitude       0.89      0.72      0.80      1409
   disapproval       0.00      0.00      0.00      1505
     amusement       0.57      0.35      0.44      1217
disappointment       0.00      0.00      0.00       930
   realization       0.00      0.00      0.00       999
    admiration       0.40      0.29      0.33      2065
     annoyance       0.14      0.00      0.00      1656
     confusion       0.33      0.00      0.00      1004
      optimism       0.46      0.26      0.33       891
    excitement       0.00      0.00      0.00       575
        caring       1.00      0.00      0.00       713
       remorse       0.39      0.53      0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### 2.3.6 - Top-MLP

In [None]:
top_MLP_hyper_params = {
    'activation': ['logistic', 'tanh', 'relu', 'identity'],
    'hidden_layer_sizes': [(30, 50), (10, 10, 10)],
    'solver': ['adam', 'sgd']
}

top_MLP = MLPClassifier(max_iter=5) ### talk about low epochs in analysis
top_MLP_grid_search = GridSearchCV(estimator=top_MLP, param_grid=top_MLP_hyper_params)

top_MLP_emotions_model = top_MLP_grid_search.fit(X_emotions_train, y_emotions_train)
y_top_MLP_emotions_pred = top_MLP_emotions_model.predict(X_emotions_test)

top_MLP_sentiments_model = top_MLP_grid_search.fit(X_sentiments_train, y_sentiments_train)
y_top_MLP_sentiments_pred = top_MLP_sentiments_model.predict(X_sentiments_test)



In [None]:
print("\nTop-MLP using GridSearchCV with activation, two network architectures of our choice and solver")

# evaluate classifier
print("\nTop-MLP Emotions Classification Report:\n", classification_report(y_emotions_test, y_top_MLP_emotions_pred, target_names=emotion_labels))
print("\nTop-MLP Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_top_MLP_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nTop-MLP Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_top_MLP_emotions_pred))
print("\nTop-MLP Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_top_MLP_sentiments_pred))

## 2.4 - Classification Performance

### 2.4.1 - Base MNB Performance

In [None]:
print("Base-MNB with the default parameters")

# evaluate classifier
print("\nBase-MNB Emotions Classification Report:\n", classification_report(y_emotions_test, y_base_MNB_emotions_pred, target_names=emotion_labels))
print("\nBase-MNB Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_base_MNB_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-MNB Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_base_MNB_emotions_pred))
print("\nBase-MNB Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_base_MNB_sentiments_pred))

Base MNB with the default parameters


NameError: name 'y_emotions_test' is not defined

In [None]:
with open('performance.txt', 'a') as f:
    f.write("Base-MNB with the default parameters")

    # evaluate classifier
    f.write("\nBase-MNB Emotions Classification Report:\n")
    f.write(classification_report(y_emotions_test, y_base_MNB_emotions_pred, target_names=emotion_labels))
    f.write("\nBase-MNB Sentiments Classification Report:\n")
    f.write(classification_report(y_sentiments_test, y_base_MNB_sentiments_pred, target_names=sentiment_labels))

    # show confusion Matrix
    f.write("\nBase-MNB Emotions Confusion Matrix:\n")
    f.write(confusion_matrix(y_emotions_test, y_base_MNB_emotions_pred).to_csv)
    f.write("\nBase-MNB Sentiments Confusion Matrix:\n")
    f.write(confusion_matrix(y_sentiments_test, y_base_MNB_sentiments_pred).to_csv)

### 2.4.2 - Base DT Performance

In [None]:
print("\nBase-DT with the default parameters")

# evaluate classifier
print("\nBase-DT Emotions Classification Report:\n", classification_report(y_emotions_test, y_base_DT_emotions_pred, target_names=emotion_labels))
print("\nBase-DT Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_base_DT_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-DT Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_base_DT_emotions_pred))
print("\nBase-DT Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_base_DT_sentiments_pred))

### 2.4.3 - Base MLP Performance

In [None]:
print("\nBase-MLP with the default parameters")

# evaluate classifier
print("\nBase-MLP Emotions Classification Report:\n", classification_report(y_emotions_test, y_base_MLP_emotions_pred, target_names=emotion_labels))
print("\nBase-MLP Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_base_MLP_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-MLP Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_base_MLP_emotions_pred))
print("\nBase-MLP Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_base_MLP_sentiments_pred))

### 2.4.4 - Top MNB Performance

In [None]:
print("\nTop-MNB using GridSearchCV to find best hyper parameters with alphafloat")

# evaluate classifier
print("\nTop-MNB Emotions Classification Report:\n", classification_report(y_emotions_test, y_top_MNB_emotions_pred, target_names=emotion_labels))
print("\nTop-MNB Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_top_MNB_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_top_MNB_emotions_pred))
print("Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_top_MNB_sentiments_pred))

### 2.4.5 - Top DT Performance

In [None]:
print("\nTop-DT using GridSearchCV with criterion, max_depth and min_samples_split")

# evaluate classifier
print("\nTop-DT Emotions Classification Report:\n", classification_report(y_emotions_test, y_top_DT_emotions_pred, target_names=emotion_labels))
print("\nTop-DT Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_top_DT_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nTop-DT Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_top_DT_emotions_pred))
print("\nTop-DT Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_top_DT_sentiments_pred))

### 2.4.6 - Top MLP Performance

In [None]:
print("\nTop-MLP using GridSearchCV with activation, two network architectures of our choice and solver")

# evaluate classifier
print("\nTop-MLP Emotions Classification Report:\n", classification_report(y_emotions_test, y_top_MLP_emotions_pred, target_names=emotion_labels))
print("\nTop-MLP Sentiments Classification Report:\n", classification_report(y_sentiments_test, y_top_MLP_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nTop-MLP Emotions Confusion Matrix:\n", confusion_matrix(y_emotions_test, y_top_MLP_emotions_pred))
print("\nTop-MLP Sentiments Confusion Matrix:\n", confusion_matrix(y_sentiments_test, y_top_MLP_sentiments_pred))

## 2.5 - Different train and test splits

In [None]:
X2_emotions_train, X2_emotions_test, y2_emotions_train, y2_emotions_test = train_test_split(X, y_emotions, test_size=0.2)
X2_sentiments_train, X2_sentiments_test, y2_sentiments_train, y2_sentiments_test = train_test_split(X, y_sentiments, test_size=0.2)

### 2.5.1 - Base MNB

In [None]:
base2_MNB_emotions_model = base_MNB.fit(X2_emotions_train, y2_emotions_train)
y2_base_MNB_emotions_pred = base2_MNB_emotions_model.predict(X2_emotions_test)

base2_MNB_sentiments_model = base_MNB.fit(X2_sentiments_train, y2_sentiments_train)
y2_base_MNB_sentiments_pred = base2_MNB_sentiments_model.predict(X2_sentiments_test)

print("Second Base-MNB with the default parameters")

# evaluate classifier
print("\nBase-MNB Emotions Classification Report:\n", classification_report(y2_emotions_test, y2_base_MNB_emotions_pred, target_names=emotion_labels))
print("\nBase-MNB Sentiments Classification Report:\n", classification_report(y2_sentiments_test, y2_base_MNB_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-MNB Emotions Confusion Matrix:\n", confusion_matrix(y2_emotions_test, y2_base_MNB_emotions_pred))
print("\nBase-MNB Sentiments Confusion Matrix:\n", confusion_matrix(y2_sentiments_test, y2_base_MNB_sentiments_pred))

### 2.5.2 - Base DT

In [None]:
base2_DT_emotions_model = base_DT.fit(X2_emotions_train, y2_emotions_train)
y2_base_DT_emotions_pred = base2_DT_emotions_model.predict(X2_emotions_test)

base2_DT_sentiments_model = base_DT.fit(X2_sentiments_train, y2_sentiments_train)
y2_base_DT_sentiments_pred = base2_DT_sentiments_model.predict(X2_sentiments_test)

print("Second Base-DT with the default parameters")

# evaluate classifier
print("\nBase-DT Emotions Classification Report:\n", classification_report(y2_emotions_test, y2_base_DT_emotions_pred, target_names=emotion_labels))
print("\nBase-DT Sentiments Classification Report:\n", classification_report(y2_sentiments_test, y2_base_DT_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-DT Emotions Confusion Matrix:\n", confusion_matrix(y2_emotions_test, y2_base_DT_emotions_pred))
print("\nBase-DT Sentiments Confusion Matrix:\n", confusion_matrix(y2_sentiments_test, y2_base_DT_sentiments_pred))

### 2.5.3 - Base MLP

In [None]:
base2_MLP_emotions_model = base_MLP.fit(X2_emotions_train, y2_emotions_train)
y2_base_MLP_emotions_pred = base2_MLP_emotions_model.predict(X2_emotions_test)

base2_MLP_sentiments_model = base_MLP.fit(X2_sentiments_train, y2_sentiments_train)
y2_base_MLP_sentiments_pred = base2_MLP_sentiments_model.predict(X2_sentiments_test)

print("Second Base-MLP with the default parameters")

# evaluate classifier
print("\nBase-MLP Emotions Classification Report:\n", classification_report(y2_emotions_test, y2_base_MLP_emotions_pred, target_names=emotion_labels))
print("\nBase-MLP Sentiments Classification Report:\n", classification_report(y2_sentiments_test, y2_base_MLP_sentiments_pred, target_names=sentiment_labels))

# show confusion Matrix
print("\nBase-MLP Emotions Confusion Matrix:\n", confusion_matrix(y2_emotions_test, y2_base_MLP_emotions_pred))
print("\nBase-MLP Sentiments Confusion Matrix:\n", confusion_matrix(y2_sentiments_test, y2_base_MLP_sentiments_pred))

### 2.5.4 - Top MNB

In [None]:
top2_MNB_hyper_params = {
    # Because an alpha too small will result in numeric errors, 0 is set as 1.0e-10
    'alpha': [1.0e-10, 0.5, 1.5, 3.0]
}

top2_MNB_emotions = MultinomialNB(class_prior=emotion_freq)
top2_MNB_sentiments = MultinomialNB(class_prior=sentiment_freq)

top2_MNB_emotions_grid_search = GridSearchCV(estimator=top_MNB_emotions, param_grid=top_MNB_hyper_params)
top2_MNB_sentiments_grid_search = GridSearchCV(estimator=top_MNB_sentiments, param_grid=top_MNB_hyper_params)

top2_MNB_emotions_model = top_MNB_emotions_grid_search.fit(X_emotions_train, y_emotions_train)
y2_top_MNB_emotions_pred = top_MNB_emotions_model.predict(X_emotions_test)
# print('best params for emotions MNB: ', top_MNB_emotions_model.best_params_)

top2_MNB_sentiments_model = top2_MNB_sentiments_grid_search.fit(X_sentiments_train, y_sentiments_train)
y2_top_MNB_sentiments_pred = top2_MNB_sentiments_model.predict(X_sentiments_test)
# print('best params for sentiments MNB: ', top_MNB_sentiments_model.best_params_)

### 2.5.5 - Top DT

In [None]:
top2_DT_hyper_params = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [5, 10],
    'min_samples_split': [2, 3, 4]
}

top2_DT = DecisionTreeClassifier()
top2_DT_grid_search = GridSearchCV(estimator=top_DT, param_grid=top_DT_hyper_params)

top2_DT_emotions_model = top2_DT_grid_search.fit(X_emotions_train, y_emotions_train)
y2_top_DT_emotions_pred = top2_DT_emotions_model.predict(X_emotions_test)

top2_DT_sentiments_model = top2_DT_grid_search.fit(X_sentiments_train, y_sentiments_train)
y2_top_DT_sentiments_pred = top2_DT_sentiments_model.predict(X_sentiments_test)

### 2.5.6 - Top MLP

In [None]:
top2_MLP_hyper_params = {
    'activation': ['logistic', 'tanh', 'relu', 'identity'],
    'hidden_layer_sizes': [(30, 50), (10, 10, 10)],
    'solver': ['adam', 'sgd']
}

top2_MLP = MLPClassifier(max_iter=5) ### talk about low epochs in analysis
top2_MLP_grid_search = GridSearchCV(estimator=top_MLP, param_grid=top_MLP_hyper_params)

top2_MLP_emotions_model = top2_MLP_grid_search.fit(X_emotions_train, y_emotions_train)
y2_top_MLP_emotions_pred = top2_MLP_emotions_model.predict(X_emotions_test)

top2_MLP_sentiments_model = top2_MLP_grid_search.fit(X_sentiments_train, y_sentiments_train)
y2_top_MLP_sentiments_pred = top2_MLP_sentiments_model.predict(X_sentiments_test)