In [1]:
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.simplefilter("ignore") 

from matplotlib import test
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay
from collections import Counter
from pretty_confusion_matrix import pp_matrix #pip install pretty_confusion_matrix

import gensim.downloader as api
from nltk import word_tokenize
import json
import matplotlib.pyplot as plt
import gzip
import pandas as pd
import copy
import os
import numpy as np
from fpdf import FPDF #pip install fpdf2 #pip install fpdf
from PIL import Image

# Part 1

In [2]:
sentiments = []
emotions = []
comments = []

commentsTrainVector = None
commentsTestVector = None

In [3]:
#1.2
emotionsGZIP = gzip.open("./goemotions.json.gz", "rb")
emotionsJSON = json.load(emotionsGZIP)

In [4]:
def createPieChart(dict, dictName):
    labels = []
    values = []
    for x,y in dict.items():
        labels.append(x)
        values.append(y)
    plt.pie(values, labels=labels, autopct=lambda p:f'{p:.2f}%, {p*sum(values)/100 :.0f}')
    plt.savefig(fname='./graphs/'+dictName+'.pdf', format='pdf')
    plt.clf()
    

In [5]:
#1.3
for value in emotionsJSON:
        emotions.append(value[1])
        sentiments.append(value[2])
        comments.append(value[0])

createPieChart(Counter(emotions), 'emotions_with_values')
createPieChart(Counter(sentiments), 'sentiments_with_values')

<Figure size 640x480 with 0 Axes>

# Part 2

In [None]:
#2.1
vectorizer = CountVectorizer()
X  = vectorizer.fit(comments)
print("Vocabulary size: ", len(X.vocabulary_))

In [14]:
#2.2
comments_train, comments_test, sentiments_train, sentiments_test, emotions_train, emotions_test = train_test_split(comments, sentiments, emotions, test_size=0.2, random_state=0)

## 2.3

In [None]:
#2.3
vectorizer = CountVectorizer()
commentsTrainVector = vectorizer.fit_transform(comments_train)
commentsTestVector = vectorizer.transform(comments_test)


In [15]:


def getBaseClassifiersPredictions(classifier, commentsTrainVector, commentsTestVector, sentiments_train, emotions_train):

    #emotions
    classifier.fit(commentsTrainVector, emotions_train)
    emotions_classifier = copy.deepcopy(classifier)
    emotions_pred = classifier.predict(commentsTestVector)
    #print(emotions_pred)

    #sentiments
    classifier.fit(commentsTrainVector, sentiments_train)
    sentiments_classifier = copy.deepcopy(classifier)
    sentiments_pred = classifier.predict(commentsTestVector)
    #print(sentiments_pred)

    return emotions_pred, sentiments_pred, emotions_classifier, sentiments_classifier

def getGridSearchWithModelAndParams(model, params, cvCount, jobs, commentsTrainVector, commentsTestVector, sentiments_train, emotions_train):
    #Setup GridSearch and hyperparams
    tunedClassifier = GridSearchCV(model, params, cv=cvCount, n_jobs=jobs)
    hyperparams = list(params.keys())
    hyperparamsWithParams = ['param_' + paramInList for paramInList in hyperparams]
    
    #emotions
    tunedClassifier.fit(commentsTrainVector, emotions_train)
    emotions_tunedClassifier = copy.deepcopy(tunedClassifier)
    emotions_cv_results = tunedClassifier.cv_results_
    df = pd.DataFrame(emotions_cv_results)
    
    emotions_pred = tunedClassifier.predict(commentsTestVector)
    
    print(df[hyperparamsWithParams])
    print(tunedClassifier.best_score_)
    print(tunedClassifier.best_params_)
    print(tunedClassifier.predict(commentsTestVector))

    #sentiments
    tunedClassifier.fit(commentsTrainVector, sentiments_train)
    sentiments_tunedClassifier = copy.deepcopy(tunedClassifier)
    sentiments_cv_results = tunedClassifier.cv_results_
    df = pd.DataFrame(sentiments_cv_results)

    sentiments_pred = tunedClassifier.predict(commentsTestVector)
    
    print(df[hyperparamsWithParams])
    print(tunedClassifier.best_score_)
    print(tunedClassifier.best_params_)
    print(tunedClassifier.predict(commentsTestVector))

    return emotions_pred, sentiments_pred, emotions_tunedClassifier, sentiments_tunedClassifier

In [None]:
#2.3.1
emotions_baseMNB_pred, sentiments_baseMNB_pred, emotions_baseMNB_classifier, sentiments_baseMNB_classifier = getBaseClassifiersPredictions(MultinomialNB(), commentsTrainVector, commentsTestVector, sentiments_train, emotions_train)

In [None]:
#2.3.2
emotions_baseDT_pred, sentiments_baseDT_pred, emotions_baseDT_classifier, sentiments_baseDT_classifier = getBaseClassifiersPredictions(DecisionTreeClassifier(random_state = 0), commentsTrainVector, commentsTestVector, sentiments_train, emotions_train)

In [None]:
#2.3.3
emotions_baseMLP_pred, sentiments_baseMLP_pred, emotions_baseMLP_classifier, sentiments_baseMLP_classifier = getBaseClassifiersPredictions(MLPClassifier(random_state = 0, max_iter=2), commentsTrainVector, commentsTestVector, sentiments_train, emotions_train)

In [None]:
 #2.3.4
mnb_classifier = MultinomialNB()
mnb_params = {
    'alpha': [0, 0.5, 1, 10]
    }
emotions_topMNB_pred, sentiments_topMNB_pred, emotions_topMNB_classifier, sentiments_topMNB_classifier = getGridSearchWithModelAndParams(mnb_classifier, mnb_params, 10, 2, commentsTrainVector, commentsTestVector, sentiments_train, emotions_train)

In [13]:
#2.3.5
dt_classifier = DecisionTreeClassifier(random_state = 0)
dt_params = {
    'criterion': ['entropy'],
    'max_depth': [400, 600],
    'min_samples_split': [4,6,8]
}
emotions_topDT_pred, sentiments_topDT_pred, emotions_topDT_classifier, sentiments_topDT_classifier = getGridSearchWithModelAndParams(dt_classifier, dt_params, 5, 2, commentsTrainVector, commentsTestVector, sentiments_train, emotions_train)

In [None]:
#2.3.6
mlp_classifier = MLPClassifier(random_state = 0)
mlp_params = {
    'activation': ['logistic', 'tanh', 'relu', 'identity'],
    'hidden_layer_sizes': [(10, 30), (8, 8, 8)],
    'solver': ['adam', 'sgd'],
    'max_iter': [2]
    }
emotions_topMLP_pred, sentiments_topMLP_pred, emotions_topMLP_classifier, sentiments_topMLP_classifier = getGridSearchWithModelAndParams(mlp_classifier, mlp_params, 5, 2, commentsTrainVector, commentsTestVector, sentiments_train, emotions_train)

## 2.4

In [None]:

def getNewNameFileInPrecisionFolder(fName, extension):
    i = 0
    fullFileName = "./precision/"+fName+"_%s"+extension
    while os.path.exists( fullFileName % i):
        i += 1

    return fullFileName%i

def generateConfusionMatrix(clf, y_test, y_pred, figureFileName):
    #get confusion_matrix
    cm =  confusion_matrix(y_test,y_pred)
    
    #Create graph
    cmd = ConfusionMatrixDisplay( confusion_matrix = cm, display_labels= clf.classes_)
    fig, ax = plt.subplots(figsize=(15,15))

    cmd.plot(cmap=plt.cm.Blues, ax= ax, xticks_rotation='vertical')

    figurePath = getNewNameFileInPrecisionFolder('confusion_matrix_figures/'+figureFileName, '.png')
    
    #Save graph
    cmd.figure_.savefig(figurePath)

    return cm, figurePath

def generateClassificationReport(clf, y_test, y_pred, reportFileName):
    report = classification_report(y_test, y_pred, labels= clf.classes_)

    #Create file
    filePath = getNewNameFileInPrecisionFolder('classification_reports/'+reportFileName, '.txt')
    fo = open(filePath, 'wb')
    fo.write(str.encode(report))
    fo.close()
    return report, filePath


def addImage(pdf, path):
    cover = Image.open(path)
    width, height = cover.size
    width, height = float(width * 0.264583), float(height * 0.264583)
    pdf.add_page(format=(width, height))
    pdf.image(path, 0, 0, width, height)

def createPrecisionReport(modelName=None, hyperParams=None, emotions_pred=None, sentiments_pred=None, emotions_classifier=None, sentiments_classifier=None, emotions_test= emotions_test, sentiments_test= sentiments_test):
    
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font(family='Arial', size=14)
    
    #Description
    pdf.multi_cell(0, 5,'Model: \t'+modelName+'\n')
    pdf.multi_cell(0, 5,'HyperParams: \n')
    if hyperParams is not None:
        pdf.multi_cell(0, 5,json.dumps(hyperParams, indent=4)+'\n')
    pdf.multi_cell(0, 5,'\n')

    #Emotions
    cm_emotions, figurePath_emotions = generateConfusionMatrix(emotions_classifier, emotions_test, emotions_pred, modelName+'_emotions')
    report_emotions, reportPath_emotions = generateClassificationReport(emotions_classifier, emotions_test, emotions_pred, modelName+'_emotions')

    pdf.multi_cell(0, 5,'Classifications: Emotions\n')
    pdf.multi_cell(0, 5,'Confusion Matrix:\n')
    pdf.multi_cell(0, 5,'View Visual matrix at: ' + figurePath_emotions +'\n', align='L')
    pdf.multi_cell(0, 5, np.array2string(cm_emotions)+'\n')
    pdf.multi_cell(0, 5,'Classsification Report:\n')
    pdf.multi_cell(0, 5,'View formatted classification report at: ' + reportPath_emotions +'\n', align='L')
    pdf.multi_cell(0, 5, report_emotions+'\n')
    #Sentiments
    pdf.add_page()
    cm_sentiments, figurePath_sentiments = generateConfusionMatrix(sentiments_classifier, sentiments_test, sentiments_pred, modelName+'_sentiments')
    report_sentiments, reportPath_sentiments = generateClassificationReport(sentiments_classifier, sentiments_test, sentiments_pred, modelName+'_sentiments')

    pdf.multi_cell(0, 5,'Classifications: Emotions\n')
    pdf.multi_cell(0, 5,'Confusion Matrix:\n')
    pdf.multi_cell(0, 5,'View Visual matrix at: ' + figurePath_sentiments +'\n', align='L')
    pdf.multi_cell(0, 5, np.array2string(cm_sentiments)+'\n')
    pdf.multi_cell(0, 5,'Classsification Report:\n')
    pdf.multi_cell(0, 5,'View formatted classification report at: ' + reportPath_sentiments +'\n', align='L')
    pdf.multi_cell(0, 5, report_sentiments+'\n')
    filePath = getNewNameFileInPrecisionFolder(modelName, '.pdf')

    #Create file
    pdf.output(filePath, 'F')


In [None]:
#BaseMNB
createPrecisionReport(modelName = "base_MultinomialNB", emotions_pred = emotions_baseMNB_pred, sentiments_pred = sentiments_baseMNB_pred, emotions_classifier = emotions_baseMNB_classifier, sentiments_classifier = sentiments_baseMNB_classifier)

#BaseDT
createPrecisionReport(modelName = "base_DecisionTree", emotions_pred = emotions_baseDT_pred, sentiments_pred = sentiments_baseDT_pred, emotions_classifier = emotions_baseDT_classifier, sentiments_classifier = sentiments_baseDT_classifier)

#BaseMLP
createPrecisionReport(modelName = "base_MLP", emotions_pred = emotions_baseMLP_pred, sentiments_pred = sentiments_baseMLP_pred, emotions_classifier = emotions_baseMLP_classifier, sentiments_classifier = sentiments_baseMLP_classifier)

#TopMNB
createPrecisionReport(modelName = "top_MultinomialNB", hyperParams=mnb_params, emotions_pred = emotions_topMNB_pred, sentiments_pred = sentiments_topMNB_pred, emotions_classifier = emotions_topMNB_classifier, sentiments_classifier = sentiments_topMNB_classifier)

#TopDT
createPrecisionReport(modelName = "top_DecisionTree", hyperParams=dt_params, emotions_pred = emotions_topDT_pred, sentiments_pred = sentiments_topDT_pred, emotions_classifier = emotions_topDT_classifier, sentiments_classifier = sentiments_topDT_classifier)

#TopMLP
createPrecisionReport(modelName = "top_MLP", hyperParams=mlp_params, emotions_pred = emotions_topMLP_pred, sentiments_pred = sentiments_topMLP_pred, emotions_classifier = emotions_topMLP_classifier, sentiments_classifier = sentiments_topMLP_classifier)


# 2.5

In [None]:
comments_train_2_5, comments_test_2_5, sentiments_train_2_5, sentiments_test_2_5, emotions_train_2_5, emotions_test_2_5 = train_test_split(comments, sentiments, emotions, test_size=0.6, random_state=0)

commentsTrainVector_2_5 = vectorizer.fit_transform(comments_train_2_5)
commentsTestVector_2_5 = vectorizer.transform(comments_test_2_5)

#2.3.1
emotions_baseMNB_pred_2_5, sentiments_baseMNB_pred_2_5, emotions_baseMNB_classifier_2_5, sentiments_baseMNB_classifier_2_5 = getBaseClassifiersPredictions(MultinomialNB(), commentsTrainVector_2_5, commentsTestVector_2_5, sentiments_train_2_5, emotions_train_2_5)

#2.3.2
emotions_baseDT_pred_2_5, sentiments_baseDT_pred_2_5, emotions_baseDT_classifier_2_5, sentiments_baseDT_classifier_2_5 = getBaseClassifiersPredictions(DecisionTreeClassifier(random_state = 0), commentsTrainVector_2_5, commentsTestVector_2_5, sentiments_train_2_5, emotions_train_2_5)

#2.3.3
emotions_baseMLP_pred_2_5, sentiments_baseMLP_pred_2_5, emotions_baseMLP_classifier_2_5, sentiments_baseMLP_classifier_2_5 = getBaseClassifiersPredictions(MLPClassifier(max_iter=2,random_state = 0), commentsTrainVector_2_5, commentsTestVector_2_5, sentiments_train_2_5, emotions_train_2_5)

 #2.3.4
mnb_classifier = MultinomialNB()
emotions_topMNB_pred_2_5, sentiments_topMNB_pred_2_5, emotions_topMNB_classifier_2_5, sentiments_topMNB_classifier_2_5 = getGridSearchWithModelAndParams(mnb_classifier, mnb_params, 10, 2, commentsTrainVector_2_5, commentsTestVector_2_5, sentiments_train_2_5, emotions_train_2_5)

#2.3.5
dt_classifier = DecisionTreeClassifier(random_state = 0)
emotions_topDT_pred_2_5, sentiments_topDT_pred_2_5, emotions_topDT_classifier_2_5, sentiments_topDT_classifier_2_5 = getGridSearchWithModelAndParams(dt_classifier, dt_params, 5, 2, commentsTrainVector_2_5, commentsTestVector_2_5, sentiments_train_2_5, emotions_train_2_5)

#2.3.6
mlp_classifier = MLPClassifier(random_state = 0)
emotions_topMLP_pred_2_5, sentiments_topMLP_pred_2_5, emotions_topMLP_classifier_2_5, sentiments_topMLP_classifier_2_5 = getGridSearchWithModelAndParams(mlp_classifier, mlp_params, 5, 2, commentsTrainVector_2_5, commentsTestVector_2_5, sentiments_train_2_5, emotions_train_2_5)

#BaseMNB
createPrecisionReport(modelName = "base_MultinomialNB_2.5", emotions_pred = emotions_baseMNB_pred_2_5, sentiments_pred = sentiments_baseMNB_pred, emotions_classifier = emotions_baseMNB_classifier_2_5, sentiments_classifier = sentiments_baseMNB_classifier_2_5, emotions_test= emotions_test_2_5, sentiments_test= sentiments_test_2_5)

#BaseDT
createPrecisionReport(modelName = "base_DecisionTree_2.5", emotions_pred = emotions_baseDT_pred_2_5, sentiments_pred = sentiments_baseDT_pred_2_5, emotions_classifier = emotions_baseDT_classifier_2_5, sentiments_classifier = sentiments_baseDT_classifier_2_5, emotions_test= emotions_test_2_5, sentiments_test= sentiments_test_2_5)

#BaseMLP
createPrecisionReport(modelName = "base_MLP_2.5", emotions_pred = emotions_baseMLP_pred_2_5, sentiments_pred = sentiments_baseMLP_pred_2_5, emotions_classifier = emotions_baseMLP_classifier, sentiments_classifier = sentiments_baseMLP_classifier_2_5, emotions_test= emotions_test_2_5, sentiments_test= sentiments_test_2_5)

#TopMNB
createPrecisionReport(modelName = "top_MultinomialNB_2.5", hyperParams=mnb_params, emotions_pred = emotions_topMNB_pred_2_5, sentiments_pred = sentiments_topMNB_pred_2_5, emotions_classifier = emotions_topMNB_classifier_2_5, sentiments_classifier = sentiments_topMNB_classifier_2_5, emotions_test= emotions_test_2_5, sentiments_test= sentiments_test_2_5)

#TopDT
createPrecisionReport(modelName = "top_DecisionTree_2.5", hyperParams=dt_params, emotions_pred = emotions_topDT_pred_2_5, sentiments_pred = sentiments_topDT_pred_2_5, emotions_classifier = emotions_topDT_classifier_2_5, sentiments_classifier = sentiments_topDT_classifier_2_5, emotions_test= emotions_test_2_5, sentiments_test= sentiments_test_2_5)

#TopMLP
createPrecisionReport(modelName = "top_MLP_2.5", hyperParams=mlp_params, emotions_pred = emotions_topMLP_pred_2_5, sentiments_pred = sentiments_topMLP_pred_2_5, emotions_classifier = emotions_topMLP_classifier_2_5, sentiments_classifier = sentiments_topMLP_classifier_2_5, emotions_test= emotions_test_2_5, sentiments_test= sentiments_test_2_5)


3.1

In [6]:
model = api.load("word2vec-google-news-300")

3.2

In [7]:
#3.2
tokenized_comments = []
totalToken = 0

for comment in comments:
    tokenized_comments.append(word_tokenize(comment))
    totalToken = totalToken + len(tokenized_comments)

print('Total token count: ', totalToken)

Total token count:  14761142110


3.3

In [8]:
avgs = []
w2vec_dict = dict(zip(model.key_to_index.keys(),  model.vectors))
hits = 0
totalWords = 0
for soloTokenComment in tokenized_comments:
    commentVector = []
    foundCounter = 0
    commentVector = [0 for i in range(300)]
    for token in soloTokenComment:
        totalWords = totalWords + 1
        if token in w2vec_dict:
            hits = hits + 1
            foundCounter = foundCounter + 1
            commentVector = [x+y for x,y in zip(commentVector,w2vec_dict[token])]
    
    if foundCounter != 0: 
        averagedCommentVector = [colVal / foundCounter for colVal in commentVector]
    
    avgs.append(averagedCommentVector)

print(hits / totalWords)

0.7745063827339175


3.5

In [17]:
comments_train, comments_test, sentiments_train, sentiments_test, emotions_train, emotions_test = train_test_split(avgs, sentiments, emotions, test_size=0.2, random_state=0)
clf_S = MLPClassifier().fit(comments_train, sentiments_train)

In [10]:

mlp_3classifier = MLPClassifier(
    hidden_layer_sizes=  (10, 30),
    activation= 'tanh',
    solver =  'adam',
    max_iter = 2,
    random_state = 0)

mlp_3params = {
    'activation': ['logistic', 'tanh', 'relu', 'identity'],
    'hidden_layer_sizes': [(10, 30), (8, 8, 8)],
    'solver': ['adam', 'sgd'],
    'max_iter': [2]
    }
clf_TOPS = mlp_3classifier.fit(comments_train, sentiments_train)

3.7

In [11]:
def generateConfusionMatrixEmbeddingsNoPrint(clf, y_test, y_pred, figureFileName):
    #get confusion_matrix
    cm =  confusion_matrix(y_test,y_pred)
    
    #Create graph
    cmd = ConfusionMatrixDisplay( confusion_matrix = cm)
    fig, ax = plt.subplots(figsize=(15,15))

    cmd.plot(cmap=plt.cm.Blues, ax= ax, xticks_rotation='vertical')

    figurePath = getNewNameFileInPrecisionFolder('confusion_matrix_figures/'+figureFileName, '.png')
    
    #Save graph
    cmd.figure_.savefig(figurePath)

    return cm, figurePath

def generateClassificationReportEmbeddingsNoPrint(clf, y_test, y_pred, reportFileName):
    report = classification_report(y_test, y_pred)

    #Create file
    filePath = getNewNameFileInPrecisionFolder('classification_reports/'+reportFileName, '.txt')
    fo = open(filePath, 'wb')
    fo.write(str.encode(report))
    fo.close()
    return report, filePath


def createPrecisionReportNoPrint(modelName=None, hyperParams=None, emotions_pred=None, sentiments_pred=None, emotions_classifier=None, sentiments_classifier=None, emotions_test= emotions_test, sentiments_test= sentiments_test):
    
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font(family='Arial', size=14)
    
    #Description
    pdf.multi_cell(0, 5,'Model: \t'+modelName+'\n')
    pdf.multi_cell(0, 5,'HyperParams: \n')
    if hyperParams is not None:
        pdf.multi_cell(0, 5,json.dumps(hyperParams, indent=4)+'\n')
    pdf.multi_cell(0, 5,'\n')

    #Emotions
    cm_emotions, figurePath_emotions = generateConfusionMatrixEmbeddingsNoPrint(emotions_classifier, emotions_test, emotions_pred, modelName+'_emotions')
    report_emotions, reportPath_emotions = generateClassificationReportEmbeddingsNoPrint(emotions_classifier, emotions_test, emotions_pred, modelName+'_emotions')

    pdf.multi_cell(0, 5,'Classifications: Emotions\n')
    pdf.multi_cell(0, 5,'Confusion Matrix:\n')
    pdf.multi_cell(0, 5,'View Visual matrix at: ' + figurePath_emotions +'\n', align='L')
    pdf.multi_cell(0, 5, np.array2string(cm_emotions)+'\n')
    pdf.multi_cell(0, 5,'Classsification Report:\n')
    pdf.multi_cell(0, 5,'View formatted classification report at: ' + reportPath_emotions +'\n', align='L')
    pdf.multi_cell(0, 5, report_emotions+'\n')
    #Sentiments
    pdf.add_page()
    cm_sentiments, figurePath_sentiments = generateConfusionMatrixEmbeddingsNoPrint(sentiments_classifier, sentiments_test, sentiments_pred, modelName+'_sentiments')
    report_sentiments, reportPath_sentiments = generateClassificationReportEmbeddingsNoPrint(sentiments_classifier, sentiments_test, sentiments_pred, modelName+'_sentiments')

    pdf.multi_cell(0, 5,'Classifications: Emotions\n')
    pdf.multi_cell(0, 5,'Confusion Matrix:\n')
    pdf.multi_cell(0, 5,'View Visual matrix at: ' + figurePath_sentiments +'\n', align='L')
    pdf.multi_cell(0, 5, np.array2string(cm_sentiments)+'\n')
    pdf.multi_cell(0, 5,'Classsification Report:\n')
    pdf.multi_cell(0, 5,'View formatted classification report at: ' + reportPath_sentiments +'\n', align='L')
    pdf.multi_cell(0, 5, report_sentiments+'\n')
    filePath = getNewNameFileInPrecisionFolder(modelName, '.pdf')

    #Create file
    pdf.output(filePath, 'F')

In [None]:
#BaseMLP


emotions_baseMLP_pred_3_4, sentiments_baseMLP_pred_3_4, emotions_baseMLP_classifier_3_4, sentiments_baseMLP_classifier_3_4 = getBaseClassifiersPredictions(clf_S, comments_train, comments_test, sentiments_train, emotions_train)
emotions_topMLP_pred_3_5, sentiments_topMLP_pred_3_5, emotions_topMLP_classifier_3_5, sentiments_topMLP_classifier_3_5 = getGridSearchWithModelAndParams(clf_TOPS, mlp_3params, 5, 2, comments_train, comments_test, sentiments_train, emotions_train)


createPrecisionReportNoPrint(modelName = "base_MLP_3.4", emotions_pred = emotions_baseMLP_pred_3_4, sentiments_pred = sentiments_baseMLP_pred_3_4, emotions_classifier = emotions_baseMLP_classifier, sentiments_classifier = sentiments_baseMLP_classifier_3_4, emotions_test= emotions_test, sentiments_test= sentiments_test)
createPrecisionReportNoPrint(modelName = "top_MLP_3.5", hyperParams=mlp_3params, emotions_pred = emotions_topMLP_pred_3_5, sentiments_pred = sentiments_topMLP_pred_3_5, emotions_classifier = emotions_topMLP_classifier_3_5, sentiments_classifier = sentiments_topMLP_classifier_3_5, emotions_test= emotions_test, sentiments_test= sentiments_test)


In [2]:
w2v_model = api.load('‘GoogleNews-vectors-negative300')



ValueError: Incorrect model/corpus name

In [None]:


wiki_avgs = []
wiki_w2vec_dict = dict(zip(w2v_model.key_to_index.keys(),  w2v_model.vectors))
for soloTokenComment in tokenized_comments:
    commentVector = []
    foundCounter = 0
    commentVector = [0 for i in range(300)]
    for token in soloTokenComment:
        if token in wiki_w2vec_dict:
            foundCounter = foundCounter + 1
            commentVector = [x+y for x,y in zip(commentVector,wiki_w2vec_dict[token])]
    
    if foundCounter != 0: 
        averagedCommentVector = [colVal / foundCounter for colVal in commentVector]
    
    wiki_avgs.append(averagedCommentVector)




In [None]:
comments_train, comments_test, sentiments_train, sentiments_test, emotions_train, emotions_test = train_test_split(wiki_avgs, sentiments, emotions, test_size=0.2, random_state=0)
wiki = MLPClassifier(
    hidden_layer_sizes=  (10, 30),
    activation= 'tanh',
    solver =  'adam',
    max_iter = 2,
    random_state = 0).fit(comments_train, sentiments_train)