# Evaluation for sentiment analysis

## Train-Test evaluation set against Review dataset

In [1]:
import os

label_dir = {
    "positive": ["data/positive"],
    "negative": ["data/negative"]
}

data = []
labels = []

for label in label_dir.keys():
    for sub_dir in label_dir[label]:
        for file in os.listdir(sub_dir):
            filepath = sub_dir + os.sep + file
            with open(filepath, encoding='utf8', errors='ignore', mode='r') as review:
                content = review.read()
                data.append(content)
                labels.append(label)
                
# data, labels

In [2]:
# stemmer for CountVectorizer
def stemmed_words(doc):
    from nltk.stem.snowball import PorterStemmer
    from sklearn.feature_extraction.text import CountVectorizer
    from nltk.corpus import stopwords

    p_stemmer = PorterStemmer()
    analyzer = CountVectorizer(stop_words=stopwords.words("english"), ngram_range=(1, 2)).build_analyzer()
    return (p_stemmer.stem(word.lower()) for word in analyzer(doc.lower()))

In [17]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.utils import resample
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

# get dataset
X, y = data, labels

# bootstrap to get more data (sample size = 50%)
data_sample, label_sample = resample(data, labels, n_samples=int(len(data) * 0.50), random_state=1)

# split dataset into training & testing
X_train, X_test, y_train, y_test = train_test_split(X+data_sample, y+label_sample, test_size=0.3, random_state =42)

# text preprocessing and vectorisation
vectoriser = TfidfVectorizer(analyzer=stemmed_words)
X_train_vec = vectoriser.fit_transform(X_train)

# create classifier
classifier = MultinomialNB().fit(X_train_vec, y_train)

# predict on test set
X_test_vec = vectoriser.transform(X_test)
y_pred = classifier.predict(X_test_vec)

# classifier evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9533333333333334
Confusion Matrix:
 [[213   6]
 [ 15 216]]
Classification Report:
               precision    recall  f1-score   support

    negative       0.93      0.97      0.95       219
    positive       0.97      0.94      0.95       231

    accuracy                           0.95       450
   macro avg       0.95      0.95      0.95       450
weighted avg       0.95      0.95      0.95       450


# Train-Test evaluation set against new small talk responses

In [46]:
new_data_positive = ["I'm great", "I feel happy", "Everything is awesome", "I'm doing well today", "I'm so happy today", "I'm feeling fantastic", "I'm in a good mood", "I'm feeling positive", "I'm in high spirits", "I'm feeling wonderful", "Today is a good day", "I'm in a very good mood", "I'm feeling absolutely great", "I'm feeling excellent", "I'm doing superb", "Feeling really positive today", "I'm doing wonderfully", "I'm feeling fabulous", "I'm doing awesome, thanks", "I'm feeling pretty fantastic", "I'm in a great place", "I'm loving it", "I'm feeling energised", "I'm having an incredible day", "I'm doing amazing"]

new_data_negative = ["I'm feeling awful", "I'm not great today", "I'm pretty sad", "I'm feeling down", "I feel awful", "I'm feeling terrible", "I'm doing poorly", "I’m having a tough day", "I'm feeling depressed", "I'm feeling miserable", "Everything is bad", "I'm feeling so negative", "I'm not doing okay today", "I'm really upset today", "I'm feeling upset", "Everything is bad", "I'm feeling very down", "Everything is just bad", "Today is a bad day", "I'm feeling really negative", "I'm in a bad place", "I am not good at all", "I am so bad", "I feel very bad right now", "I am feeling really down"]

25

In [48]:
# get new test sets
new_data = new_data_positive + new_data_negative
new_labels = ["positive"]*len(new_data_positive) + ["negative"]*len(new_data_negative)

# create new classifier, training all IMBD dataset
vectoriser = TfidfVectorizer(analyzer=stemmed_words)
X_train_vec = vectoriser.fit_transform(X+data_sample)
classifier = MultinomialNB().fit(X_train_vec, y+label_sample)

# predict on new test set
X_test_vec = vectoriser.transform(new_data)
y_pred = classifier.predict(X_test_vec)

# classifier evaluation
print("Accuracy:", accuracy_score(new_labels, y_pred))
print("Confusion Matrix:\n", confusion_matrix(new_labels, y_pred))
print("Classification Report:\n", classification_report(new_labels, y_pred))

Accuracy: 0.84
Confusion Matrix:
 [[21  4]
 [ 4 21]]
Classification Report:
               precision    recall  f1-score   support

    negative       0.84      0.84      0.84        25
    positive       0.84      0.84      0.84        25

    accuracy                           0.84        50
   macro avg       0.84      0.84      0.84        50
weighted avg       0.84      0.84      0.84        50
