# Sentiment Classifier

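This notebook trains an XGBoost classifier on vector representations of sentences to predict their sentiment (positive vs. negative). The model is fitted on the vectors in `positive.csv` / `negative.csv` and evaluated on the vectors in `positive_generated.csv` / `negative_generated.csv`.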

### Import Libraries

In [66]:
!pip install xgboost



In [88]:
import re
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import xgboost
from sklearn import metrics
from tqdm import tqdm

# from gensim.models import Word2Vec
# from nltk import regexp_tokenize, WordNetLemmatizer

In [68]:
import warnings
warnings.filterwarnings("ignore")

### Input

In [80]:
# Build the training set: rows from positive.csv are labelled y=1 and rows
# from negative.csv are labelled y=0.
df_positive = pd.read_csv('positive.csv').assign(y=1)
df_negative = pd.read_csv('negative.csv').assign(y=0)

# ignore_index=True already gives the stacked frame a fresh 0..n-1 index, so no
# separate reset_index is needed.
# 'Unnamed: 0' is presumably the row index saved when the CSVs were written
# (TODO confirm); it is dropped so that it is not used as a feature.
df_train = (
    pd.concat([df_positive, df_negative], axis=0, ignore_index=True, sort=False)
    .drop(columns=['Unnamed: 0'])
)

# Split into the feature matrix (every column except 'y') and the label vector.
x_train, y_train = df_train.drop(columns='y'), df_train['y']

In [81]:
# Build the evaluation set from the generated sentences: rows from
# positive_generated.csv are labelled y=1, rows from negative_generated.csv y=0.
df_positive_gen = pd.read_csv('positive_generated.csv').assign(y=1)
df_negative_gen = pd.read_csv('negative_generated.csv').assign(y=0)

# ignore_index=True already gives the stacked frame a fresh 0..n-1 index, so no
# separate reset_index is needed.
# 'Unnamed: 0' is presumably the row index saved when the CSVs were written
# (TODO confirm); it is dropped so that it is not used as a feature.
df_test = (
    pd.concat([df_positive_gen, df_negative_gen], axis=0, ignore_index=True, sort=False)
    .drop(columns=['Unnamed: 0'])
)

# Split into the feature matrix (every column except 'y') and the label vector.
x_test, y_test = df_test.drop(columns='y'), df_test['y']

In [82]:
# Display the training feature matrix (df_train without the 'y' label column).
x_train

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x90,x91,x92,x93,x94,x95,x96,x97,x98,x99
0,0.000484,-0.001555,0.000901,0.003060,-0.000354,0.003025,0.002802,-0.000805,-0.004950,0.000273,...,0.002631,-0.004313,-0.001466,0.001000,0.000482,-0.004365,0.002265,0.000104,0.002418,-0.000588
1,-0.002626,0.004408,-0.001750,0.002761,-0.002218,-0.001867,0.000856,0.002642,-0.004448,-0.000254,...,0.000543,0.003214,0.004456,-0.001625,0.004442,0.002349,0.001591,-0.002265,-0.000418,0.003222
2,-0.004296,0.003443,-0.000870,0.002275,0.007596,0.000530,-0.001178,0.004529,-0.005782,-0.000132,...,-0.001725,0.002178,0.005754,-0.006012,0.002900,0.004035,0.005682,-0.001018,-0.000608,-0.001250
3,-0.003848,0.002243,0.003664,0.008289,-0.007108,-0.005873,0.000278,0.006246,-0.004882,0.002286,...,-0.000876,0.001883,-0.001149,-0.004258,0.002196,0.005888,-0.001231,0.001182,0.003990,0.006630
4,-0.006716,0.001345,-0.003990,0.003215,0.001312,-0.005515,-0.000634,0.006152,0.001488,-0.005713,...,-0.008173,0.003808,0.001652,0.002432,0.009852,-0.003864,-0.001543,-0.000846,0.003439,-0.001001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
332,0.003769,-0.002087,0.002018,0.005033,-0.004732,-0.002996,0.006310,0.004513,-0.004171,-0.002658,...,-0.000525,0.000322,-0.000029,-0.000110,0.004489,0.002951,-0.000323,-0.006393,-0.000369,0.005986
333,-0.007230,0.004255,0.002167,0.007470,-0.004872,-0.004581,-0.006076,0.003348,-0.004548,0.008493,...,-0.003462,0.003495,-0.005792,-0.008765,-0.005478,0.006755,0.006461,0.009435,0.007062,0.006767
334,-0.008933,0.009129,0.001981,0.003656,0.005645,-0.000596,0.006284,0.001293,-0.000411,-0.001655,...,-0.006237,0.000029,0.001056,-0.002060,0.009834,0.003952,0.004186,-0.000170,0.001693,-0.005563
335,0.008165,-0.004448,0.008973,0.008266,-0.004409,0.000304,0.004289,-0.003908,-0.005582,-0.006522,...,0.002061,-0.004005,-0.008224,0.006281,-0.001928,-0.000659,-0.001748,-0.004542,0.004061,-0.004275


In [83]:
# Display the test feature matrix (df_test without the 'y' label column).
# NOTE(review): the saved output of this cell shows many all-zero rows and
# several repeated rows; worth confirming how the generated sentences were
# vectorised before trusting the evaluation numbers.
x_test

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x90,x91,x92,x93,x94,x95,x96,x97,x98,x99
0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1,-0.000731,0.000322,0.006957,0.012282,-0.012682,-0.009702,0.008805,0.012232,-0.006837,-0.00513,...,0.002224,0.000259,0.004735,0.000297,0.013113,0.006899,-0.012157,-0.009599,0.001229,0.008715
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,-0.000731,0.000322,0.006957,0.012282,-0.012682,-0.009702,0.008805,0.012232,-0.006837,-0.00513,...,0.002224,0.000259,0.004735,0.000297,0.013113,0.006899,-0.012157,-0.009599,0.001229,0.008715
4,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
332,-0.000721,0.000318,0.006861,0.012112,-0.012507,-0.009568,0.008683,0.012063,-0.006743,-0.00506,...,0.002193,0.000255,0.004670,0.000293,0.012932,0.006804,-0.011989,-0.009467,0.001212,0.008594
333,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
334,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
335,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [71]:
# Display the predicted labels.
# NOTE(review): in the visible notebook `preds` is first assigned by
# `xgb.predict(x_test)` in the XGBoost cell further down, so on a fresh kernel
# (Restart & Run All) this cell would fail with NameError unless it is moved
# below that cell.
preds

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [84]:
# Fit an XGBoost classifier on the training vectors, predict the labels of the
# test vectors, and report accuracy together with wall-clock timings.
# `time` comes from the notebook's import cell.
N_ESTIMATORS = 1000  # value passed as XGBClassifier's n_estimators

xgb = xgboost.XGBClassifier(n_estimators=N_ESTIMATORS)

# perf_counter() is a monotonic high-resolution clock, suited to timing short intervals.
training_start = time.perf_counter()
xgb.fit(x_train, y_train)
training_end = time.perf_counter()

prediction_start = time.perf_counter()
preds = xgb.predict(x_test)
prediction_end = time.perf_counter()

# Share of predictions that match the true label, as a percentage.
acc_xgb = (preds == y_test).mean() * 100
xgb_train_time = training_end - training_start
xgb_prediction_time = prediction_end - prediction_start

print("XGBoost's prediction accuracy is: %3.2f" % (acc_xgb))
print("Time consumed for training: %4.3f" % (xgb_train_time))
print("Time consumed for prediction: %6.5f seconds" % (xgb_prediction_time))

XGBoost's prediction accuracy is: 45.99
Time consumed for training: 1.482
Time consumed for prediction: 0.00900 seconds


In [1]:
# Evaluate the test-set predictions: overall accuracy plus a confusion matrix.
# This cell needs `metrics`, `plt`, `y_test` and `preds` in the kernel, so the
# import, data-loading and model cells must have been run first.
accuracy = metrics.accuracy_score(y_test, preds)
print(f"Accuracy: {accuracy*100:.2f}%")

# Pin the label order explicitly (0 = negative, 1 = positive) so the matrix is
# always 2x2 and its rows/columns line up with the display labels, even when
# one class does not occur in y_test or preds.
confusion_matrix = metrics.confusion_matrix(y_test, preds, labels=[0, 1])

cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix=confusion_matrix,
    display_labels=["Negative", "Positive"],
)

cm_display.plot(cmap=plt.cm.Blues)
# Render the figure explicitly; this also keeps the display object's repr out
# of the cell output.
plt.show()

NameError: name 'metrics' is not defined