### Sentiment Analysis using a Fully Connected Neural Network

In [1]:
import numpy as np
import pandas as pd

In [2]:
train = pd.read_csv("../train.csv")
val = pd.read_csv("../val.csv")

In [3]:
from sklearn.feature_extraction.text import CountVectorizer

In [4]:
vectorizer = CountVectorizer(
max_features = 2000)

In [5]:
train_words = train.iloc[:,2]
train_labels = train.iloc[:,1]
to_class = lambda x : 1 if x <= 0.20 else 2 if x <= 0.4 else 3 if x <=0.6 else 4 if x <= 0.8 else 5 if x <= 1 else 0
train_labels = train_labels.apply(to_class)

In [6]:
train_features = vectorizer.fit_transform(train_words)

In [7]:
val_words = val.iloc[:,2]
val_labels = val.iloc[:,1]
val_labels = val_labels.apply(to_class)
val_features = vectorizer.transform(val_words)

### Fully Connected Neural Network

In [8]:
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

In [9]:
neural_net = MLPClassifier(verbose=True, warm_start=True)

In [11]:
neural_net.fit(train_features, train_labels)

Iteration 158, loss = 0.46293585


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=True,
              warm_start=True)

In [12]:
val_pred = neural_net.predict(val_features)

In [13]:
from sklearn.metrics import accuracy_score
accuracy_score(val_pred, val_labels)

0.607992308656941

### Graphing

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rc
sns.set_style("white")
rc('text', usetex=True)

In [None]:
sns.boxplot(val_pred, val_labels)
plt.xlabel("Predicted Value", fontsize="medium")
plt.ylabel("Actual Value", fontsize="medium")
plt.title("Neural Net Performance")
plt.tight_layout()
plt.savefig("neural net box.png", bbox_inches="tight", dpi=400)

In [None]:
sns.violinplot(val_pred, val_labels)
plt.xlabel("Predicted Value", fontsize="medium")
plt.ylabel("Actual Value", fontsize="medium")
plt.title("Neural Net Performance")
plt.tight_layout()
plt.savefig("visualization/neural net violin.png", bbox_inches="tight", dpi=400)