### Sentiment Analysis using a Fully Connected Neural Network

In [162]:
import numpy as np
import pandas as pd

In [163]:
train = pd.read_csv("../train.csv")
val = pd.read_csv("../val.csv")

In [164]:
from sklearn.feature_extraction.text import CountVectorizer

In [165]:
vectorizer = CountVectorizer(max_features=3000) 

In [166]:
train_words = train.iloc[:,2]
train_labels = train.iloc[:,1]

In [167]:
def to_vec (arr): 
    one = [1, 0, 0, 0, 0]
    two = [0, 1, 0, 0, 0]
    three = [0, 0, 1, 0, 0]
    four = [0, 0, 0, 1, 0]
    five = [0, 0, 0, 0, 1]
    train_labels = []
    
    for i in range (0, arr.shape[0]): 
        x = arr.iloc[i]
        if x <= 0.2: 
            value = one
        elif x <= 0.4: 
            value = two
        elif x <= 0.6: 
            value = three
        elif x <= 0.8: 
            value = four 
        elif x <= 1:
            value = five
        else: 
            print(x)
            print(arr.iloc[x])
            assert(1 == 2)
        
        train_labels.append(value)
    
    train_labels = pd.DataFrame(train_labels)
    return train_labels

In [168]:
train_labels = to_vec(train_labels)
train_features = vectorizer.fit_transform(train_words)
print(train_features.shape)
print(train_labels.shape)

(191385, 3000)
(191385, 5)


In [169]:
val_words = val.iloc[:,2]
val_labels = val.iloc[:,1]
val_labels = to_vec(val_labels)
val_features = vectorizer.transform(val_words)
print(val_labels.shape)
print(val_features.shape)

(23923, 5)
(23923, 3000)


In [170]:
from sklearn import preprocessing 
train_features = preprocessing.normalize(train_features)
val_features = preprocessing.normalize(val_features)

### Fully Connected Neural Network

In [171]:
from sklearn.neural_network import MLPClassifier
from sklearn import metrics

In [172]:
neural_net = MLPClassifier(verbose=True, warm_start=True)

In [173]:
neural_net.fit(train_features, train_labels)

Iteration 1, loss = 1.93332744
Iteration 2, loss = 1.64101884
Iteration 3, loss = 1.59267137
Iteration 4, loss = 1.55789583
Iteration 5, loss = 1.52725970
Iteration 6, loss = 1.49924568
Iteration 7, loss = 1.47343806
Iteration 8, loss = 1.44925553
Iteration 9, loss = 1.42693463
Iteration 10, loss = 1.40537766
Iteration 11, loss = 1.38477637
Iteration 12, loss = 1.36526140
Iteration 13, loss = 1.34641358
Iteration 14, loss = 1.32850074
Iteration 15, loss = 1.31095700
Iteration 16, loss = 1.29422281
Iteration 17, loss = 1.27860788
Iteration 18, loss = 1.26356013
Iteration 19, loss = 1.24867858
Iteration 20, loss = 1.23465446
Iteration 21, loss = 1.22146103
Iteration 22, loss = 1.20920264
Iteration 23, loss = 1.19712781
Iteration 24, loss = 1.18579242
Iteration 25, loss = 1.17494915
Iteration 26, loss = 1.16416618
Iteration 27, loss = 1.15426712
Iteration 28, loss = 1.14443509
Iteration 29, loss = 1.13582294
Iteration 30, loss = 1.12655993
Iteration 31, loss = 1.11815276
Iteration 32, los



MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_fun=15000, max_iter=200,
              momentum=0.9, n_iter_no_change=10, nesterovs_momentum=True,
              power_t=0.5, random_state=None, shuffle=True, solver='adam',
              tol=0.0001, validation_fraction=0.1, verbose=True,
              warm_start=True)

### Metrics

In [174]:
val_pred = neural_net.predict(val_features)
val_pred = np.argmax(val_pred, axis=1)
val_pred = np.apply_along_axis(lambda x: x + 1, 0, val_pred)

In [179]:
val_labels.shape
val_labels = val_labels.to_numpy()
val_labels = np.argmax(val_labels, axis=1)
val_labels = np.apply_along_axis(lambda x: x + 1, 0, val_labels)

In [181]:
train_pred = neural_net.predict(train_features)

In [183]:
from sklearn.metrics import accuracy_score
accuracy_score(val_pred, val_labels)
accuracy_score(train_pred, train_labels)

0.802967839694856

### Graphing

In [None]:
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import rc
sns.set_style("white")
rc('text', usetex=True)

In [None]:
sns.boxplot(val_pred, val_labels)
plt.xlabel("Predicted Value", fontsize="medium")
plt.ylabel("Actual Value", fontsize="medium")
plt.title("Neural Net Performance")
plt.tight_layout()
plt.savefig("neural net box.png", bbox_inches="tight", dpi=400)

In [None]:
sns.violinplot(val_pred, val_labels)
plt.xlabel("Predicted Value", fontsize="medium")
plt.ylabel("Actual Value", fontsize="medium")
plt.title("Neural Net Performance")
plt.tight_layout()
plt.savefig("visualization/neural net violin.png", bbox_inches="tight", dpi=400)