In [4]:
import pandas as pd
import numpy as np


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import SGD

In [5]:
df = pd.read_csv('embedded_30_tweets.csv')
df.head()

Unnamed: 0,OriginalTweet,Sentiment,babbage_similarity
0,@MeNyrbie @Phil_Gahan @Chrisitv https://t.co/i...,Neutral,"[-0.006444347091019154, 0.013177386485040188, ..."
1,advice Talk to your neighbours family to excha...,Positive,"[-0.006633083801716566, 0.01087373960763216, -..."
2,Coronavirus Australia: Woolworths to give elde...,Positive,"[-0.005033580120652914, 0.01030619814991951, -..."
3,My food stock is not the only one which is emp...,Positive,"[-0.01348057109862566, 0.019621720537543297, 0..."
4,"Me, ready to go at supermarket during the #COV...",Extremely Negative,"[-0.026726385578513145, 0.0207790806889534, 0...."


In [6]:
df['babbage_similarity'] = df.babbage_similarity.apply(eval).apply(np.array)

In [8]:
df['babbage_similarity'][0].shape

(2048,)

In [183]:
df.Sentiment.value_counts()

Positive              10
Neutral                6
Extremely Positive     6
Extremely Negative     4
Negative               4
Name: Sentiment, dtype: int64

In [9]:
from sklearn.preprocessing import OrdinalEncoder

sentiment_column = df[["Sentiment"]]

encoder = OrdinalEncoder()
df['sentiment_encoded'] = encoder.fit_transform(sentiment_column)

In [182]:
encoder.categories_

[array(['Extremely Negative', 'Extremely Positive', 'Negative', 'Neutral',
        'Positive'], dtype=object)]

In [106]:
from sklearn.model_selection import train_test_split
 
X_train, X_test, y_train, y_test = train_test_split(
    df.babbage_similarity.values,
    df.sentiment_encoded.values,
    test_size = 0.2,
    random_state=42
)

In [109]:
X_train

array([array([ 0.0027842 ,  0.01915837,  0.01003171, ..., -0.01389959,
              -0.01993004, -0.02673217])                              ,
       array([-0.01107579,  0.01640144, -0.00014396, ...,  0.00975886,
               0.00374097, -0.01413418])                              ,
       array([-0.01232978,  0.01202253,  0.00701727, ...,  0.01036732,
              -0.04452201, -0.00075574])                              ,
       array([-0.00644435,  0.01317739, -0.01283282, ..., -0.00715676,
              -0.02316985, -0.01435078])                              ,
       array([-0.02672639,  0.02077908,  0.00284367, ..., -0.0013158 ,
              -0.00661774, -0.00029275])                              ,
       array([-0.00212527,  0.0302756 ,  0.01443285, ..., -0.01366298,
              -0.02641694, -0.0105742 ])                              ,
       array([-0.03036317,  0.00143428,  0.00986999, ...,  0.0138219 ,
              -0.00641696, -0.00299817])                              ,

In [146]:
class BasicNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.hid1 = torch.nn.Linear(2048, 10)  # 2048-(10-10)-5
        self.hid2 = torch.nn.Linear(10, 10)
        self.oupt = torch.nn.Linear(10, 5)

        torch.nn.init.xavier_uniform_(self.hid1.weight)
        torch.nn.init.zeros_(self.hid1.bias)
        torch.nn.init.xavier_uniform_(self.hid2.weight)
        torch.nn.init.zeros_(self.hid2.bias)
        torch.nn.init.xavier_uniform_(self.oupt.weight)
        torch.nn.init.zeros_(self.oupt.bias)


    def forward(self, input):

        z = torch.tanh(self.hid1(input))
        z = torch.tanh(self.hid2(z))
        z = self.oupt(z)  # no softmax: CrossEntropyLoss() 
        return z




In [147]:
import torch.optim as optim

model2 = BasicNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model2.parameters(), lr=0.001, )



In [148]:
for epoch in range(100):  # loop over the dataset multiple times

    running_loss = 0.0
    for i in range(len(X_train)):
        # get the inputs; data is a list of [inputs, labels]
        inputs = torch.tensor([X_train[i]], dtype=torch.float32)
  
        labels = torch.tensor([y_train[i]], dtype=torch.long)
   

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model2(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 10 == 9:    # print every 10 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 10))
            running_loss = 0.0

[1,    10] loss: 1.605
[1,    20] loss: 1.611
[2,    10] loss: 1.604
[2,    20] loss: 1.610
[3,    10] loss: 1.604
[3,    20] loss: 1.609
[4,    10] loss: 1.603
[4,    20] loss: 1.608
[5,    10] loss: 1.603
[5,    20] loss: 1.607
[6,    10] loss: 1.603
[6,    20] loss: 1.606
[7,    10] loss: 1.602
[7,    20] loss: 1.605
[8,    10] loss: 1.602
[8,    20] loss: 1.604
[9,    10] loss: 1.601
[9,    20] loss: 1.603
[10,    10] loss: 1.601
[10,    20] loss: 1.602
[11,    10] loss: 1.601
[11,    20] loss: 1.601
[12,    10] loss: 1.600
[12,    20] loss: 1.601
[13,    10] loss: 1.600
[13,    20] loss: 1.600
[14,    10] loss: 1.600
[14,    20] loss: 1.599
[15,    10] loss: 1.599
[15,    20] loss: 1.598
[16,    10] loss: 1.599
[16,    20] loss: 1.597
[17,    10] loss: 1.599
[17,    20] loss: 1.596
[18,    10] loss: 1.599
[18,    20] loss: 1.596
[19,    10] loss: 1.598
[19,    20] loss: 1.595
[20,    10] loss: 1.598
[20,    20] loss: 1.594
[21,    10] loss: 1.598
[21,    20] loss: 1.593
[22,    10

In [168]:
for i in range(len(X_test)):

    total_loss = 0

    inputs = torch.tensor([X_test[i]], dtype=torch.float32)
    labels = torch.tensor([y_test[i]], dtype=torch.long)
    outputs = model2(inputs)
    loss = criterion(outputs, labels)
    total_loss += loss.item()

print(total_loss)


1.825679063796997


In [175]:
#import aoc scikit
from sklearn.metrics import accuracy_score

#predict
y_pred = model2(torch.tensor([X_test], dtype=torch.float32))
y_pred

tensor([[[-0.2905, -0.0675, -0.2575, -0.0648,  0.3592],
         [-0.2923, -0.0241, -0.2300, -0.0353,  0.3714],
         [-0.2806, -0.0065, -0.2332, -0.0280,  0.3773],
         [-0.2910, -0.0391, -0.2541, -0.0433,  0.3636],
         [-0.3002, -0.0519, -0.2407, -0.0439,  0.3654],
         [-0.3257, -0.0832, -0.2585, -0.0571,  0.3607]]],
       grad_fn=<ViewBackward0>)

In [178]:
torch.argmax(y_pred, dim=2)

tensor([[4, 4, 4, 4, 4, 4]])

In [173]:
y_test

array([0., 4., 1., 3., 4., 2.])

In [181]:
accuracy = accuracy_score(y_test, torch.argmax(y_pred, dim=2)[0].detach().numpy())
print(accuracy)

0.3333333333333333
