In [1]:
import torch
from torch import nn
from text_preproc import TextPreproc

class MLPNet(nn.Module):

    def __init__(self, vec_len):
        super().__init__()
        #self.flatten = nn.Flatten
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features=vec_len, out_features=vec_len//10, bias=True),
            nn.ReLU(),
            nn.Linear(in_features=vec_len//10, out_features=vec_len//100, bias=True),
            nn.ReLU(),
            nn.Linear(in_features=vec_len//100, out_features=2, bias=True)
        )

    def forward(self, x):
        #x = self.flatten(x)
        return self.linear_relu_stack(x)

device = 'cuda' if torch.cuda.is_available() else 'cpu'
preproc_model = TextPreproc(rebalance=True)
net_model = MLPNet(preproc_model.get_vector_len()).to(device)
print(net_model)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net_model.parameters(), lr=1e-3)

MLPNet(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=7370, out_features=737, bias=True)
    (1): ReLU()
    (2): Linear(in_features=737, out_features=73, bias=True)
    (3): ReLU()
    (4): Linear(in_features=73, out_features=2, bias=True)
  )
)


In [2]:
from torch.utils.data import DataLoader, Dataset

class VecLoader(Dataset):

    def __init__(self, preproc_model, is_test=False):
        super().__init__()
        train, test = preproc_model.get_train_test_preprocd()

        if not is_test:
            x = train[train.columns[:-1]].values
            y = train[train.columns[-1]].values
        else:
            x = test[train.columns[:-1]].values
            y = test[train.columns[-1]].values

        self.x_train = torch.tensor(x).to(torch.float32)
        self.y_train = torch.tensor(y).to(torch.long)

    def __len__(self):
        return len(self.y_train)

    def __getitem__(self, idx):
        return self.x_train[idx], self.y_train[idx]


train_loader = DataLoader(VecLoader(preproc_model), batch_size=1,shuffle=False)
for i, (data, labels) in enumerate(train_loader):
    print(data.shape, labels.shape)
    print(data, labels)
    break

torch.Size([1, 7370]) torch.Size([1])
tensor([[0., 0., 0.,  ..., 0., 0., 0.]]) tensor([0])


In [3]:
def train(data_loader, model, loss_fn, optimizer):

    size = len(data_loader.dataset)
    model.train()

    for batch, (X, y) in enumerate(train_loader):
        X, y = X.to(device), y.to(device)

        # loss between forward and real vals
        pred = model(X)
        loss = loss_fn(pred, y)

        # backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 1000 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):

    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    if correct > 0.95:
        torch.save(net_model.state_dict(), "MLPNet.pth")
        print("Saved PyTorch Model State to MLPNet.pth")

epochs = 10
test_loader = DataLoader(VecLoader(preproc_model, is_test=True), batch_size=10, shuffle=False)
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, net_model, loss_fn, optimizer)
    test(test_loader, net_model, loss_fn)

print("Done!")

Epoch 1
-------------------------------
loss: 0.656216  [    0/ 7445]
loss: 0.658321  [ 1000/ 7445]
loss: 0.649914  [ 2000/ 7445]
loss: 0.646899  [ 3000/ 7445]
loss: 0.648837  [ 4000/ 7445]
loss: 0.759127  [ 5000/ 7445]
loss: 0.729092  [ 6000/ 7445]
loss: 0.730814  [ 7000/ 7445]
Test Error: 
 Accuracy: 52.1%, Avg loss: 0.691794 

Epoch 2
-------------------------------
loss: 0.663754  [    0/ 7445]
loss: 0.661707  [ 1000/ 7445]
loss: 0.650484  [ 2000/ 7445]
loss: 0.646793  [ 3000/ 7445]
loss: 0.648759  [ 4000/ 7445]
loss: 0.758124  [ 5000/ 7445]
loss: 0.727356  [ 6000/ 7445]
loss: 0.728802  [ 7000/ 7445]
Test Error: 
 Accuracy: 52.1%, Avg loss: 0.690681 

Epoch 3
-------------------------------
loss: 0.663808  [    0/ 7445]
loss: 0.661175  [ 1000/ 7445]
loss: 0.649290  [ 2000/ 7445]
loss: 0.645298  [ 3000/ 7445]
loss: 0.647583  [ 4000/ 7445]
loss: 0.757681  [ 5000/ 7445]
loss: 0.725689  [ 6000/ 7445]
loss: 0.726263  [ 7000/ 7445]
Test Error: 
 Accuracy: 52.1%, Avg loss: 0.688753 

Epoc

In [9]:
import random

_, test = preproc_model.get_train_test_preprocd()
for i in range(5):
    n = random.randint(0, len(test))
    x = torch.Tensor([test.iloc[n][test.columns[:-1]]]).to(torch.float32).to(device)
    print(net_model(x).argmax(), test.iloc[n][test.columns[-1]])

tensor(0, device='cuda:0') 0.0
tensor(1, device='cuda:0') 0.0
tensor(1, device='cuda:0') 1.0
tensor(0, device='cuda:0') 0.0
tensor(1, device='cuda:0') 1.0


In [13]:
letter_arr = ["Hi, how are you feeling? You haven't written for a long time, so I thought something might have happened.",
              'Only today! buy one king-size pizza, get one cola for free! Hurry up!',
              'love you sweetie! ;)',
              "hey, do you want to get rich? do you want to afford everything you've been dreaming about for a long time? Buy my book and I'll tell you how to become rich!",
              'bae i cannot wait anymore. I want you now!']

for letter in letter_arr:
    print(net_model(torch.Tensor(preproc_model.preproc_letter(letter)).to(torch.float32).to(device)).argmax())

tensor(0, device='cuda:0')
tensor(0, device='cuda:0')
tensor(0, device='cuda:0')
tensor(0, device='cuda:0')
tensor(0, device='cuda:0')
