In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sentence_transformers import SentenceTransformer

In [2]:
pip install torch

Collecting torch
  Downloading torch-2.2.0-cp310-none-macosx_11_0_arm64.whl.metadata (25 kB)
Collecting typing-extensions>=4.8.0 (from torch)
  Using cached typing_extensions-4.9.0-py3-none-any.whl.metadata (3.0 kB)
Collecting sympy (from torch)
  Using cached sympy-1.12-py3-none-any.whl.metadata (12 kB)
Collecting fsspec (from torch)
  Using cached fsspec-2024.2.0-py3-none-any.whl.metadata (6.8 kB)
Collecting mpmath>=0.19 (from sympy->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.2.0-cp310-none-macosx_11_0_arm64.whl (59.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.7/59.7 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached typing_extensions-4.9.0-py3-none-any.whl (32 kB)
Using cached fsspec-2024.2.0-py3-none-any.whl (170 kB)
Using cached sympy-1.12-py3-none-any.whl (5.7 MB)
Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Installing collected packages: mpmath, typing-extensions, symp

In [4]:
encoder = SentenceTransformer('all-mpnet-base-v2')

Downloading (…)".gitattributes";:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)ooling/config.json";:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)"README.md";:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)"config.json";:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)_transformers.json";:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)"data_config.json";:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)e_bert_config.json";:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)al_tokens_map.json";:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)"tokenizer.json";:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)enizer_config.json";:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)"train_script.py";:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)"vocab.txt";:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)"modules.json";:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [6]:
classes = ["clarify", "email", "link", "schedule", "unknown"]

In [10]:
# Walk through all files in a directory
import os
lines = []
for class_ in classes:
    # Read contents of file
    with open('data/action_classification/' + class_ + '.txt', 'r') as f:
        lines += f.readlines()

inputs = encoder.encode(lines)
inputs = torch.Tensor(inputs)
print(inputs.shape)

torch.Size([750, 768])


In [11]:
outputs = []
for class_ in classes:
    output_1he = torch.Tensor([0] * len(classes))
    output_1he[classes.index(class_)] = 1
    for i in range(150):
        outputs.append(output_1he)

outputs = torch.stack(outputs)
print(outputs.shape)

torch.Size([750, 5])


In [12]:
# Split data
from sklearn.model_selection import train_test_split
inputs_train, inputs_test, outputs_train, outputs_test = train_test_split(inputs, outputs, test_size=0.2, random_state=42)

In [13]:
class ActionClassifier(torch.nn.Module):
    def __init__(self):
        super(ActionClassifier, self).__init__()
        self.fc1 = nn.Linear(768, 384)
        self.dropout1 = nn.Dropout(0.2)
        self.fc2 = nn.Linear(384, 384)
        self.dropout2 = nn.Dropout(0.2)
        self.fc3 = nn.Linear(384, 384)
        self.dropout3 = nn.Dropout(0.2)
        self.fc4 = nn.Linear(384, 384)
        self.dropout4 = nn.Dropout(0.2)
        self.fc5 = nn.Linear(384, len(classes))
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = F.gelu(self.fc1(x))
        x = self.dropout1(x)
        x = F.gelu(self.fc2(x))
        x = self.dropout2(x)
        x = F.gelu(self.fc3(x))
        x = self.dropout3(x)
        x = F.gelu(self.fc4(x))
        x = self.dropout4(x)
        x = F.gelu(self.fc5(x))
        x = self.softmax(x)
        return x

model = ActionClassifier()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [None]:
def calculate_entropy(data):
    unique_elements, counts = np.unique(data, return_counts=True)
    probabilities = counts / len(data)
    entropy = -np.sum(probabilities * np.log2(probabilities))
    return entropy

In [8]:
best_test_loss = float('inf')
for epoch in range(5000):
    optimizer.zero_grad()
    preds_train = model(inputs_train)
    
    entropy = calculate_entropy(preds_train)
    if entropy > 0.6:
        
        
    train_loss = criterion(preds_train, outputs_train)
    train_loss.backward()
    train_acc = (preds_train.argmax(1) == outputs_train.argmax(1)).float().mean()
    optimizer.step()

    preds_test = model(inputs_test)
    
    test_loss = criterion(preds_test, outputs_test)
    test_acc = (preds_test.argmax(1) == outputs_test.argmax(1)).float().mean()
    if test_loss < best_test_loss:
        best_test_loss = test_loss
        torch.save(model.state_dict(), 'server/action_classifier.pt')

    if epoch % 100 == 0:
        print('Epoch {} train loss: {} train acc: {} test loss: {} best test loss: {} test acc: {}'.format(epoch, train_loss, train_acc, test_loss, best_test_loss, test_acc))

Epoch 0 train loss: 1.6092833280563354 train acc: 0.2150000035762787 test loss: 1.6100573539733887 best test loss: 1.6100573539733887 test acc: 0.14000000059604645
Epoch 100 train loss: 1.4280565977096558 train acc: 0.4449999928474426 test loss: 1.5340031385421753 best test loss: 1.5340031385421753 test acc: 0.27000001072883606
Epoch 200 train loss: 1.0811355113983154 train acc: 0.8450000286102295 test loss: 1.1846754550933838 best test loss: 1.1846754550933838 test acc: 0.75
Epoch 300 train loss: 0.9466697573661804 train acc: 0.9599999785423279 test loss: 1.0640063285827637 best test loss: 1.058962345123291 test acc: 0.8500000238418579
Epoch 400 train loss: 0.9405715465545654 train acc: 0.9649999737739563 test loss: 1.0655317306518555 best test loss: 1.0533347129821777 test acc: 0.8299999833106995
Epoch 500 train loss: 0.9352471232414246 train acc: 0.9700000286102295 test loss: 1.0620434284210205 best test loss: 1.0394728183746338 test acc: 0.8399999737739563
Epoch 600 train loss: 0.9

In [14]:
action_classifier = ActionClassifier()
action_classifier.load_state_dict(torch.load('server/action_classifier.pt'))
action_classifier.eval()

ActionClassifier(
  (fc1): Linear(in_features=768, out_features=384, bias=True)
  (dropout1): Dropout(p=0.2, inplace=False)
  (fc2): Linear(in_features=384, out_features=384, bias=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (fc3): Linear(in_features=384, out_features=384, bias=True)
  (dropout3): Dropout(p=0.2, inplace=False)
  (fc4): Linear(in_features=384, out_features=384, bias=True)
  (dropout4): Dropout(p=0.2, inplace=False)
  (fc5): Linear(in_features=384, out_features=5, bias=True)
  (softmax): Softmax(dim=1)
)

In [15]:
def infer(text):
    input_ = torch.Tensor(encoder.encode([text]))
    output = action_classifier(input_)
    print(output)
    return classes[output.argmax(1)]

In [16]:
infer('Let\'s all meet sometime to discuss sprint planning for this project. Who\'s available?')

tensor([[1.4797e-07, 2.1970e-04, 1.4797e-07, 9.9978e-01, 1.4797e-07]],
       grad_fn=<SoftmaxBackward0>)


'schedule'