In [1]:
import torch

In [26]:
import json

# Load the keyword list from the corpus file and report how many keywords it holds.
# The encoding is pinned to UTF-8 (the standard JSON encoding) so the result does
# not depend on the platform's locale default.
with open("corpus.json", "r", encoding="utf-8") as f:
    to_read = json.load(f)
key_words = to_read['key_words']
print(len(key_words))

770


In [2]:
# Sanity check on the factorisation of the input width used below.
3 * 257 == 771

True

In [3]:
# Random stand-in batch used to try out the pooling layers: 16 rows of 771 features,
# sampled uniformly from [0, 1).
# NOTE(review): corpus.json reports 770 key words while the width here is 771 — confirm
# where the extra feature comes from.
batch_size = 16
input_shape = (batch_size, 771)
input_layer = torch.rand(*input_shape)

In [4]:
from torch.nn import AvgPool1d

# First pooling stage: average over windows of 3 elements, moving 2 elements at a time.
first_avg_layer = AvgPool1d(kernel_size=3, stride=2)
avg_pool = first_avg_layer(input_layer)

In [15]:
# Inspect the result of the first pooling stage (width 771 becomes 385).
avg_pool.size()

torch.Size([16, 385])

In [5]:
from torch.nn import MaxPool1d

# Second pooling stage: maximum over non-overlapping windows of 2 elements.
max_pool_layer = MaxPool1d(kernel_size=2, stride=2)
max_pool = max_pool_layer(avg_pool)

In [20]:
# Inspect the result of the second pooling stage (width 385 becomes 192).
max_pool.size()

torch.Size([16, 192])

In [22]:
# Third pooling stage: average over non-overlapping windows of 3 elements,
# then show the resulting size (width 192 becomes 64).
second_avg_layer = AvgPool1d(kernel_size=3, stride=3)
avg_pool_2 = second_avg_layer(max_pool)
avg_pool_2.size()

torch.Size([16, 64])

In [3]:
import torch

# Ask PyTorch whether a CUDA device can be used in this environment.
torch.cuda.is_available()

True

In [11]:
from torch import nn

class BlogClassifier(nn.Module):
    """Small two-class classifier: three pooling stages followed by a dense head.

    The pooling stages shrink the last dimension of the input (the 771-wide
    input used in this notebook ends up 64 wide), and the dense head maps those
    64 features to 2 outputs that are normalised with a softmax over dim 1.
    """

    def __init__(self):
        """Build the layer stack and store it as ``base_blog_classifier``."""
        super().__init__()

        # Parameter-free down-sampling of the feature axis.
        pooling_stages = [
            nn.AvgPool1d(kernel_size=3, stride=2),
            nn.MaxPool1d(kernel_size=2, stride=2),
            nn.AvgPool1d(kernel_size=3, stride=3),  # (batch_size, 64)
        ]
        # Fully connected head: 64 -> 16 -> 4 -> 2, softmax on the last step.
        dense_head = [
            nn.Linear(64, 16),
            nn.ReLU(),
            nn.Linear(16, 4),
            nn.ReLU(),
            nn.Linear(4, 2),
            nn.Softmax(dim=1),
        ]
        self.base_blog_classifier = nn.Sequential(*pooling_stages, *dense_head)

    def forward(self, x):
        """Run ``x`` through the whole stack and return the softmax output."""
        class_scores = self.base_blog_classifier(x)
        return class_scores

In [12]:
# Use the GPU when PyTorch reports one as available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

In [13]:
# Create the classifier, move it to the selected device, and print its layer stack.
model = BlogClassifier()
model = model.to(device)
print(model)

BlogClassifier(
  (base_blog_classifier): Sequential(
    (0): AvgPool1d(kernel_size=(3,), stride=(2,), padding=(0,))
    (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (2): AvgPool1d(kernel_size=(3,), stride=(3,), padding=(0,))
    (3): Linear(in_features=64, out_features=16, bias=True)
    (4): ReLU()
    (5): Linear(in_features=16, out_features=4, bias=True)
    (6): ReLU()
    (7): Linear(in_features=4, out_features=2, bias=True)
    (8): Softmax(dim=1)
  )
)


In [18]:
# Put the random batch on the same device as the model, then run one forward pass.
input_layer = input_layer.to(device=device)
prediction: torch.Tensor = model(input_layer)

In [25]:
import numpy as np

# Highest softmax output per row, as a NumPy array detached from the autograd graph.
probabilities = prediction.detach().cpu().numpy()
np.max(probabilities, axis=1)


array([0.5241101 , 0.5238142 , 0.5153617 , 0.51904035, 0.52541095,
       0.5220236 , 0.52196616, 0.5267083 , 0.5225529 , 0.5224907 ,
       0.5197014 , 0.52261776, 0.52475965, 0.5184817 , 0.52251923,
       0.52464783], dtype=float32)

In [None]:
# The network has two output nodes; after the softmax each one emits a float
# that is read as a class probability.

In [33]:
batch_size = 16

# TODO: prepare the training data here (dataset and dataloader yielding (X, y) batches).
# This step has not been implemented yet.

In [34]:
# Plain SGD with a small momentum term. Adam would probably be the better optimiser,
# but SGD keeps this small project simple.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01, momentum=0.004)
# Binary cross-entropy computed on probabilities.
loss_fn = nn.BCELoss()

In [35]:
def train(model: nn.Module, dataloader, device,
          loss_fn, optimizer: torch.optim.Optimizer,
          verbose: bool=False,
          n_epochs: int=50):
    """Train ``model`` in place for ``n_epochs`` passes over ``dataloader``.

    The loss is computed on the model's probabilities, never on ``argmax``
    class indices: ``argmax`` yields integer indices with no gradient, so a
    loss built on it cannot be back-propagated.

    Args:
        model: Network to optimise. It is switched to training mode and is
            expected to return per-class probabilities of shape
            ``(batch, n_classes)``.
        dataloader: Iterable yielding ``(X, y)`` batches of tensors.
        device: Device every batch is moved to before the forward pass.
        loss_fn: Callable ``loss_fn(probabilities, target)`` returning a
            scalar tensor (for example ``nn.BCELoss()``).
        optimizer: Optimizer that updates ``model``'s parameters.
        verbose: When true, print the loss on every 10th batch.
        n_epochs: Number of full passes over ``dataloader``.

    Returns:
        None. The model's parameters are updated in place.
    """
    model.train()
    for epoch in range(1, n_epochs + 1):
        for batch, (X, y) in enumerate(dataloader, start=1):
            X, y = X.to(device), y.to(device)

            # Start each step from clean gradients, whatever happened before the call.
            optimizer.zero_grad()

            prediction = model(X)
            if y.shape == prediction.shape:
                # Targets already have one column per output node (e.g. one-hot).
                probabilities = prediction
                target = y.to(prediction.dtype)
            else:
                # 0/1 class labels: compare them with the probability of class 1.
                probabilities = prediction[:, 1]
                target = y.reshape(probabilities.shape).to(prediction.dtype)
            loss = loss_fn(probabilities, target)

            loss.backward()
            optimizer.step()

            if verbose and batch % 10 == 0:
                print(f"Loss {loss.item()} on epoch {epoch}")