In [7]:
import torch
import torch.nn as nn
from transformers import AutoTokenizer


# ==========================================
# POSITIONAL ENCODING
# ==========================================
class PositionalEncoding(nn.Module):
    """Fixed (non-learned) sinusoidal positional encoding.

    A table of shape ``[sequence_length, embedding_dim]`` is built once in
    ``__init__`` and stored as the buffer ``position_matrix``. Even columns
    hold ``sin(pos / 10000 ** (col / embedding_dim))`` and each following odd
    column holds the cosine of the same angle.

    Args:
        sequence_length: Number of positions (rows) in the table.
        embedding_dim: Width of each embedding vector; may be even or odd.
    """

    def __init__(self, sequence_length, embedding_dim):
        super().__init__()
        position_matrix = torch.zeros((sequence_length, embedding_dim))

        # Column vector of positions so the division below broadcasts to
        # one row per position and one column per frequency.
        rows = torch.arange(0, sequence_length).reshape(-1, 1)
        even_columns = torch.arange(0, embedding_dim, 2)
        denominator = torch.pow(10000, even_columns.float() / embedding_dim)
        entries = rows / denominator

        position_matrix[:, ::2] = torch.sin(entries)
        # When embedding_dim is odd there is one fewer odd column than even
        # column, so trim the angles to match. For even embedding_dim the
        # slice keeps every column and the result is unchanged.
        position_matrix[:, 1::2] = torch.cos(entries[:, : embedding_dim // 2])

        # Buffer: follows .to(device) and is saved in the state_dict, but is
        # not a trainable parameter.
        self.register_buffer("position_matrix", position_matrix)

    def forward(self, word_embedding):
        """Add the positional table to a batch of embeddings.

        Args:
            word_embedding: Tensor of shape ``[batch, seq, embed_dim]``.
                ``seq`` is expected not to exceed the ``sequence_length``
                the table was built with.

        Returns:
            Tensor of the same shape with the first ``seq`` rows of the
            table added to every batch element.
        """
        seq_len = word_embedding.size(1)
        return word_embedding + self.position_matrix[:seq_len, :]


# ==========================================
# TEXT CLASSIFIER
# ==========================================
class TextClassifier(nn.Module):
    """Transformer-encoder text classifier.

    Pipeline: token embedding -> positional encoding -> stacked
    ``nn.TransformerEncoder`` -> linear head applied to the first position
    of the encoded sequence.

    Args:
        vocab_size: Number of rows in the embedding table.
        embedding_dim: Embedding width, also used as the encoder ``d_model``.
        sequence_length: Number of positions in the positional-encoding table.
        nhead: Attention heads per encoder layer.
        ff_dim: Hidden width of each encoder layer's feed-forward block.
        num_layers: Number of stacked encoder layers.
        num_classes: Number of output logits. Defaults to 2, the width that
            was previously hard-coded, so existing callers are unaffected.
    """

    def __init__(self, vocab_size, embedding_dim, sequence_length,
                 nhead=8, ff_dim=2048, num_layers=6, num_classes=2):
        super().__init__()

        # NOTE: attribute names and construction order are kept as-is so that
        # state_dict keys (and seeded initialisation) match checkpoints saved
        # by the training code.
        self.embedding_layer = nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embedding_dim
        )

        self.positional_encoding = PositionalEncoding(
            sequence_length=sequence_length,
            embedding_dim=embedding_dim
        )

        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim,
            nhead=nhead,
            dim_feedforward=ff_dim,
            batch_first=True,
        )

        # encoder_stack built from encoder_layer (same as training).
        # encoder_layer stays registered as its own attribute; removing it
        # would presumably drop keys that the strict checkpoint load expects.
        self.encoder_stack = nn.TransformerEncoder(
            self.encoder_layer,
            num_layers=num_layers
        )

        self.classifier = nn.Linear(
            in_features=embedding_dim,
            out_features=num_classes
        )

    def forward(self, x, padding_mask):
        """Compute class logits for a batch of token-id sequences.

        Args:
            x: Token ids of shape ``[batch, seq]``.
            padding_mask: Mask of shape ``[batch, seq]`` where True marks a
                real token and False marks padding.

        Returns:
            Logits of shape ``[batch, num_classes]``.
        """
        # x: [batch, seq]
        embeddings = self.embedding_layer(x)
        embeddings_plus_position = self.positional_encoding(embeddings)

        # The encoder expects the opposite convention (True = ignore this
        # position), so invert the incoming mask.
        src_key_padding_mask = torch.logical_not(padding_mask)

        encoding = self.encoder_stack(
            embeddings_plus_position,
            src_key_padding_mask=src_key_padding_mask
        )

        # The first position is used as the sequence summary (presumably the
        # tokenizer's [CLS] token -- confirm against the tokenizer in use).
        cls_token = encoding[:, 0, :]  # [batch, embed_dim]
        output = self.classifier(cls_token)
        return output


# ==========================================
# SENTIMENT PREDICTOR
# ==========================================
class SentimentTransformer:
    """Inference wrapper around a trained ``TextClassifier`` checkpoint.

    The checkpoint is expected to be a dict with:
      * ``"config"``: holds ``classes``, ``max_len``, ``tokenizer_name``,
        ``vocab_size``, ``embedding_dim`` and optionally ``num_heads``,
        ``ff_dim``, ``num_layers``.
      * ``"model_state_dict"``: the model weights.

    Use ``classify`` to get the prediction as data, or ``predict`` to print
    a human-readable summary.
    """

    def __init__(self, model_path="custom_encoder_portable_model.pth"):
        """Load the checkpoint, the tokenizer and the model weights.

        Args:
            model_path: Path to the checkpoint file.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Loading model on: {self.device}")

        # NOTE(security): torch.load is pickle-based; depending on the
        # installed PyTorch version and its weights_only default this can
        # execute arbitrary code. Only load checkpoints from trusted sources.
        checkpoint = torch.load(model_path, map_location=self.device)
        cfg = checkpoint["config"]

        self.config = cfg
        self.classes = cfg["classes"]
        self.max_len = cfg["max_len"]

        # tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(cfg["tokenizer_name"])

        # rebuild model EXACTLY like during training
        self.model = TextClassifier(
            vocab_size=cfg["vocab_size"],
            embedding_dim=cfg["embedding_dim"],
            sequence_length=cfg["max_len"],
            nhead=cfg.get("num_heads", 8),
            ff_dim=cfg.get("ff_dim", 2048),
            num_layers=cfg.get("num_layers", 6),
        ).to(self.device)

        # load weights; strip the "_orig_mod." key prefix (presumably left
        # by torch.compile at training time) so keys match the plain module.
        state_dict = {
            k.replace("_orig_mod.", ""): v
            for k, v in checkpoint["model_state_dict"].items()
        }
        self.model.load_state_dict(state_dict)  # strict=True by default

        self.model.eval()
        print("Model loaded successfully.")

    def preprocess(self, text: str):
        """Tokenize ``text`` into tensors on ``self.device``.

        The text is padded/truncated to exactly ``self.max_len`` tokens.

        Returns:
            Dict of the tokenizer's output tensors moved to ``self.device``
            (``predict``/``classify`` read ``input_ids`` and
            ``attention_mask`` from it).
        """
        encoded = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )
        return {k: v.to(self.device) for k, v in encoded.items()}

    def classify(self, text: str):
        """Classify a single text and return the result as data.

        Args:
            text: The input string to classify.

        Returns:
            Dict with keys:
              * ``"text"``: the input string,
              * ``"label"``: entry of ``self.classes`` with the highest
                probability,
              * ``"confidence"``: that probability as a Python float,
              * ``"probs"``: 1-D CPU tensor of all class probabilities.
        """
        batch = self.preprocess(text)

        with torch.no_grad():
            logits = self.model(
                batch["input_ids"],
                batch["attention_mask"].bool(),
            )
            probs = torch.softmax(logits, dim=1)
            # .item() requires exactly one row, i.e. a single input text.
            prediction_idx = torch.argmax(probs, dim=1).item()

        return {
            "text": text,
            "label": self.classes[prediction_idx],
            "confidence": probs[0][prediction_idx].item(),
            "probs": probs[0].cpu(),
        }

    def predict(self, text: str):
        """Classify ``text`` and print a formatted summary.

        Returns None (output is printed only); call ``classify`` to obtain
        the label, confidence and probabilities programmatically.
        """
        result = self.classify(text)
        label = result["label"]
        confidence = result["confidence"]
        probs = result["probs"]

        print(f"""
                Prediction:
                    text:       {text}
                    label:      {label}
                    confidence: {confidence:.2%}
                    probs:      {probs.numpy()}
                """)


In [8]:
# Build the predictor from the default checkpoint path
# ("custom_encoder_portable_model.pth"); the constructor prints the device in use.
model = SentimentTransformer()

Loading model on: cpu
Model loaded successfully.


In [10]:
# Smoke-test the predictor on a negated-sentiment sentence; predict() prints
# the label, confidence and class probabilities.
model.predict("This is not a very good car at all")


                Prediction:
                    text:       This is not a very good car at all
                    label:      Negative
                    confidence: 99.95%
                    probs:      [9.994585e-01 5.415360e-04]
                


  output = torch._nested_tensor_from_mask(
