In [54]:
import os
import sys

# Directory holding the project's importable modules. The default is the
# original author's checkout; set SENTIMENT_SRC_DIR to run the notebook from a
# different machine or location without editing this cell.
SRC_DIR = os.environ.get(
    "SENTIMENT_SRC_DIR",
    "/Users/maxmartyshov/Desktop/IU/year3/PMDL/Sentiment_Analysis_for_Financial_News/src",
)

# Guarded so that re-running the cell does not pile up duplicate entries.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [55]:
from pipline_extract import extract_latest_loaders

# Fetch the loaders produced by the pipeline and bind the two splits that the
# training and validation loops below iterate over.
dataloaders = extract_latest_loaders()
train_loader, val_loader = dataloaders['train'], dataloaders['validation']

Pipeline artifact [: ae9e60fe-78f1-4f14-becc-d3b5837abed6] loaded successfully


In [56]:
def get_input_example():
    """Return the model inputs of one training batch as CPU tensors.

    Takes the first batch yielded by the module-level ``train_loader`` and
    keeps only the three entries that ``SentimentAnalysisModel.forward``
    accepts.

    Returns:
        dict with keys ``"input_ids"``, ``"attention_mask"`` and
        ``"has_source"``, each holding the batch tensor moved to the CPU.
    """
    batch = next(iter(train_loader))

    # Convert only the entries that are returned: the batch may carry other
    # values (labels, or non-tensor metadata) that have no ``.cpu()`` method,
    # and the batch dict itself is left unmodified.
    return {
        key: batch[key].cpu()
        for key in ("input_ids", "attention_mask", "has_source")
    }

In [57]:
import torch.nn as nn
import torch

from transformers import BertModel


class SentimentAnalysisModel(nn.Module):
    """BERT encoder followed by a single linear classification layer.

    The pooled BERT output is concatenated with one extra scalar feature per
    example (``has_source``), passed through dropout and projected to
    ``num_labels`` logits.

    Args:
        bert_model_name: Name or path passed to ``BertModel.from_pretrained``.
        num_labels: Number of output classes (size of the logit vector).
        dropout: Dropout probability applied to the concatenated features
            before the linear layer. Defaults to 0.3.
    """

    def __init__(self, bert_model_name='bert-base-uncased', num_labels=3, dropout=0.3):
        super().__init__()

        self.bert = BertModel.from_pretrained(bert_model_name)

        # One extra input feature for the has_source value appended in forward().
        self.linear1 = nn.Linear(self.bert.config.hidden_size + 1, num_labels)

        self.dropout = nn.Dropout(dropout)

    def forward(self, input_ids, attention_mask, has_source):
        """Compute class logits for a batch.

        Args:
            input_ids: Token ids forwarded to BERT.
            attention_mask: Attention mask forwarded to BERT.
            has_source: One value per example, shaped ``(batch,)`` or
                ``(batch, 1)``; any numeric or boolean dtype.

        Returns:
            Tensor of shape ``(batch, num_labels)`` with unnormalised logits.
        """
        embeddings = self.bert(input_ids=input_ids, attention_mask=attention_mask).pooler_output

        # Make the flag a (batch, 1) column in the embeddings' dtype so the
        # concatenation never changes the dtype that reaches the linear layer.
        has_source = has_source.reshape(embeddings.size(0), 1).to(embeddings.dtype)
        combined_input = torch.cat((embeddings, has_source), dim=1)

        regularized = self.dropout(combined_input)
        return self.linear1(regularized)


In [58]:
from tqdm import tqdm

import mlflow
import mlflow.pytorch

def train_one_epoch(model, dataloader, optimizer, criterion, device, epoch):
    """Train ``model`` for one pass over ``dataloader`` and log the mean loss.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...,
            has_source=...)`` and returning logits.
        dataloader: Sized iterable of batches; each batch is a mapping with
            the keys ``'input_ids'``, ``'attention_mask'``, ``'has_source'``
            and ``'labels'``.
        optimizer: Optimizer stepped once per batch.
        criterion: Loss callable taking ``(logits, labels)``.
        device: Device the batch tensors are moved to.
        epoch: Epoch index, used for the progress-bar label and as the MLflow
            metric step.

    Returns:
        The sample-weighted average training loss of the epoch, which is also
        logged to MLflow as ``train_loss``. If the dataloader yields no
        samples, nothing is logged and ``float('nan')`` is returned.
    """
    model.train()
    running_loss = 0.0
    seen = 0

    progress = tqdm(
        dataloader,
        total=len(dataloader),
        desc=f"Epoch {epoch}: train",
        leave=True,
    )

    for batch in progress:
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        has_source = batch['has_source'].to(device)
        labels = batch['labels'].to(device)

        optimizer.zero_grad()

        logits = model(input_ids=input_ids, attention_mask=attention_mask, has_source=has_source)
        loss = criterion(logits, labels)

        loss.backward()
        optimizer.step()

        # Weight each batch loss by its size so a smaller final batch does not
        # skew the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        seen += batch_size

        progress.set_postfix({"loss": running_loss / max(seen, 1)})

    if seen == 0:
        # Nothing was trained on; avoid dividing by zero and skip the metric.
        return float("nan")

    avg_train_loss = running_loss / seen
    mlflow.log_metric('train_loss', avg_train_loss, step=epoch)
    return avg_train_loss


def val_one_epoch(model, dataloader, criterion, device, epoch, best_so_far, ckpt_name='model'):
    """Evaluate ``model`` on ``dataloader`` and checkpoint it on improvement.

    Logs ``validation_loss`` and ``validation_accuracy`` to MLflow with
    ``epoch`` as the step. When the accuracy is strictly greater than
    ``best_so_far`` the model is logged to the active MLflow run under
    ``ckpt_name``.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...,
            has_source=...)`` and returning logits of shape (batch, classes).
        dataloader: Sized iterable of batches; each batch is a mapping with
            the keys ``'input_ids'``, ``'attention_mask'``, ``'has_source'``
            and ``'labels'``.
        criterion: Loss callable taking ``(logits, labels)``.
        device: Device the batch tensors are moved to.
        epoch: Epoch index, used for the progress-bar label and metric step.
        best_so_far: Best validation accuracy seen in earlier epochs.
        ckpt_name: Artifact path used when logging the model.

    Returns:
        The updated best accuracy: the new accuracy if it improved, otherwise
        ``best_so_far``. If the dataloader yields no samples, nothing is
        logged and ``best_so_far`` is returned unchanged.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    seen = 0

    with torch.no_grad():
        progress = tqdm(
            dataloader,
            total=len(dataloader),
            desc=f"Epoch {epoch}: val",
            leave=True,
        )
        for batch in progress:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            has_source = batch['has_source'].to(device)
            labels = batch['labels'].to(device)

            logits = model(input_ids=input_ids, attention_mask=attention_mask, has_source=has_source)

            # Weight by batch size so the epoch figures are per-sample averages.
            batch_size = labels.size(0)
            running_loss += criterion(logits, labels).item() * batch_size
            correct += (logits.argmax(dim=1) == labels).sum().item()
            seen += batch_size

            progress.set_postfix({
                "loss": running_loss / max(seen, 1),
                "acc": correct / max(seen, 1),
            })

    if seen == 0:
        # Nothing was evaluated; avoid dividing by zero and keep the old best.
        return best_so_far

    current_acc = correct / seen
    avg_val_loss = running_loss / seen
    mlflow.log_metric('validation_loss', avg_val_loss, step=epoch)
    mlflow.log_metric('validation_accuracy', current_acc, step=epoch)

    if current_acc > best_so_far:
        print(f"Validation accuracy improved from {best_so_far:.4f} to {current_acc:.4f}. Saving model...")
        mlflow.pytorch.log_model(model, ckpt_name)
        best_so_far = current_acc

    return best_so_far



In [59]:
from mlflow.tracking import MlflowClient

def register_model(run_id, model_name, description):
    """Register a run's logged model in the MLflow registry and describe it.

    The model is looked up at ``runs:/<run_id>/<model_name>``, i.e. the
    artifact path inside the run is expected to equal the registry name.

    Args:
        run_id: Id of the MLflow run that logged the model.
        model_name: Artifact path within the run; also used as registry name.
        description: Text stored on the newly created model version.

    Returns:
        The version identifier of the newly registered model.
    """
    registry = MlflowClient()
    source_uri = "runs:/{}/{}".format(run_id, model_name)

    registered = mlflow.register_model(source_uri, model_name)
    version = registered.version
    print(f"Model registered with name '{model_name}' and version '{version}'")

    # Attach the human-readable description to the version just created.
    registry.update_model_version(name=model_name, version=version, description=description)
    return version


In [60]:
import torch.optim as optim
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

epochs = 10
# Prefer Apple-silicon MPS (the original setting) and fall back to CUDA or the
# CPU so the cell also runs on machines without an MPS backend.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model_name = 'simple_sentiment_analysis_model'
lr = 2e-5

# NOTE: the variable name is misspelled ("desctiption"); it is kept as-is
# because other cells may refer to it.
model_desctiption = "BERT, 1 fc layer, 0.3 dropout"

model = SentimentAnalysisModel(bert_model_name='bert-base-uncased', num_labels=3).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=lr)

best_so_far = 0.
mlflow.set_experiment("SentimentAnalysis")

with mlflow.start_run() as run:
    run_id = run.info.run_id
    # Log the variables that are actually used, so the recorded parameters
    # cannot drift from the optimizer's settings.
    mlflow.log_param("learning_rate", lr)
    mlflow.log_param("epochs", epochs)
    for epoch in range(epochs):
        train_one_epoch(model, train_loader, optimizer, criterion, device, epoch)
        # val_one_epoch logs the model under `model_name` whenever validation
        # accuracy improves and returns the updated best accuracy.
        best_so_far = val_one_epoch(model, val_loader, criterion, device, epoch, best_so_far, model_name)
    # Register the model artifact logged under `model_name` in this run.
    register_model(run_id, model_name, model_desctiption)

Epoch 0: train: 100%|██████████| 106/106 [00:36<00:00,  2.88it/s, loss=0.927]
Epoch 0: val: 100%|██████████| 52/52 [00:06<00:00,  8.61it/s, loss=0.609, acc=0.767]


Validation accuracy improved from 0.0000 to 0.7667. Saving model...


Epoch 1: train: 100%|██████████| 106/106 [00:39<00:00,  2.70it/s, loss=0.517]
Epoch 1: val: 100%|██████████| 52/52 [00:06<00:00,  8.27it/s, loss=0.563, acc=0.794]


Validation accuracy improved from 0.7667 to 0.7939. Saving model...


Epoch 2: train: 100%|██████████| 106/106 [00:40<00:00,  2.60it/s, loss=0.312]
Epoch 2: val: 100%|██████████| 52/52 [00:06<00:00,  8.29it/s, loss=0.471, acc=0.833]


Validation accuracy improved from 0.7939 to 0.8330. Saving model...


Epoch 3: train: 100%|██████████| 106/106 [00:39<00:00,  2.70it/s, loss=0.161]
Epoch 3: val: 100%|██████████| 52/52 [00:06<00:00,  7.84it/s, loss=0.562, acc=0.84] 


Validation accuracy improved from 0.8330 to 0.8403. Saving model...


Epoch 4: train: 100%|██████████| 106/106 [00:44<00:00,  2.36it/s, loss=0.101] 
Epoch 4: val: 100%|██████████| 52/52 [00:08<00:00,  6.43it/s, loss=0.612, acc=0.818]
Epoch 5: train: 100%|██████████| 106/106 [00:56<00:00,  1.89it/s, loss=0.065] 
Epoch 5: val: 100%|██████████| 52/52 [00:10<00:00,  4.99it/s, loss=0.676, acc=0.835]
Epoch 6: train: 100%|██████████| 106/106 [00:59<00:00,  1.78it/s, loss=0.0383]
Epoch 6: val: 100%|██████████| 52/52 [00:08<00:00,  5.93it/s, loss=0.707, acc=0.835]
Epoch 7: train: 100%|██████████| 106/106 [00:50<00:00,  2.11it/s, loss=0.0351]
Epoch 7: val: 100%|██████████| 52/52 [00:07<00:00,  6.56it/s, loss=0.8, acc=0.815]  
Epoch 8: train: 100%|██████████| 106/106 [00:48<00:00,  2.20it/s, loss=0.0338]
Epoch 8: val: 100%|██████████| 52/52 [00:07<00:00,  6.79it/s, loss=0.756, acc=0.831]
Epoch 9: train: 100%|██████████| 106/106 [00:46<00:00,  2.28it/s, loss=0.0295]
Epoch 9: val: 100%|██████████| 52/52 [00:07<00:00,  6.88it/s, loss=0.765, acc=0.832]

Model registered with name 'simple_sentiment_analysis_model' and version '1'



Successfully registered model 'simple_sentiment_analysis_model'.
Created version '1' of model 'simple_sentiment_analysis_model'.


In [63]:
# Load the registered model through the "models:/<name>/latest" registry URI.
# This rebinds `model`, replacing the instance trained in the earlier cell.
model = mlflow.pytorch.load_model(model_uri=f"models:/{model_name}/latest")

In [65]:
# Print the model's module structure.
print(model)

SentimentAnalysisModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, el