In [1]:
import sys
from pathlib import Path

# Make the parent of the current working directory importable so the local
# `model` and `data` packages resolve. Path() is relative to the working
# directory, so this assumes the kernel runs from the notebook's own folder.
parent_dir = Path().resolve().parent
# Guard the insert: re-running this cell must not stack duplicate entries
# on sys.path.
if str(parent_dir) not in sys.path:
    sys.path.insert(0, str(parent_dir))

from model.model_loader import ModelLoader
from data.data_loader import DataLoader
import torch

## Load Model
Initialize the model loader and load the pre-trained model.

In [2]:
# Choose the compute device first: CUDA when available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the model and its tokenizer with the 'partial-2' freeze strategy
# and LoRA switched off
model_loader = ModelLoader()
model, tokenizer = model_loader.load_model_and_tokenizer(
    freeze_strategy='partial-2',
    use_lora=False,
)

# Place the model on the chosen device and put it in evaluation mode
model.to(device)
model.eval()

print(f"Model loaded successfully on {device}")
print(f"Model type: {type(model)}")

Loaded config from C:\Users\Besitzer\OneDrive\Dokumente\CBS_Copenhagen\Semester\WS2025\AdvNLP\Final Exam\AVDNLP_final_project\configs\config.yaml
Transformer frozen - only training classification heads
Unfroze top 2 layers (out of 6)
Model loaded successfully on cpu
Model type: <class 'model.model_loader.MultiTaskClassifier'>


In [3]:
# Report how many parameters are trainable versus frozen
total, trainable = model_loader.count_parameters(model)

summary_lines = (
    f"\nTotal parameters: {total:,}",
    f"Trainable parameters: {trainable:,}",
    f"Frozen parameters: {total - trainable:,}",
    f"Trainable percentage: {100 * trainable / total:.2f}%",
)
for summary_line in summary_lines:
    print(summary_line)


Total parameters: 66,367,494
Trainable parameters: 14,180,358
Frozen parameters: 52,187,136
Trainable percentage: 21.37%


In [4]:
# Print the full module tree to inspect the architecture and layer names
print(model)

MultiTaskClassifier(
  (transformer): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)
            (li

In [5]:
# Prepare data for a small test.
# NOTE: `DataLoader` here is the project's own class imported from
# data.data_loader at the top of the notebook, not torch.utils.data.DataLoader.
data_loader = DataLoader()
# Presumably returns a pandas DataFrame (later cells call .head() and index
# columns such as 'news' on it) - confirm against data/data_loader.py.
sample_data = data_loader.load_sample_data()

Loaded config from C:\Users\Besitzer\OneDrive\Dokumente\CBS_Copenhagen\Semester\WS2025\AdvNLP\Final Exam\AVDNLP_final_project\configs\config.yaml
Loaded: C:\Users\Besitzer\OneDrive\Dokumente\CBS_Copenhagen\Semester\WS2025\AdvNLP\Final Exam\AVDNLP_final_project\data\processed_data.csv


In [7]:
# Preview the first rows to check the text column and the label columns
sample_data.head()

Unnamed: 0,news,price_direction_up,price_direction_constant,price_direction_down,asset_comparision,past_information,future_information
0,gold futures climb; metals stocks narrowly higher,1,0,0,0,1,0
1,gold futures hit fresh high in electronic trading,1,0,0,0,1,0
2,gold prices up as stock market falters,1,0,0,0,1,0
3,Gold turns slightly positive after consumer-pr...,1,0,0,0,1,0
4,Gold prices end modestly higher after three se...,1,0,0,0,1,0


In [None]:
# Smoke-test the forward pass on a handful of examples and compare the
# predicted classes of every task head with the ground-truth labels.
N_SAMPLES = 3        # number of leading rows used for the check
MAX_SEQ_LEN = 512    # matches the 512 position embeddings of the backbone

# Display name for each task head; the keys index both `outputs` and `labels`
task_titles = {
    'direction': 'Direction',
    'comparison': 'Comparison',
    'future_info': 'Future info',
}

# Slice once so that texts and labels always come from the same rows and a
# frame with fewer than N_SAMPLES rows cannot trigger an IndexError.
batch = sample_data.head(N_SAMPLES)
if batch.empty:
    raise ValueError("sample_data is empty - nothing to run inference on")

texts = batch['news'].tolist()

# Collapse the one-hot direction columns into one class id per row:
# 1 = up, 0 = down, 2 = constant (neither flag set).
# NOTE(review): the direction head appears to emit only two logits; if so,
# class id 2 can never be predicted and such rows always count as wrong -
# confirm the intended label encoding.
direction_labels = [
    1 if is_up == 1 else (0 if is_down == 1 else 2)
    for is_up, is_down in zip(
        batch['price_direction_up'].tolist(),
        batch['price_direction_down'].tolist(),
    )
]

labels = {
    'direction': direction_labels,
    'comparison': batch['asset_comparision'].tolist(),
    'future_info': batch['future_information'].tolist(),
}

# Tokenize the texts into one padded batch of PyTorch tensors
inputs = tokenizer(
    texts,
    padding=True,
    truncation=True,
    max_length=MAX_SEQ_LEN,
    return_tensors='pt'
)

# Move to the same device as the model
input_ids = inputs['input_ids'].to(device)
attention_mask = inputs['attention_mask'].to(device)

# Inference only: no gradients needed
with torch.no_grad():
    outputs = model(input_ids, attention_mask)

    # Predicted class per task = argmax over that head's logits
    predictions = {
        task: torch.argmax(outputs[task], dim=1) for task in task_titles
    }

# Keep the individual names available for any later cells
pred_direction = predictions['direction']
pred_comparison = predictions['comparison']
pred_future = predictions['future_info']

print("\nSample predictions (logits):")
for task, title in task_titles.items():
    print(f"{title} logits: {outputs[task]}")

print("\nPredicted classes:")
for task, title in task_titles.items():
    print(f"{title}: {predictions[task].tolist()}")

print("\nTrue labels:")
for task, title in task_titles.items():
    print(f"{title}: {labels[task]}")

print("\nAccuracy:")
for task, title in task_titles.items():
    # Compare on CPU because the label tensor is created there
    n_correct = (predictions[task].cpu() == torch.tensor(labels[task])).sum().item()
    print(f"{title}: {n_correct}/{len(labels[task])}")


Sample predictions:
Direction: tensor([[ 0.2023,  0.1824],
        [ 0.0477, -0.0041],
        [ 0.1233, -0.0630]])
Comparison: tensor([[ 0.0855, -0.0068],
        [ 0.0087, -0.0705],
        [ 0.1297, -0.0631]])
Future info: tensor([[0.1634, 0.0982],
        [0.0937, 0.0280],
        [0.2265, 0.0471]])

True labels:


KeyError: 'direction'