### TFT PIPELINE

In [9]:
from sklearn.metrics import f1_score
from pytorch_lightning import Trainer
from datetime import datetime
import torch
import os
from pytorch_lightning.callbacks import ModelCheckpoint

import sys
sys.path.insert(0, '/Users/florianrunkel/Documents/02_Uni/04_Masterarbeit/masterthesis/')

from ml_pipe.data.database.mongodb import MongoDb
from ml_pipe.data.dataModule.dataModule import DataModule
from ml_pipe.models.tft.model import TFTModel

In [8]:
def run_pipeline():
    # Datenquelle initialisieren
    mongo = MongoDb()
    datamodule = DataModule(mongo, batch_size=16)
    datamodule.setup()

    # Modell initialisieren
    input_size = datamodule.train_data[0][0].shape[-1]
    print(input_size)
    model = TFTModel(input_size=input_size, hidden_size=32)


    checkpoint_cb = ModelCheckpoint(
        monitor="val_loss",  # oder ein anderer Metric-Name
        save_top_k=1,
        mode="min",
        filename="best-checkpoint"
    )

    # Trainer Setup
    trainer = Trainer(
        max_epochs=50,
        logger=False,
        enable_model_summary=True,
        log_every_n_steps=10,
        callbacks=[checkpoint_cb],
        enable_checkpointing=True
    )

    # Training
    trainer.fit(model, datamodule=datamodule)

    # Testdaten durchlaufen
    trainer.test(model, datamodule=datamodule)

    # Evaluation: F1 Score manuell berechnen
    all_preds = []
    all_targets = []

    model.eval()
    model.freeze()

    for x, y in datamodule.val_dataloader():
        x = x.to(model.device)
        y = y.to(model.device)

        with torch.no_grad():
            preds = model(x)
            preds = (preds > 0.5).float()

        all_preds.extend(preds.cpu().numpy())
        all_targets.extend(y.cpu().numpy())

    f1 = f1_score(all_targets, all_preds)
    print(f"F1 Score auf Validierungsdaten: {f1:.4f}")

    #Modell speichern
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    model_path = f"saved_models/tft_model_{timestamp}.pt"

    #Ordner anlegen (falls nicht vorhanden) und Modell speichern
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    torch.save(model.state_dict(), model_path)

    print(f"Modell gespeichert unter: {model_path}")
    
run_pipeline()

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


3



  | Name             | Type               | Params | Mode 
----------------------------------------------------------------
0 | input_projection | Linear             | 128    | train
1 | lstm             | LSTM               | 8.4 K  | train
2 | attn             | MultiheadAttention | 4.2 K  | train
3 | gate             | Sequential         | 1.1 K  | train
4 | output_layer     | Sequential         | 33     | train
5 | loss_fn          | BCELoss            | 0      | train
----------------------------------------------------------------
13.9 K    Trainable params
0         Non-trainable params
13.9 K    Total params
0.056     Total estimated model params size (MB)
11        Modules in train mode
0         Modules in eval mode


                                                                           

/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 37/37 [00:00<00:00, 105.29it/s, train_loss=0.329, train_acc=1.000, val_loss=0.571, val_acc=0.747]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 37/37 [00:00<00:00, 105.08it/s, train_loss=0.329, train_acc=1.000, val_loss=0.571, val_acc=0.747]


/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:424: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 16/16 [00:00<00:00, 306.85it/s]


F1 Score auf Validierungsdaten: 0.8287
Modell gespeichert unter: saved_models/tft_model_20250410_162734.pt


In [5]:
def predict(input_sequence, model_path="saved_models/tft_model_20250410_111130.pt", input_size=3, hidden_size=32):
    model = TFTModel(input_size=input_size, hidden_size=hidden_size)
    model.load_state_dict(torch.load(model_path))
    model.eval()

    input_tensor = torch.tensor(input_sequence, dtype=torch.float32).unsqueeze(0)  # [1, seq_len, features]
    with torch.no_grad():
        pred = model(input_tensor)
    return float(pred.item()), "wechselbereit" if pred.item() > 0.5 else "bleibt wahrscheinlich"

In [6]:
example_input = [[24, 2, 1], [36, 3, 1], [12, 3, 1]]  # z. B. [Dauer in Monaten, Level, Branche]
prob, status = predict(example_input)
print(f"Wechselwahrscheinlichkeit: {prob:.2f} → Einschätzung: {status}")

Wechselwahrscheinlichkeit: 0.71 → Einschätzung: wechselbereit


  model.load_state_dict(torch.load(model_path))
