In [1]:
from pathlib import Path
from scipy.io import loadmat
import sys
import os


# Locate the dataset: try ./data/data.mat first, then ../data/data.mat, so the
# notebook runs from either the project root or a sub-folder of it.
dataset_path = Path('data') / 'data.mat'
if not dataset_path.exists():
    alt = Path.cwd().parent / 'data' / 'data.mat'
    if alt.exists():
        dataset_path = alt
    else:
        raise FileNotFoundError(f"data.mat not found under {Path.cwd()} or its parent")

# Make the parent of the current working directory importable so that the
# project-local packages imported by later cells can be resolved.
notebook_path = os.getcwd()
print(f"Current notebook path: {notebook_path}")
project_root = os.path.dirname(notebook_path)
if project_root not in sys.path:
    sys.path.insert(0, project_root)
    # Report the insertion only when it actually happened; re-running the cell
    # must not claim a path was added when it was already present.
    print(f"Added {project_root} to sys.path")

# Load the MATLAB file and list the variables it contains.
mat_data = loadmat(dataset_path)
print(mat_data.keys())

Current notebook path: /home/luky/skola/KalmanNet-for-state-estimation/TAN
Added /home/luky/skola/KalmanNet-for-state-estimation to sys.path
dict_keys(['__header__', '__version__', '__globals__', 'hB', 'souradniceGNSS', 'souradniceX', 'souradniceY', 'souradniceZ'])


In [2]:
import torch
import matplotlib.pyplot as plt
from utils import trainer
from utils import utils
from Systems import DynamicSystem
import Filters
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import numpy as np
from scipy.io import loadmat
from scipy.interpolate import RegularGridInterpolator
import random

# Seed every random number generator used by the notebook so runs are repeatable.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)

# Use the GPU when one is available; its generators receive the same seed.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Používané zařízení: {device}")

import torch.autograd
# Debugging aid, disabled by default: enables autograd anomaly detection.
# torch.autograd.set_detect_anomaly(True)

Používané zařízení: cuda


# 4D model augmented with a 2D unknown input (6D state)


In [3]:
import torch
from math import pi
from Systems import DynamicSystem

# --- Dimensions and tuning constants ---
state_dim = 6    # 4 base states + 2 augmented states
obs_dim = 2
dt = 0.02        # discretisation step used in the transition matrix
alpha_Q = 1e-3   # process-noise variance of the two augmented states
alpha_P = 1e-6   # initial variance of the two augmented states

# Base transition: states 0/1 are advanced by dt times states 2/3.
F_base = torch.eye(4, dtype=torch.float)
F_base[0, 2] = dt
F_base[1, 3] = dt

# Input matrix: the two augmented states feed rows 2 and 3 of the base state.
B_base = torch.zeros(4, 2, dtype=torch.float)
B_base[2:4, :] = torch.eye(2, dtype=torch.float)

# Augmented transition assembled block-wise as [[F_base, B_base], [0, I]].
F_aug = torch.cat(
    [
        torch.cat([F_base, B_base], dim=1),
        torch.cat(
            [torch.zeros(2, 4, dtype=torch.float), torch.eye(2, dtype=torch.float)],
            dim=1,
        ),
    ],
    dim=0,
)

# Process noise: a small fixed variance on the base states, alpha_Q on the rest.
Q_base = 1e-6 * torch.eye(4, dtype=torch.float)
Q_u = alpha_Q * torch.eye(2, dtype=torch.float)
Q_aug = torch.block_diag(Q_base, Q_u)

# Measurement noise for the two observation channels (diagonal).
R_val = torch.diag(
    torch.tensor([4e-4 * (pi/180)**2, 1e-4], dtype=torch.float)
)

# Initial mean: all zeros for both the base and the augmented part.
x_0_base = torch.zeros(4, dtype=torch.float)
x_0_u = torch.zeros(2, dtype=torch.float)
x_0_aug = torch.cat((x_0_base, x_0_u))

# Initial covariance: identity on the base states, alpha_P on the augmented ones.
P_0_base = torch.eye(4, dtype=torch.float)
P_0_u = alpha_P * torch.eye(2, dtype=torch.float)
P_0_aug = torch.block_diag(P_0_base, P_0_u)

def f_linear_augmented(x):
    """Advance states one step through the linear augmented transition.

    Every state vector along the last axis of ``x`` is left-multiplied by
    ``F_aug``, which is first moved to the device of ``x``.
    """
    transition = F_aug.to(x.device)
    state_columns = x.unsqueeze(-1)  # treat each state as a column vector
    return torch.matmul(transition, state_columns).squeeze(-1)

def h_polar(x):
    """Polar measurement model: map planar position to (azimuth, range).

    Args:
        x: State tensor whose last axis starts with ``[px, py, ...]``. The
            notebook passes ``[B, 6]`` batches; any number of leading axes is
            accepted.

    Returns:
        Tensor with the leading shape of ``x`` and a last axis of size 2 holding
        ``[azimuth, range]``, where azimuth is ``atan2(py, px + eps)`` and range
        is the Euclidean norm of ``(px, py)``, floored at ``eps``.
    """
    px = x[..., 0]
    py = x[..., 1]

    eps = 1e-6
    # The offset keeps the atan2 gradient finite when px == py == 0.
    azimuth = torch.atan2(py, px + eps)

    # sqrt has an infinite derivative at 0, which turns into a NaN gradient once
    # multiplied by d(px**2)/dpx == 0. Flooring the squared norm at eps**2 leaves
    # every range above eps unchanged and gives a zero gradient at the origin.
    range_val = torch.sqrt(torch.clamp(px**2 + py**2, min=eps**2))

    # Stack on a new last axis: [..., 2] (this is [B, 2] for a [B, 6] input).
    return torch.stack([azimuth, range_val], dim=-1)

def h_linear(x):
    """Linear measurement model that observes the first two state components.

    For a ``[B, 6]`` batch this equals applying the ``[2, 6]`` matrix
    ``H = [[1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0]]`` to every row.
    """
    observed = x[:, :2]
    return observed

# Two measurement channels: h_polar returns [azimuth, range].
obs_dim = 2

# Bundle the augmented dynamics, the polar measurement model, the noise
# covariances and the initial-state statistics into one system object.
system_model = DynamicSystem(
    f=f_linear_augmented,
    h=h_polar,
    Q=Q_aug.float(),
    R=R_val.float(),
    Ex0=x_0_aug.float(),
    P0=P_0_aug.float(),
    state_dim=state_dim,
    obs_dim=obs_dim,
    device=device,
)


In [4]:
import torch
from torch.utils.data import TensorDataset, DataLoader
from copy import deepcopy 
import numpy as np
import random 
from utils import trainer 
from state_NN_models import StateKalmanNet_v2 
from Systems import DynamicSystem

def generate_data(system_model, num_trajectories, seq_len, base_dist, device):
    """Simulate a batch of state/measurement trajectories.

    Args:
        system_model: Object exposing ``state_dim``, ``obs_dim``, ``step(x)`` and
            ``measure(x)``.
        num_trajectories: Number of trajectories simulated in parallel.
        seq_len: Number of time steps per trajectory.
        base_dist: Distribution whose ``sample((num_trajectories,))`` yields the
            initial states.
        device: Device on which the output tensors are allocated.

    Returns:
        ``(x_data, y_data)`` with shapes ``[num_trajectories, seq_len, state_dim]``
        and ``[num_trajectories, seq_len, obs_dim]``.
    """
    def _record(step_idx, state, measurement):
        # Warn about NaN measurements, but still store the step.
        if torch.isnan(measurement).any():
            print(f"VAROVÁNÍ: Detekován NaN v y_k při generování dat (krok {step_idx})!")
        x_data[:, step_idx, :] = state
        y_data[:, step_idx, :] = measurement

    x_data = torch.zeros(num_trajectories, seq_len, system_model.state_dim, device=device)
    y_data = torch.zeros(num_trajectories, seq_len, system_model.obs_dim, device=device)

    # Step 0: draw the initial states and measure them.
    state = base_dist.sample((num_trajectories,))
    _record(0, state, system_model.measure(state))

    # Remaining steps: propagate, measure and store without tracking gradients.
    with torch.no_grad():
        for step_idx in range(1, seq_len):
            state = system_model.step(state)
            _record(step_idx, state, system_model.measure(state))

    return x_data, y_data

# --- Dataset sizes ---
TRAIN_SEQ_LEN = 100       # D (segment length for training)
VALID_SEQ_LEN = 200       # D (segment length for validation)
NUM_TRAIN_SETS = 40       # number of distinct start positions
TRAJ_PER_SET_TRAIN = 10   # trajectories generated per start position
NUM_VALID_SETS = 15
TRAJ_PER_SET_VALID = 5
BATCH_SIZE = 256

TEST_SEQ_LEN = 800
NUM_TEST_SETS = 5
TRAJ_PER_SET_TEST = 2

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Používané zařízení: {device}")

# Keep copies of the model's initial-state statistics; every set below shifts a
# fresh copy of the mean and reuses the covariance unchanged.
original_system_model = system_model
default_P0 = original_system_model.P0.clone()
default_Ex0 = original_system_model.Ex0.clone()
state_dim = original_system_model.state_dim
obs_dim = original_system_model.obs_dim
print(f"Benchmark model načten: State Dim={state_dim}, Obs Dim={obs_dim}")

POS_MEAN_RANGE_X_MIN = 500.0  # minimum start position in X
POS_MEAN_RANGE_X_MAX = 5000.0 # maximum start position in X
POS_MEAN_RANGE_Y = 5000.0     # start position in Y is drawn from +/- this value
VEL_MEAN_RANGE = 20.0         # start velocity is drawn from +/- this value

print(f"Generuji trajektorie v 'bezpečné zóně': x > {POS_MEAN_RANGE_X_MIN}")


def _generate_split(num_sets, traj_per_set, seq_len):
    """Generate one dataset split from randomly placed start distributions.

    For each of ``num_sets`` sets a start mean is drawn (X uniformly in
    [POS_MEAN_RANGE_X_MIN, POS_MEAN_RANGE_X_MAX), Y and the two velocity
    components uniformly in +/- their range) and ``traj_per_set`` trajectories
    of length ``seq_len`` are simulated with ``generate_data``. Reads the
    notebook-level ``device``, ``default_Ex0``, ``default_P0`` and
    ``original_system_model``.

    Returns:
        ``(x, y)`` tensors concatenated over all sets along the first axis.
    """
    x_parts, y_parts = [], []
    for _ in range(num_sets):
        # Draw the random offset ONLY inside the "safe zone" (x >= X_MIN).
        # The draw order (x, y, velocity) is kept fixed so a given seed
        # reproduces the same data.
        random_pos_offset_x = (torch.rand((), device=device) * (POS_MEAN_RANGE_X_MAX - POS_MEAN_RANGE_X_MIN)) + POS_MEAN_RANGE_X_MIN
        random_pos_offset_y = (torch.rand((), device=device) * 2 - 1) * POS_MEAN_RANGE_Y
        random_vel_offset = (torch.rand(2, device=device) * 2 - 1) * VEL_MEAN_RANGE

        current_Ex0 = default_Ex0.clone()
        current_Ex0[0] += random_pos_offset_x
        current_Ex0[1] += random_pos_offset_y
        current_Ex0[2:4] += random_vel_offset

        current_base_dist = torch.distributions.MultivariateNormal(current_Ex0, default_P0)

        x_batch, y_batch = generate_data(
            original_system_model,
            num_trajectories=traj_per_set,
            seq_len=seq_len,
            base_dist=current_base_dist,
            device=device
        )
        x_parts.append(x_batch)
        y_parts.append(y_batch)

    return torch.cat(x_parts, dim=0), torch.cat(y_parts, dim=0)


# --- Training data ---
print("Generuji trénovací data...")
x_train, y_train = _generate_split(NUM_TRAIN_SETS, TRAJ_PER_SET_TRAIN, TRAIN_SEQ_LEN)
print(f"Finální trénovací data: x={x_train.shape}, y={y_train.shape}")

# --- Validation data ---
print("Generuji validační data...")
x_val, y_val = _generate_split(NUM_VALID_SETS, TRAJ_PER_SET_VALID, VALID_SEQ_LEN)
print(f"Finální validační data: x={x_val.shape}, y={y_val.shape}")

# --- Per-component statistics of the training set ---
# A zero standard deviation is replaced by 1 so a later division cannot blow up.
# NOTE(review): these statistics are not used in the cells visible here.
x_train_flat = x_train.view(-1, state_dim)
x_mean = x_train_flat.mean(dim=0).to(device)
x_std = x_train_flat.std(dim=0).to(device)
x_std[x_std == 0] = 1.0
print(f"  Vypočtený průměr stavů (x_mean): {x_mean.cpu().numpy()}")
print(f"  Vypočtená odchylka stavů (x_std): {x_std.cpu().numpy()}")

y_train_flat = y_train.view(-1, obs_dim)
y_mean = y_train_flat.mean(dim=0).to(device)
y_std = y_train_flat.std(dim=0).to(device)
y_std[y_std == 0] = 1.0
print(f"  Vypočtený průměr měření (y_mean): {y_mean.cpu().numpy()}")
print(f"  Vypočtená odchylka měření (y_std): {y_std.cpu().numpy()}")

# --- Test data ---
print("Generuji testovací data...")
x_test, y_test = _generate_split(NUM_TEST_SETS, TRAJ_PER_SET_TEST, TEST_SEQ_LEN)
print(f"Finální testovací data: x={x_test.shape}, y={y_test.shape}")

# --- Loaders ---
# The test loader yields one trajectory per batch.
train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)
test_dataset = TensorDataset(x_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)
print("\nDataLoadery jsou připraveny pro trénink.")

Používané zařízení: cuda
Benchmark model načten: State Dim=6, Obs Dim=2
Generuji trajektorie v 'bezpečné zóně': x > 500.0
Generuji trénovací data...
Finální trénovací data: x=torch.Size([400, 100, 6]), y=torch.Size([400, 100, 2])
Generuji validační data...
Finální validační data: x=torch.Size([75, 200, 6]), y=torch.Size([75, 200, 2])
  Vypočtený průměr stavů (x_mean): [ 2.8736792e+03  2.0110551e+02  2.1186538e+00 -5.0605571e-01
 -8.5354422e-04  9.0008685e-03]
  Vypočtená odchylka stavů (x_std): [1.2399069e+03 2.8767297e+03 1.5535028e+01 1.5607470e+01 2.3161271e-01
 2.2101921e-01]
  Vypočtený průměr měření (y_mean): [6.8106331e-02 4.0876814e+03]
  Vypočtená odchylka měření (y_std): [8.0387437e-01 1.1840886e+03]
Generuji testovací data...
Finální testovací data: x=torch.Size([10, 800, 6]), y=torch.Size([10, 800, 2])

DataLoadery jsou připraveny pro trénink.


In [5]:

# state_knet2 = StateKalmanNet_v2(
#     system_model=original_system_model, 
#     device=device,
#     hidden_size_multiplier=2,
#     output_layer_multiplier=1,
#     num_gru_layers=1
# ).to(device)

# trained_model = trainer.train_state_KalmanNet_sliding_window(
#     model=state_knet2,
#     train_loader=train_loader,
#     val_loader=val_loader,
#     device=device,
#     epochs=200,
#     lr=1e-3,
#     clip_grad=1.0,
#     early_stopping_patience=20,
#     tbptt_k=2,
#     tbptt_w=8,
#     optimizer_=torch.optim.AdamW,
#     weight_decay_=1e-3,

# )
# print(trained_model)

# Grid search

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F
from IPython.display import display
import time

# --- 1. Search space for the hyper-parameter grid search ---
print("Spouštím Grid Search pro HPO...")
start_time_hpo = time.time()

# Values to try for each architecture multiplier.
hidden_multipliers = [2, 4]
output_multipliers = [1, 2, 3, 4]
gru_hidden_dim_multipliers = [4, 6, 8]

results_list = []


def _evaluate_test_mse(model, loader, device):
    """Run ``model`` over every trajectory in ``loader`` and return the mean MSE.

    Each batch is expected to hold a single trajectory (the loader is built with
    ``batch_size=1``). The filter is reset to the true initial state, stepped
    through the measurements from t=1 onward, and its estimates are compared with
    the true states at t >= 1.

    Raises:
        RuntimeError: If the MSE of any trajectory is NaN or infinite.
    """
    model.eval()
    per_trajectory_mse = []
    with torch.no_grad():
        for x_true_seq_batch, y_test_seq_batch in loader:
            y_seq = y_test_seq_batch.squeeze(0).to(device)
            x_true = x_true_seq_batch.squeeze(0).to(device)
            initial_state = x_true[0, :].unsqueeze(0)
            # Local length: the global TEST_SEQ_LEN constant must not be
            # overwritten from inside the evaluation loop.
            seq_len = x_true.shape[0]

            model.reset(batch_size=1, initial_state=initial_state)
            model_preds = []
            for t in range(1, seq_len):
                step_output = model.step(y_seq[t, :].unsqueeze(0))
                # Models that also return a covariance give a tuple; keep the state.
                if model.returns_covariance:
                    x_filtered_t = step_output[0]
                else:
                    x_filtered_t = step_output
                model_preds.append(x_filtered_t)

            mse = F.mse_loss(torch.cat(model_preds, dim=0), x_true[1:]).item()

            # Treat a non-finite MSE as a failed run.
            if not np.isfinite(mse):
                print("Varování: MSE na testovací sadě je NaN/Inf!")
                raise RuntimeError("Selhání při evaluaci (NaN MSE)")

            per_trajectory_mse.append(mse)

    return np.mean(per_trajectory_mse)


# --- 2. Grid-search loops ---
total_runs = len(hidden_multipliers) * len(output_multipliers) * len(gru_hidden_dim_multipliers)
run_count = 0

for h_mult in hidden_multipliers:
    for o_mult in output_multipliers:
        for g_mult in gru_hidden_dim_multipliers:
            run_count += 1
            run_id = f"h{h_mult}_o{o_mult}_g{g_mult}"
            print(f"\n{'='*80}")
            print(f"Běh HPO {run_count}/{total_runs}: {run_id} (Hidden: {h_mult}, Output: {o_mult}, GRU: {g_mult})")
            print(f"{'='*80}")

            start_time_run = time.time()

            # Metrics default to inf so a failed run is still recorded, ranked last.
            best_train_loss = float('inf')
            best_val_loss = float('inf')
            final_test_mse = float('inf')

            # --- 3. Train and evaluate; numerical blow-ups are caught below ---
            try:
                # --- 3a. Build the model ---
                current_model = StateKalmanNet_v2(
                    system_model=original_system_model,
                    device=device,
                    hidden_size_multiplier=h_mult,
                    output_layer_multiplier=o_mult,
                    gru_hidden_dim_multiplier=g_mult,
                    num_gru_layers=1
                ).to(device)
                print(f"Model inicializován: {current_model}")

                # --- 3b. Train ---
                training_results = trainer.train_state_KalmanNet_sliding_window_grid_search(
                    model=current_model,
                    train_loader=train_loader,
                    val_loader=val_loader,
                    device=device,
                    epochs=200,
                    lr=1e-4,
                    clip_grad=1.0,
                    early_stopping_patience=30,
                    tbptt_k=2,
                    tbptt_w=8,
                    optimizer_=torch.optim.AdamW,
                    weight_decay_=1e-3,
                    verbose=False
                )

                best_train_loss = training_results['best_train_loss']
                best_val_loss = training_results['best_val_loss']
                current_model = training_results['model']

                print(f"Trénování dokončeno. Nejlepší Train Loss: {best_train_loss:.6f}, Val Loss: {best_val_loss:.6f}")

                # --- 3c. Evaluate on the test set ---
                print("Evaluace na testovacích datech...")
                final_test_mse = _evaluate_test_mse(current_model, test_loader, device)
                print(f"Evaluace dokončena. Průměrné Test MSE: {final_test_mse:.6f}.")

            # --- 3d. A NaN/Inf failure penalises the run instead of aborting the search ---
            except RuntimeError as e:
                if "NaN" in str(e) or "Inf" in str(e):
                    print(f"\n!!!!!!!!!!!!!!!!! POZOR !!!!!!!!!!!!!!!!!")
                    print(f"Běh {run_id} selhal kvůli numerické nestabilitě (NaN/Inf).")
                    print(f"Chyba: {e}")
                    print(f"Tento běh bude zaznamenán s MSE = 'inf' a HPO bude pokračovat.")
                    print(f"!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n")
                    final_test_mse = float('inf')  # penalty
                else:
                    # Any other error should stop the search; a bare raise keeps
                    # the original traceback.
                    raise

            run_duration = time.time() - start_time_run
            print(f"Doba trvání běhu: {run_duration:.2f}s")

            # --- 4. Record the run ---
            results_list.append({
                "run_id": run_id,
                "h_mult": h_mult,
                "o_mult": o_mult,
                "g_mult": g_mult,
                "best_train_loss": best_train_loss,
                "best_val_loss": best_val_loss,
                "test_mse": final_test_mse,
                "duration_s": run_duration
            })

# --- 5. Final table ---
print("\n" + "="*80)
print(f"Grid Search HPO Dokončen! Celkový čas: {(time.time() - start_time_hpo) / 60:.2f} minut.")
print("="*80)

# NOTE(review): configurations are ranked by test MSE, which means the test set
# is used for model selection; consider ranking by best_val_loss instead.
# NOTE(review): a finite but extremely large test MSE is not treated as a failure
# and is ranked ahead of runs recorded as inf.
results_df = pd.DataFrame(results_list)
results_df = results_df.sort_values(by="test_mse", ascending=True)

pd.set_option('display.float_format', '{:.6f}'.format)
display(results_df)

print("\nNejlepší konfigurace (podle Test MSE):")
print(results_df.iloc[0])

Spouštím Grid Search pro HPO...

Běh HPO 1/18: h2_o1_g1 (Hidden: 2, Output: 1, GRU: 1)
Model inicializován: StateKalmanNet_v2(
  (dnn): DNN_KalmanNet_v2(
    (input_norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
    (input_layer): Sequential(
      (0): Linear(in_features=16, out_features=128, bias=True)
      (1): ReLU()
    )
    (gru): GRU(128, 40)
    (output_hidden_layer): Sequential(
      (0): Linear(in_features=40, out_features=12, bias=True)
      (1): ReLU()
    )
    (output_final_linear): Linear(in_features=12, out_features=12, bias=True)
  )
)




Trénování dokončeno. Nejlepší Train Loss: 26.727408, Val Loss: 282.764160
Evaluace na testovacích datech...
Evaluace dokončena. Průměrné Test MSE: 727735792144363968658407424.000000.
Doba trvání běhu: 66.88s

Běh HPO 2/18: h2_o1_g2 (Hidden: 2, Output: 1, GRU: 2)
Model inicializován: StateKalmanNet_v2(
  (dnn): DNN_KalmanNet_v2(
    (input_norm): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
    (input_layer): Sequential(
      (0): Linear(in_features=16, out_features=128, bias=True)
      (1): ReLU()
    )
    (gru): GRU(128, 80)
    (output_hidden_layer): Sequential(
      (0): Linear(in_features=80, out_features=12, bias=True)
      (1): ReLU()
    )
    (output_final_linear): Linear(in_features=12, out_features=12, bias=True)
  )
)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! SELHÁNÍ DETEKOVÁNO !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
Příčina: Tensor 'norm_innovation (vstup GRU)' obsahuje NaN nebo Inf.
Hodnota selhaného tensoru [norm_innovation (vstup GRU)]:
NELZE ZÍSKAT HODNOTU

Unnamed: 0,run_id,h_mult,o_mult,g_mult,best_train_loss,best_val_loss,test_mse,duration_s
11,h4_o1_g4,4,1,4,1182.867933,377.549255,2.0241535387961007e+22,94.200687
0,h2_o1_g1,2,1,1,26.727408,282.76416,7.277357921443638e+26,66.880106
5,h2_o2_g4,2,2,4,23.200051,204.03627,1.4059510806047607e+27,136.937259
6,h2_o4_g1,2,4,1,24.100512,240.805145,1.696467954282524e+33,58.677547
15,h4_o4_g1,4,4,1,inf,inf,inf,0.463744
14,h4_o2_g4,4,2,4,inf,inf,inf,1.121404
13,h4_o2_g2,4,2,2,inf,inf,inf,0.626339
12,h4_o2_g1,4,2,1,inf,inf,inf,1.226384
10,h4_o1_g2,4,1,2,inf,inf,inf,12.524306
8,h2_o4_g4,2,4,4,inf,inf,inf,3.35137



Nejlepší konfigurace (podle Test MSE):
run_id                                  h4_o1_g4
h_mult                                         4
o_mult                                         1
g_mult                                         4
best_train_loss                      1182.867933
best_val_loss                         377.549255
test_mse          20241535387961007276032.000000
duration_s                             94.200687
Name: 11, dtype: object
