## LNN Versione 2 Addestramento

In [1]:
import os

# These environment variables are set BEFORE TensorFlow is imported so that
# they are already in place when the library initialises.
# TF_USE_LEGACY_KERAS=1 asks TensorFlow to expose the legacy Keras 2
# implementation as tf.keras.
os.environ["TF_USE_LEGACY_KERAS"] = "1"
# Level '2' hides INFO and WARNING messages coming from the TensorFlow C++ backend.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

# Enable on-demand GPU memory allocation instead of reserving all memory up front.
# set_memory_growth has to be called before the GPUs are initialised; if it is
# called too late TensorFlow raises RuntimeError, which is reported and ignored.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Configurazione Memoria OK: {len(gpus)} GPU")
    except RuntimeError as e:
        print(f"Errore Configurazione Memoria: {e}")

from ncps.tf import CfC

import pandas as pd
import numpy as np
import glob
import gc
import joblib
import pyarrow.parquet as pq
from collections import Counter
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import mixed_precision
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, TimeDistributed, RepeatVector
from tensorflow.keras.optimizers.schedules import CosineDecayRestarts, ExponentialDecay
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from ncps.wirings import AutoNCP





# Paths and feature layout.
INPUT_DIR = '../Pre-Elaborazione Dati/Dataset'
SCALER_PATH = 'scaler.joblib'
# Column order matters: the loss and the scaler statistics index features by position.
COLONNE_FEATURES = ['Latitude', 'Longitude', 'SOG', 'COG']

# Windowing / batching.
WINDOW_SIZE = 30
BATCH_SIZE = 64

# Train/validation split expressed in number of parquet files (taken in sorted order).
N_TRAIN_FILES = 16
N_VAL_FILES = 4

all_files = sorted(glob.glob(os.path.join(INPUT_DIR, '*.parquet')))
TRAIN_FILES = all_files[:N_TRAIN_FILES]
VAL_FILES = all_files[N_TRAIN_FILES:N_TRAIN_FILES + N_VAL_FILES]

# Slicing never fails, so a wrong INPUT_DIR or a short dataset would otherwise
# go unnoticed until training stalls on an empty file list.
if len(all_files) < N_TRAIN_FILES + N_VAL_FILES:
    print(
        f"ATTENZIONE: trovati {len(all_files)} file parquet in '{INPUT_DIR}', "
        f"attesi almeno {N_TRAIN_FILES + N_VAL_FILES} "
        f"(train={len(TRAIN_FILES)}, val={len(VAL_FILES)})."
    )

print("Configurazione LNN caricata.")

Configurazione Memoria OK: 1 GPU
Configurazione LNN caricata.


#### Funzioni

In [3]:
def create_windows(data_np, window_size):
    """Return all contiguous slices of `window_size` consecutive rows of `data_np`.

    Windows advance one row at a time, so a sequence of length n produces
    n - window_size + 1 windows (an empty list when the sequence is shorter
    than the window). Each window is obtained by slicing `data_np`.
    """
    last_start = len(data_np) - window_size
    return [data_np[start:start + window_size] for start in range(last_start + 1)]

def data_generator(file_paths, scaler, features, window_size, batch_size, shuffle_files=False):
    """Endlessly yield autoencoder batches of sliding windows read from parquet files.

    Each file is streamed in chunks of CHUNK_SIZE_ROWS rows. The feature columns
    are scaled with `scaler.transform`, rows are grouped by 'TrajectoryID', and
    every trajectory with at least `window_size` rows is cut into sliding
    windows with `create_windows`. Windows are accumulated and emitted in
    batches of exactly `batch_size`; an incomplete batch is kept and completed
    by later trajectories (also across files and passes).

    A trajectory whose last row is the last row of a chunk is held back and
    merged with its continuation in the next chunk (or in the next file).
    This assumes rows of one trajectory are stored contiguously - TODO confirm
    against the pre-processing step. If the continuation never shows up, the
    held-back rows are windowed as they are instead of being discarded.
    Nothing is carried over from one pass over `file_paths` to the next.

    Args:
        file_paths: parquet files to read, in order, on every pass.
        scaler: object exposing `transform` for the feature columns.
        features: names of the feature columns.
        window_size: number of consecutive rows per window (>= 1).
        batch_size: number of windows per yielded batch (>= 1).
        shuffle_files: accepted for interface compatibility and ignored; files
            are always read in the given order to keep the data sequential.

    Yields:
        (x, y) where both elements are the same array of shape
        (batch_size, window_size, len(features)).

    Raises:
        ValueError: if `window_size` or `batch_size` is smaller than 1.
        RuntimeError: if a full pass over `file_paths` produces no window at
            all (empty list, unreadable files, or only too-short trajectories),
            which would otherwise loop forever without yielding.
    """
    if window_size < 1 or batch_size < 1:
        raise ValueError("window_size e batch_size devono essere >= 1")

    CHUNK_SIZE_ROWS = 500_000
    feature_cols = list(features)
    read_columns = feature_cols + ['TrajectoryID']
    tail_rows = window_size - 1

    file_buffer = {}    # carry-over from the previous file, keyed by TrajectoryID
    window_buffer = []  # windows waiting to fill a batch (only mutated in place)

    def _flush(buffered):
        """Window every buffered trajectory that is long enough; return the number of windows added."""
        added = 0
        for frame in buffered.values():
            # Carried tails hold window_size - 1 rows, so only trajectories that
            # were held back un-windowed can pass this length check.
            if len(frame) >= window_size:
                flushed = create_windows(frame[feature_cols].to_numpy(), window_size)
                window_buffer.extend(flushed)
                added += len(flushed)
        return added

    def _emit_full_batches():
        """Yield every complete batch currently buffered, then drop the consumed windows."""
        n_ready = len(window_buffer) - len(window_buffer) % batch_size
        for start in range(0, n_ready, batch_size):
            batch_np = np.array(window_buffer[start:start + batch_size])
            # Autoencoder target == input, so the same array is used for both.
            yield (batch_np, batch_np)
        # A single deletion avoids re-copying the remaining list after each batch.
        del window_buffer[:n_ready]

    while True:
        windows_in_pass = 0

        for file_path in file_paths:
            chunk_buffer = {}
            try:
                parquet_file = pq.ParquetFile(file_path)
                for record_batch in parquet_file.iter_batches(batch_size=CHUNK_SIZE_ROWS, columns=read_columns):
                    df_chunk = record_batch.to_pandas()
                    df_chunk[feature_cols] = scaler.transform(df_chunk[feature_cols])
                    last_label = df_chunk.index[-1]
                    next_chunk_buffer = {}

                    for tid, group in df_chunk.groupby('TrajectoryID'):
                        # Prepend rows carried over from the previous file and/or chunk.
                        pieces = [buf.pop(tid) for buf in (file_buffer, chunk_buffer) if tid in buf]
                        pieces.append(group)
                        trajectory_data = pd.concat(pieces) if len(pieces) > 1 else group

                        # The trajectory reaches the end of the chunk: it may continue
                        # in the next one, so hold it back un-windowed.
                        if trajectory_data.index[-1] == last_label:
                            next_chunk_buffer[tid] = trajectory_data
                            continue

                        if len(trajectory_data) < window_size:
                            continue

                        new_windows = create_windows(trajectory_data[feature_cols].to_numpy(), window_size)
                        window_buffer.extend(new_windows)
                        windows_in_pass += len(new_windows)

                        # Keep the last window_size - 1 rows in case the trajectory shows up
                        # again later. With window_size == 1 there is nothing to keep
                        # (a plain iloc[-0:] would keep everything).
                        if tail_rows:
                            next_chunk_buffer[tid] = trajectory_data.iloc[-tail_rows:]
                        else:
                            next_chunk_buffer[tid] = trajectory_data.iloc[:0]

                        yield from _emit_full_batches()

                    # Trajectories held back from the previous chunk that did not
                    # continue in this one are complete: window them now.
                    windows_in_pass += _flush(chunk_buffer)
                    yield from _emit_full_batches()
                    chunk_buffer = next_chunk_buffer

                # Same for what was carried over from the previous file and never matched.
                windows_in_pass += _flush(file_buffer)
                yield from _emit_full_batches()
                file_buffer = chunk_buffer
            except Exception as e:
                # Best effort: report the problem and move on to the next file.
                print(f"\nErrore lettura {file_path}: {e}")
                continue

        # End of the pass: finish whatever is still held back and start the next
        # pass clean, so the end of the last file is never stitched to the
        # beginning of the first one.
        windows_in_pass += _flush(file_buffer)
        file_buffer = {}
        yield from _emit_full_batches()

        if windows_in_pass == 0:
            raise RuntimeError(
                f"Nessuna finestra generata da {len(file_paths)} file: "
                "controllare i percorsi e la lunghezza delle traiettorie."
            )
print("Funzioni definite")

Funzioni definite


#### Scaler e Generatori

In [4]:
print("Inizializzazione generatori")
# NOTE: joblib.load unpickles the file, so it must only be pointed at a trusted scaler.
scaler = joblib.load(SCALER_PATH)

# The two generators differ only in the files they read; everything else is shared.
_generator_options = dict(
    scaler=scaler,
    features=COLONNE_FEATURES,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
    shuffle_files=False,
)

train_gen = data_generator(file_paths=TRAIN_FILES, **_generator_options)
val_gen = data_generator(file_paths=VAL_FILES, **_generator_options)
print("Generatori pronti.")

Inizializzazione generatori
Generatori pronti.


#### Modello LNN

In [5]:
# Dimensions: one input channel per feature; the wiring is built from
# latent_dim and output_dim, and output_dim is the width of the CfC outputs.
n_features = len(COLONNE_FEATURES)
latent_dim = 128
output_dim = 64

# Sparse NCP wiring; the same wiring object is handed to both CfC layers below.
wiring = AutoNCP(latent_dim, output_dim)

# --- Encoder: compress the whole window into a single vector ---
inputs = Input(shape=(WINDOW_SIZE, n_features))
encoder_layer = CfC(wiring, return_sequences=False, mixed_memory=True)
lnn_encoder = encoder_layer(inputs)

# --- Decoder: repeat the code once per time step and unroll it back into a sequence ---
repeat_layer = RepeatVector(WINDOW_SIZE)
repeat_vector = repeat_layer(lnn_encoder)
decoder_layer = CfC(wiring, return_sequences=True, mixed_memory=True)
lnn_decoder = decoder_layer(repeat_vector)

# Per-time-step linear projection back to the feature space.
projection_layer = TimeDistributed(Dense(n_features))
output = projection_layer(lnn_decoder)

# Full autoencoder: window in, reconstructed window out.
model_lnn = Model(inputs, output)

model_lnn.summary()

I0000 00:00:1764265362.146125  374329 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 3945 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 30, 4)]           0         
                                                                 
 cf_c (CfC)                  (None, 64)                123844    
                                                                 
 repeat_vector (RepeatVecto  (None, 30, 64)            0         
 r)                                                              
                                                                 
 cf_c_1 (CfC)                (None, 30, 64)            154564    
                                                                 
 time_distributed (TimeDist  (None, 30, 4)             260       
 ributed)                                                        
                                                                 
Total params: 278668 (1.06 MB)
Trainable params: 278668 (1.06

#### Addestramento LNN

In [None]:
SCALER_PATH = 'scaler.joblib'

# Read the per-feature mean / scale of the fitted scaler; the loss uses them to
# convert normalised network outputs back to physical units.
# Positions 0..3 are taken as Latitude, Longitude, SOG, COG.
# NOTE(review): if anything fails here the hard-coded defaults below are used
# instead, which will not match the scaling actually applied to the data - confirm
# that this fallback is really wanted.
try:
    if not os.path.exists(SCALER_PATH):
        raise FileNotFoundError("File scaler non trovato.")

    print(f"Caricamento parametri fisici da {SCALER_PATH}...")
    scaler = joblib.load(SCALER_PATH)

    MEAN_LAT, MEAN_LON, MEAN_SOG, MEAN_COG = (scaler.mean_[i] for i in range(4))
    STD_LAT, STD_LON, STD_SOG, STD_COG = (scaler.scale_[i] for i in range(4))

    print("Scaler letto correttamente per la LNN.")
except Exception as e:
    print(f"Errore lettura Scaler: {e}. Uso default.")
    MEAN_LAT, MEAN_LON, MEAN_SOG, MEAN_COG = 44.0, 9.0, 10.0, 180.0
    STD_LAT, STD_LON, STD_SOG, STD_COG = 1.0, 1.0, 5.0, 90.0


# DEFINIZIONE LOSS IBRIDA

def physics_informed_loss(y_true, y_pred):
    """Hybrid loss: weighted reconstruction MSE plus a dead-reckoning consistency penalty.

    Both tensors are indexed as (batch, time, channel) with channels
    0..3 = latitude, longitude, SOG, COG, all in scaler-normalised units.

    * Data term: 2 x MSE on position (lat, lon) + 1 x MSE on dynamics (SOG, COG).
    * Physics term: the step-to-step change of the predicted position, converted
      back to degrees, must match the displacement implied by the predicted speed
      and course at the start of the step:
          d_lat = SOG * K * cos(COG)
          d_lon = SOG * K * sin(COG) / cos(latitude)
      The 1/cos(latitude) factor accounts for a degree of longitude covering less
      distance away from the equator; without it the physics term disagrees with
      the data term at every latitude except 0.

    Args:
        y_true: target windows (normalised).
        y_pred: reconstructed windows (normalised).

    Returns:
        Scalar tensor: data_loss + 0.5 * physics_error.
    """
    y_true = tf.cast(y_true, tf.float32)
    y_pred = tf.cast(y_pred, tf.float32)

    deg_to_rad = 0.017453292519943295  # pi / 180
    # Combined unit-conversion / time-step factor mapping SOG to degrees per step.
    # NOTE(review): its derivation (speed unit, sampling interval) is not visible
    # here - confirm it matches the dataset.
    K = 0.00032410
    # Lower bound on cos(latitude) so the correction stays finite near the poles.
    min_cos_lat = 1e-2

    # Channels
    lat_true, lon_true = y_true[:, :, 0], y_true[:, :, 1]
    sog_true, cog_true = y_true[:, :, 2], y_true[:, :, 3]
    lat_pred, lon_pred = y_pred[:, :, 0], y_pred[:, :, 1]
    sog_pred, cog_pred = y_pred[:, :, 2], y_pred[:, :, 3]

    # Reconstruction error, position weighted twice as much as dynamics.
    mse_pos = tf.reduce_mean(tf.square(lat_true - lat_pred) + tf.square(lon_true - lon_pred))
    mse_dyn = tf.reduce_mean(tf.square(sog_true - sog_pred) + tf.square(cog_true - cog_pred))
    data_loss = 2.0 * mse_pos + 1.0 * mse_dyn

    # Position increments produced by the network, in degrees
    # (a difference only needs the scale, not the mean).
    d_lat_net = (lat_pred[:, 1:] - lat_pred[:, :-1]) * STD_LAT
    d_lon_net = (lon_pred[:, 1:] - lon_pred[:, :-1]) * STD_LON

    # Predicted speed and course at the start of each step, in physical units.
    pred_sog_real = (sog_pred[:, :-1] * STD_SOG) + MEAN_SOG
    pred_cog_rad = ((cog_pred[:, :-1] * STD_COG) + MEAN_COG) * deg_to_rad

    # Latitude at the start of each step, taken from the target so the
    # correction factor does not depend on a possibly poor prediction.
    lat_real_rad = ((lat_true[:, :-1] * STD_LAT) + MEAN_LAT) * deg_to_rad
    cos_lat = tf.maximum(tf.cos(lat_real_rad), min_cos_lat)

    # Increments implied by dead reckoning.
    d_lat_phys = (pred_sog_real * K) * tf.cos(pred_cog_rad)
    d_lon_phys = (pred_sog_real * K) * tf.sin(pred_cog_rad) / cos_lat

    physics_error = tf.reduce_mean(tf.square(d_lat_net - d_lat_phys) + tf.square(d_lon_net - d_lon_phys))

    return data_loss + 0.5 * physics_error


# CONFIGURAZIONE TRAINING LNN

# Epoch geometry: both generators are endless, so the number of batches per
# epoch and per validation run is fixed here.
# NOTE(review): val_gen keeps its position between epochs, so unless
# VALIDATION_STEPS_LNN batches correspond to exactly one pass over the validation
# files, each epoch's val_loss is measured on a different slice - confirm.
EPOCHS_LNN = 40
STEPS_PER_EPOCH_LNN = 40000
VALIDATION_STEPS_LNN = 8000

# Learning-rate schedule: cosine decay with warm restarts. The first cycle lasts
# 5 epochs, each following cycle is twice as long and restarts at 90% of the
# previous peak.
initial_learning_rate = 0.0005
lr_schedule = CosineDecayRestarts(
    initial_learning_rate=initial_learning_rate,
    first_decay_steps=5 * STEPS_PER_EPOCH_LNN,
    t_mul=2.0,
    m_mul=0.9,
    alpha=1e-6,
)

# Adam with element-wise gradient clipping.
optimizer_lnn_final = Adam(learning_rate=lr_schedule, clipvalue=0.5)

print("Compilazione LNN con Hybrid Physics Loss")
model_lnn.compile(optimizer=optimizer_lnn_final, loss=physics_informed_loss)

# Callbacks: keep the best weights on disk, stop when val_loss stalls, log every epoch.
checkpoint = ModelCheckpoint(
    'lnn_autoencoder_best.weights.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=7,
    restore_best_weights=True,
    verbose=1,
)

# append=True: re-running the cell adds rows to the existing log instead of replacing it.
csv_logger = CSVLogger('training_log_lnn.csv', append=True)

training_callbacks = [checkpoint, early_stopping, csv_logger]

# Training run; a manual interrupt is reported instead of surfacing as a traceback.
try:
    print("Avvio Addestramento LNN")
    history_lnn = model_lnn.fit(
        train_gen,
        epochs=EPOCHS_LNN,
        steps_per_epoch=STEPS_PER_EPOCH_LNN,
        validation_data=val_gen,
        validation_steps=VALIDATION_STEPS_LNN,
        callbacks=training_callbacks,
        verbose=1,
    )
    print("\nAddestramento LNN Completato.")

except KeyboardInterrupt:
    print("\nInterrotto manualmente.")