## Addestramento LNN

In [1]:
import os

# These environment variables are set BEFORE `import tensorflow` on purpose:
# TensorFlow reads them at import time, so setting them afterwards has no effect.
# TF_USE_LEGACY_KERAS=1 -> per the TensorFlow docs, makes `tf.keras` resolve to the
#                          legacy Keras 2 implementation (presumably needed by ncps' CfC
#                          layer imported further down -- TODO confirm).
# TF_CPP_MIN_LOG_LEVEL=2 -> silences INFO and WARNING messages from the C++ backend.
os.environ["TF_USE_LEGACY_KERAS"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all up front.
# This has to run before any GPU is initialised: TensorFlow raises RuntimeError otherwise,
# which is why that specific exception is caught and only reported.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Configurazione Memoria OK: {len(gpus)} GPU")
    except RuntimeError as e:
        # Best effort: training can still proceed with the default allocation policy.
        print(f"Errore Configurazione Memoria: {e}")

from ncps.tf import CfC

import pandas as pd
import numpy as np
import glob
import gc
import joblib
import pyarrow.parquet as pq
from collections import Counter
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import mixed_precision
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, TimeDistributed, RepeatVector
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# --- Paths -----------------------------------------------------------------
INPUT_DIR = '../Pre-Elaborazione Dati/Dataset'
SCALER_PATH = 'scaler.joblib'

# Columns fed to the model, in the order they are passed to the scaler.
COLONNE_FEATURES = ['Latitude', 'Longitude', 'SOG', 'COG']

# --- Windowing / batching ----------------------------------------------------
WINDOW_SIZE = 30   # rows (time steps) per window
BATCH_SIZE = 64    # windows per batch

# --- Train / validation split (by file, in sorted filename order) -------------
N_TRAIN_FILES = 16
N_VAL_FILES = 4

all_files = sorted(glob.glob(os.path.join(INPUT_DIR, '*.parquet')))
TRAIN_FILES = all_files[:N_TRAIN_FILES]
VAL_FILES = all_files[N_TRAIN_FILES:N_TRAIN_FILES + N_VAL_FILES]

# A wrong INPUT_DIR makes glob return an empty list without any error, which would
# silently produce empty (or shorter than intended) splits. Report it right here,
# where the cause is obvious, instead of letting it surface later during training.
if len(all_files) < N_TRAIN_FILES + N_VAL_FILES:
    print(
        f"ATTENZIONE: trovati {len(all_files)} file .parquet in '{INPUT_DIR}' "
        f"(attesi almeno {N_TRAIN_FILES + N_VAL_FILES}): "
        f"TRAIN_FILES={len(TRAIN_FILES)}, VAL_FILES={len(VAL_FILES)}"
    )

print("Configurazione LNN caricata.")

Configurazione Memoria OK: 1 GPU
Configurazione LNN caricata.


#### Funzioni

In [2]:
def create_windows(data_np, window_size):
    """Return every contiguous window of ``window_size`` rows taken from ``data_np``.

    Windows start at positions 0, 1, 2, ... (stride 1), so an input with ``n`` rows
    produces ``n - window_size + 1`` windows.

    Args:
        data_np: Sliceable sequence of rows (the caller passes a NumPy array).
        window_size: Number of consecutive rows in each window.

    Returns:
        A list of slices of ``data_np``; the list is empty when ``data_np`` has fewer
        than ``window_size`` rows. With a NumPy input the slices are views, not copies.
    """
    n_windows = len(data_np) - window_size + 1
    # range() over a non-positive count is empty, so short inputs need no special case.
    return [data_np[start:start + window_size] for start in range(n_windows)]

def data_generator(file_paths, scaler, features, window_size, batch_size, shuffle_files=False):
    """Endlessly yield autoencoder batches of sliding windows read from Parquet files.

    Every file is streamed in chunks of ``CHUNK_SIZE_ROWS`` rows. Inside a chunk the rows
    are grouped by ``TrajectoryID``; each trajectory is cut into stride-1 windows of
    ``window_size`` rows and the windows are queued until ``batch_size`` of them are
    available. Once every file has been read, the generator starts over from the first.

    Trajectories that cross a chunk or file boundary:
      * a trajectory containing the last row of a chunk may continue in the next chunk,
        so it is held back and merged with its continuation before being windowed;
      * after a trajectory has been windowed, its last ``window_size - 1`` rows are kept
        so that windows straddling the boundary are produced exactly once;
      * anything held back that turns out NOT to continue is windowed instead of being
        discarded, and nothing is carried over from the last file of a pass into the
        first file of the next pass.

    Args:
        file_paths: Parquet files to read, in the order they must be processed.
        scaler: Object exposing ``transform(DataFrame)``; applied to the feature columns
            of every chunk.
        features: Names of the feature columns (column order defines the last axis).
        window_size: Rows per window, must be >= 1.
        batch_size: Windows per yielded batch, must be >= 1.
        shuffle_files: Accepted only for backward compatibility and ignored: files are
            always read in the given order to keep the data sequential.

    Yields:
        ``(x, y)`` tuples of two equal, independent arrays of shape
        ``(batch_size, window_size, len(features))`` (input and reconstruction target).

    Raises:
        ValueError: On the first ``next()`` if ``file_paths`` is empty or if
            ``window_size`` / ``batch_size`` is smaller than 1.
        RuntimeError: If a complete pass over all files produced no window at all
            (every file unreadable or every trajectory too short). Without this the
            generator would spin forever without yielding anything.

    Note:
        A file that raises while being read is reported with ``print`` and skipped.
    """
    from collections import deque  # function-scope import keeps this cell self-contained

    file_paths = list(file_paths)
    features = list(features)
    if not file_paths:
        raise ValueError("file_paths è vuoto: nessun file Parquet da leggere")
    if window_size < 1 or batch_size < 1:
        raise ValueError("window_size e batch_size devono essere >= 1")

    CHUNK_SIZE_ROWS = 500_000
    columns = features + ['TrajectoryID']
    tail_rows = window_size - 1   # rows of context needed for windows crossing a boundary
    window_buffer = deque()       # FIFO of ready windows; popleft() is O(1)
    file_buffer = {}              # TrajectoryID -> rows carried over from the previous file

    def _enqueue_windows(trajectory_df):
        """Queue all windows of ``trajectory_df``; return how many were added."""
        if len(trajectory_df) < window_size:
            return 0
        new_windows = create_windows(trajectory_df[features].to_numpy(), window_size)
        window_buffer.extend(new_windows)
        return len(new_windows)

    def _drain_batches():
        """Yield full batches for as long as the queue holds at least ``batch_size`` windows."""
        while len(window_buffer) >= batch_size:
            batch = np.array([window_buffer.popleft() for _ in range(batch_size)])
            yield batch, batch.copy()

    while True:
        windows_this_pass = 0

        for file_path in file_paths:
            chunk_buffer = {}  # TrajectoryID -> rows carried over from the previous chunk
            try:
                pf = pq.ParquetFile(file_path)
                for batch in pf.iter_batches(batch_size=CHUNK_SIZE_ROWS, columns=columns):
                    df_chunk = batch.to_pandas()
                    if df_chunk.empty:
                        continue
                    df_chunk[features] = scaler.transform(df_chunk[features])
                    last_label = df_chunk.index[-1]
                    next_chunk_buffer = {}

                    for tid, group in df_chunk.groupby('TrajectoryID'):
                        # Rebuild the trajectory in chronological order:
                        # previous file -> previous chunk -> current chunk.
                        pieces = []
                        if tid in file_buffer:
                            pieces.append(file_buffer.pop(tid))
                        if tid in chunk_buffer:
                            pieces.append(chunk_buffer.pop(tid))
                        pieces.append(group)
                        trajectory_data = pd.concat(pieces) if len(pieces) > 1 else group

                        # The trajectory holding the chunk's last row may continue in the
                        # next chunk: hold it back until the continuation is known.
                        if group.index[-1] == last_label:
                            next_chunk_buffer[tid] = trajectory_data
                            continue

                        if len(trajectory_data) < window_size:
                            continue

                        windows_this_pass += _enqueue_windows(trajectory_data)

                        # Keep only the context rows. The positive start index also yields
                        # an empty tail for window_size == 1, where a negative slice of
                        # "-0" would keep the whole trajectory and duplicate its windows.
                        next_chunk_buffer[tid] = trajectory_data.iloc[len(trajectory_data) - tail_rows:]

                        yield from _drain_batches()

                    # Entries still in chunk_buffer did not continue in this chunk. Tails
                    # are shorter than window_size and add nothing; a held-back trajectory
                    # is windowed now instead of being lost.
                    for leftover in chunk_buffer.values():
                        windows_this_pass += _enqueue_windows(leftover)
                    yield from _drain_batches()

                    chunk_buffer = next_chunk_buffer

                # Same for what the previous file handed over and this file never continued.
                for leftover in file_buffer.values():
                    windows_this_pass += _enqueue_windows(leftover)
                yield from _drain_batches()

                file_buffer = chunk_buffer
            except Exception as e:
                # Best effort: report the unreadable file and move on to the next one.
                print(f"\nErrore lettura {file_path}: {e}")

        # End of a pass: window whatever is still held back and start the next pass clean,
        # so rows from the last file are never glued in front of rows from the first one.
        for leftover in file_buffer.values():
            windows_this_pass += _enqueue_windows(leftover)
        file_buffer = {}
        yield from _drain_batches()

        if windows_this_pass == 0:
            raise RuntimeError(
                "Nessuna finestra generata in un passaggio completo sui file: "
                "controllare percorsi, colonne e window_size"
            )
# Visual confirmation that the cell ran and both helper functions are now defined.
print("Funzioni definite")

Funzioni definite


#### Scaler e Generatori

In [4]:
print("Inizializzazione generatori")

# Scaler object stored on disk (presumably fitted during preprocessing -- TODO confirm).
# joblib.load unpickles the file, so only load scaler files from a trusted source.
scaler = joblib.load(SCALER_PATH)

# Training and validation generators share every setting except the list of files.
# Calling data_generator only creates the generator objects: no file is read here.
train_gen, val_gen = (
    data_generator(
        file_paths=split_files,
        scaler=scaler,
        features=COLONNE_FEATURES,
        window_size=WINDOW_SIZE,
        batch_size=BATCH_SIZE,
        shuffle_files=False,
    )
    for split_files in (TRAIN_FILES, VAL_FILES)
)

print("Generatori pronti.")

Inizializzazione generatori
Generatori pronti.


#### Modello LNN

In [8]:
# Sequence autoencoder built from two CfC layers (ncps):
#   input (WINDOW_SIZE, n_features) -> CfC encoder -> latent vector
#   -> RepeatVector -> CfC decoder -> per-time-step Dense reconstruction.
n_features = len(COLONNE_FEATURES)
latent_dim = 32 # Same size as the LSTM (presumably a baseline model trained elsewhere -- TODO confirm)

# Encoder input: one window of WINDOW_SIZE time steps with n_features values each
inputs = Input(shape=(WINDOW_SIZE, n_features))

# LIQUID LAYER 1 (Encoder)
# return_sequences=False: only the final state is returned, i.e. one latent_dim-sized
# vector per window. mixed_memory=True selects ncps' mixed-memory variant of the layer.
lnn_encoder = CfC(latent_dim, return_sequences=False, mixed_memory=True)(inputs)

# Bridge: repeat the latent vector WINDOW_SIZE times so the decoder receives a sequence
repeat_vector = RepeatVector(WINDOW_SIZE)(lnn_encoder)

# LIQUID LAYER 2 (Decoder)
# return_sequences=True: one latent_dim-sized output for every time step.
lnn_decoder = CfC(latent_dim, return_sequences=True, mixed_memory=True)(repeat_vector)

# Output: the same Dense layer applied at each time step maps back to n_features values
output = TimeDistributed(Dense(n_features))(lnn_decoder)

model_lnn = Model(inputs, output)
# Initial compile with default Adam settings and mean absolute error as the loss
model_lnn.compile(optimizer='adam', loss='mae')

print("Modello LNN (Liquid Neural Network) creato e compilato.")
model_lnn.summary()

Modello LNN (Liquid Neural Network) creato e compilato.
Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 30, 4)]           0         
                                                                 
 cf_c_2 (CfC)                (None, 32)                29568     
                                                                 
 repeat_vector_1 (RepeatVec  (None, 30, 32)            0         
 tor)                                                            
                                                                 
 cf_c_3 (CfC)                (None, 30, 32)            33152     
                                                                 
 time_distributed_1 (TimeDi  (None, 30, 4)             132       
 stributed)                                                      
                                                                 
Tot

#### Addestramento LNN

In [12]:
# Parameters
# train_gen / val_gen never terminate on their own, so Keras needs explicit step counts
# to know where an epoch (and a validation run) ends.
STEPS_PER_EPOCH_LNN = 40000
VALIDATION_STEPS_LNN = 8000 
EPOCHS_LNN = 100 

# OPTIMIZER DEFINITION
optimizer_lnn = tf.keras.optimizers.Adam(
    learning_rate=0.00025, 
    clipvalue=0.5          # Hard clip applied to gradient values
)

# RECOMPILE (Reset)
# NOTE(review): compile() installs the new optimizer and loss; as far as the Keras docs
# go it does not re-initialise layer weights, so "reset" here would mean a fresh
# optimizer state rather than a fresh model -- confirm this is the intent.
model_lnn.compile(optimizer=optimizer_lnn, loss='mae')
print("Modello Resettato.")

# CALLBACKS
# Save the model to disk every time val_loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    'lnn_autoencoder_best.keras', 
    monitor='val_loss', 
    save_best_only=True, 
    mode='min', 
    verbose=1
)

# Stop after 10 epochs without a val_loss improvement and restore the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss', 
    patience=10, # Give it time, because it will learn more slowly
    mode='min', 
    verbose=1, 
    restore_best_weights=True
)

# append=True: re-running this cell adds rows to the existing CSV instead of replacing it.
csv_logger = tf.keras.callbacks.CSVLogger('training_log_lnn_v4.csv', append=True)

# START
# NOTE(review): val_gen is created once and not re-created between epochs, so each
# validation run presumably continues from where the previous one stopped. Unless
# VALIDATION_STEPS_LNN equals one full pass over the validation files, val_loss is
# measured on different windows every epoch -- verify.
try:
    history_lnn = model_lnn.fit(
        train_gen,
        steps_per_epoch=STEPS_PER_EPOCH_LNN,
        epochs=EPOCHS_LNN,
        validation_data=val_gen,
        validation_steps=VALIDATION_STEPS_LNN,
        callbacks=[checkpoint, early_stopping, csv_logger],
        verbose=1
    )
    print("\nAddestramento LNN Completato!")
    
except KeyboardInterrupt:
    # Manual stop from the notebook: history_lnn is not assigned in this case.
    print("\nInterrotto manualmente.")

Modello Resettato.
Epoch 1/100
Epoch 1: val_loss improved from inf to 0.79569, saving model to lnn_autoencoder_best.keras
Epoch 2/100
Epoch 2: val_loss improved from 0.79569 to 0.68646, saving model to lnn_autoencoder_best.keras
Epoch 3/100
Epoch 3: val_loss did not improve from 0.68646
Epoch 4/100
Epoch 4: val_loss improved from 0.68646 to 0.62918, saving model to lnn_autoencoder_best.keras
Epoch 5/100
Epoch 5: val_loss improved from 0.62918 to 0.57174, saving model to lnn_autoencoder_best.keras
Epoch 6/100
Epoch 6: val_loss did not improve from 0.57174
Epoch 7/100
Epoch 7: val_loss improved from 0.57174 to 0.57095, saving model to lnn_autoencoder_best.keras
Epoch 8/100
Epoch 8: val_loss did not improve from 0.57095
Epoch 9/100
Epoch 9: val_loss did not improve from 0.57095
Epoch 10/100
Epoch 10: val_loss did not improve from 0.57095
Epoch 11/100
Epoch 11: val_loss did not improve from 0.57095
Epoch 12/100
Epoch 12: val_loss did not improve from 0.57095
Epoch 13/100
Epoch 13: val_loss