## Addestramento LNN

In [1]:
import os

os.environ["TF_USE_LEGACY_KERAS"] = "1"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf

# Let TensorFlow allocate GPU memory on demand instead of reserving it all up
# front. This has to run before any GPU is initialised, which is why it sits
# between the TensorFlow import and the remaining imports.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Report the failure and carry on with TensorFlow's default allocation.
        print(f"Errore Configurazione Memoria: {e}")
    else:
        # Only reached when memory growth was enabled on every detected GPU.
        print(f"Configurazione Memoria OK: {len(gpus)} GPU")

from ncps.tf import CfC

import pandas as pd
import numpy as np
import glob
import gc
import joblib
import pyarrow.parquet as pq
from collections import Counter
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import mixed_precision
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, TimeDistributed, RepeatVector
from tensorflow.keras.optimizers.schedules import CosineDecayRestarts, ExponentialDecay
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, CSVLogger
from ncps.wirings import AutoNCP





# --- Paths and feature columns ---------------------------------------------
INPUT_DIR = '../Pre-Elaborazione Dati/Dataset'
SCALER_PATH = 'scaler.joblib'
COLONNE_FEATURES = ['Latitude', 'Longitude', 'SOG', 'COG']

# --- Windowing / batching ---------------------------------------------------
WINDOW_SIZE = 30
BATCH_SIZE = 64

# --- Train / validation split (by file, in sorted order) --------------------
# The first N_TRAIN_FILES files are used for training and the following
# N_VAL_FILES files for validation.
# NOTE(review): sorted() is lexicographic, so the split only follows the
# intended order if the file names sort that way — confirm the naming scheme.
N_TRAIN_FILES = 16
N_VAL_FILES = 4

all_files = sorted(glob.glob(os.path.join(INPUT_DIR, '*.parquet')))
TRAIN_FILES = all_files[:N_TRAIN_FILES]
VAL_FILES = all_files[N_TRAIN_FILES:N_TRAIN_FILES + N_VAL_FILES]

# Fail fast: slicing a short list silently yields an empty split, and an
# endless data generator fed with no files would never produce a batch.
if not TRAIN_FILES or not VAL_FILES:
    raise FileNotFoundError(
        f"Found {len(all_files)} parquet file(s) in '{INPUT_DIR}': need more than "
        f"{N_TRAIN_FILES} to build both the training and the validation split."
    )

print("Configurazione LNN caricata.")

Configurazione Memoria OK: 1 GPU
Configurazione LNN caricata.


#### Funzioni

In [2]:
def create_windows(data_np, window_size):
    """Return all overlapping windows of ``window_size`` consecutive rows.

    Windows are taken with a stride of one row, in order, by slicing
    ``data_np`` along its first axis. When ``data_np`` has fewer than
    ``window_size`` rows the result is an empty list.

    Args:
        data_np: Sliceable sequence of rows (a NumPy array in this notebook).
        window_size: Number of consecutive rows in each window.

    Returns:
        list: One slice ``data_np[start:start + window_size]`` per valid start.
    """
    n_windows = len(data_np) - window_size + 1
    return [data_np[start:start + window_size] for start in range(n_windows)]

def data_generator(file_paths, scaler, features, window_size, batch_size, shuffle_files=False):
    """Endlessly yield ``(x, y)`` batches of sliding windows read from Parquet files.

    Every file is streamed in chunks of ``CHUNK_SIZE_ROWS`` rows. The feature
    columns are scaled with ``scaler.transform``, rows are grouped by
    ``TrajectoryID`` and each run of ``window_size`` consecutive rows of a
    trajectory becomes one window. ``x`` and ``y`` contain the same windows
    (the autoencoder target is its own input).

    A trajectory whose last row is also the last row of the current chunk is
    held back and prepended to its continuation in the next chunk; whatever is
    still held back at the end of a file is offered to the following file.
    For trajectories that were windowed, only their last ``window_size - 1``
    rows are carried over, so a continuation produces exactly the windows that
    straddle the boundary.

    Args:
        file_paths: Parquet files to cycle through, in order.
        scaler: Fitted object exposing ``transform`` for the feature columns.
        features: Names of the feature columns.
        window_size: Rows per window (>= 1).
        batch_size: Windows per yielded batch (>= 1).
        shuffle_files: Accepted for interface compatibility and ignored: files
            are always read in the given order to keep sequences intact.

    Yields:
        tuple: Two independent arrays with identical content, each of shape
        ``(batch_size, window_size, len(features))``.

    Raises:
        ValueError: If ``file_paths`` is empty or ``window_size`` /
            ``batch_size`` is smaller than 1.
        RuntimeError: If a complete pass over ``file_paths`` could not read a
            single file.
        Because this is a generator, both are raised on the first ``next()``
        (or when the failing pass completes), not when the function is called.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    file_paths = list(file_paths)
    if not file_paths:
        # Without this guard the endless loop below would spin without ever yielding.
        raise ValueError("file_paths is empty: there is nothing to generate batches from")

    features = list(features)
    columns = features + ['TrajectoryID']

    file_buffer = {}     # trajectory tails carried from one file to the next
    window_buffer = []   # windows waiting to fill a complete batch
    tail_len = window_size - 1
    CHUNK_SIZE_ROWS = 500_000

    while True:
        files_read = 0

        for file_path in file_paths:
            chunk_buffer = {}
            try:
                pf = pq.ParquetFile(file_path)
                for batch in pf.iter_batches(batch_size=CHUNK_SIZE_ROWS, columns=columns):
                    df_chunk = batch.to_pandas()
                    df_chunk[features] = scaler.transform(df_chunk[features])
                    next_chunk_buffer = {}
                    # Index label of the chunk's final row, used to detect the
                    # trajectory that may continue in the next chunk.
                    last_label = df_chunk.index[-1] if len(df_chunk) else None

                    for tid, group in df_chunk.groupby('TrajectoryID'):
                        if tid in chunk_buffer:
                            trajectory_data = pd.concat([chunk_buffer.pop(tid), group])
                        else:
                            trajectory_data = group

                        if tid in file_buffer:
                            trajectory_data = pd.concat([file_buffer.pop(tid), trajectory_data])

                        # The trajectory reaches the end of the chunk: hold it
                        # back until its continuation (if any) arrives.
                        if trajectory_data.index[-1] == last_label:
                            next_chunk_buffer[tid] = trajectory_data
                            continue

                        if len(trajectory_data) < window_size:
                            continue

                        trajectory_np = trajectory_data[features].to_numpy()
                        window_buffer.extend(create_windows(trajectory_np, window_size))

                        # Keep just enough rows to build the windows that would
                        # straddle a later continuation. A negative slice with
                        # tail_len == 0 would keep the whole frame, hence the guard.
                        if tail_len > 0:
                            next_chunk_buffer[tid] = trajectory_data.iloc[-tail_len:]

                        # Emit every complete batch by offset and trim the
                        # buffer once, instead of re-copying it per batch.
                        n_full = len(window_buffer) // batch_size
                        for batch_idx in range(n_full):
                            start = batch_idx * batch_size
                            ready = window_buffer[start:start + batch_size]
                            yield (np.array(ready), np.array(ready))
                        if n_full:
                            window_buffer = window_buffer[n_full * batch_size:]

                    chunk_buffer = next_chunk_buffer
                file_buffer = chunk_buffer
                files_read += 1
            except Exception as e:
                # Best effort: report the problem and move on to the next file.
                print(f"\nErrore lettura {file_path}: {e}")
                continue

        if files_read == 0:
            # Every file failed in this pass; retrying forever would only
            # repeat the same errors without ever producing a batch.
            raise RuntimeError(
                f"data_generator could not read any of the {len(file_paths)} input file(s)"
            )
# Cell-output marker confirming that the helper functions of this cell were defined.
print("Funzioni definite")

Funzioni definite


#### Scaler e Generatori

In [3]:
print("Inizializzazione generatori")

# NOTE: joblib.load unpickles the file, so SCALER_PATH must point to a trusted artifact.
scaler = joblib.load(SCALER_PATH)

# Both generators share every setting except the list of files they read.
# data_generator is a generator function, so nothing is read from disk here:
# files are opened only when the first batch is requested.
shared_generator_args = dict(
    scaler=scaler,
    features=COLONNE_FEATURES,
    window_size=WINDOW_SIZE,
    batch_size=BATCH_SIZE,
    shuffle_files=False,
)

train_gen = data_generator(file_paths=TRAIN_FILES, **shared_generator_args)
val_gen = data_generator(file_paths=VAL_FILES, **shared_generator_args)

print("Generatori pronti.")

Inizializzazione generatori
Generatori pronti.


#### Modello LNN

In [4]:
# Sequence autoencoder: a CfC encoder compresses each window into one vector,
# which is repeated WINDOW_SIZE times and decoded back into a window by a
# second CfC layer followed by a per-timestep Dense projection.
n_features = len(COLONNE_FEATURES)
latent_dim = 128
output_dim = 64
wiring = AutoNCP(latent_dim,output_dim) # Defines a sparse wiring

# Encoder
inputs = Input(shape=(WINDOW_SIZE, n_features))
# Liquid layer 1 (encoder): uses the sparse wiring; return_sequences=False
# keeps only the final output, i.e. one vector per window.
lnn_encoder = CfC(wiring, return_sequences=False, mixed_memory=True)(inputs) 

# Decoder
# Feed the encoded vector to the decoder once per timestep of the window.
repeat_vector = RepeatVector(WINDOW_SIZE)(lnn_encoder)
# NOTE(review): the decoder reuses the very same `wiring` object as the
# encoder although its input is the repeated encoding rather than the raw
# features — confirm against the ncps documentation that sharing one wiring
# between two layers is intended/supported.
lnn_decoder = CfC(wiring, return_sequences=True, mixed_memory=True)(repeat_vector)

# Project every decoded timestep back to the original number of features.
output = TimeDistributed(Dense(n_features))(lnn_decoder)

model_lnn = Model(inputs, output) # Renamed to avoid confusion

model_lnn.summary()

I0000 00:00:1764090248.612338   12020 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4130 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4050 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 30, 4)]           0         
                                                                 
 cf_c (CfC)                  (None, 64)                123844    
                                                                 
 repeat_vector (RepeatVecto  (None, 30, 64)            0         
 r)                                                              
                                                                 
 cf_c_1 (CfC)                (None, 30, 64)            154564    
                                                                 
 time_distributed (TimeDist  (None, 30, 4)             260       
 ributed)                                                        
                                                                 
Total params: 278668 (1.06 MB)
Trainable params: 278668 (1.06

#### Addestramento LNN

In [7]:
# --- Training budget --------------------------------------------------------
# The generators are endless, so the length of an epoch is defined purely by
# these step counts.
STEPS_PER_EPOCH_LNN = 40000
VALIDATION_STEPS_LNN = 8000
EPOCHS_LNN = 20

# --- Learning-rate schedule -------------------------------------------------
# Cosine decay with warm restarts: the first cycle lasts T_0 epochs, each
# following cycle is t_mul times longer and restarts at m_mul times the
# previous peak.
initial_learning_rate = 0.0005
T_0 = 5
lr_schedule = CosineDecayRestarts(
    initial_learning_rate=initial_learning_rate,
    first_decay_steps=T_0 * STEPS_PER_EPOCH_LNN,
    t_mul=2.0,
    m_mul=0.9,
    # Per the Keras docs alpha is the floor expressed as a FRACTION of
    # initial_learning_rate, not an absolute learning rate.
    alpha=1e-6,
)

# --- Optimizer --------------------------------------------------------------
# clipvalue clips every gradient component to [-0.5, 0.5].
optimizer_lnn_final = Adam(learning_rate=lr_schedule, clipvalue=0.5)

model_lnn.compile(optimizer=optimizer_lnn_final, loss='mae')

# --- Callbacks --------------------------------------------------------------
# Keep only the weights of the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint(
    filepath='lnn_autoencoder_best.weights.h5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# append=True keeps the history of earlier runs in the same CSV file.
csv_logger = tf.keras.callbacks.CSVLogger('training_log_lnn.csv', append=True)

training_callbacks = [checkpoint, early_stopping, csv_logger]

# NOTE(review): val_gen is one endless generator that is never reset, so each
# epoch presumably validates on the NEXT VALIDATION_STEPS_LNN batches rather
# than on a fixed set — confirm that val_loss is comparable across epochs.
fit_settings = {
    'steps_per_epoch': STEPS_PER_EPOCH_LNN,
    'epochs': EPOCHS_LNN,
    'validation_data': val_gen,
    'validation_steps': VALIDATION_STEPS_LNN,
    'callbacks': training_callbacks,
    'verbose': 1,
}

# --- Run --------------------------------------------------------------------
# A manual interrupt ends the cell cleanly; history_lnn is only defined when
# fit() returns normally.
try:
    history_lnn = model_lnn.fit(train_gen, **fit_settings)
    print("\nAddestramento LNN Completato.")

except KeyboardInterrupt:
    print("\nInterrotto manualmente.")

Epoch 1/20
Epoch 1: val_loss improved from inf to 0.05828, saving model to lnn_autoencoder_best.weights.h5
Epoch 2/20
Epoch 2: val_loss improved from 0.05828 to 0.05225, saving model to lnn_autoencoder_best.weights.h5
Epoch 3/20
Epoch 3: val_loss improved from 0.05225 to 0.03114, saving model to lnn_autoencoder_best.weights.h5
Epoch 4/20
Epoch 4: val_loss improved from 0.03114 to 0.02890, saving model to lnn_autoencoder_best.weights.h5
Epoch 5/20
Epoch 5: val_loss did not improve from 0.02890
Epoch 6/20
Epoch 6: val_loss did not improve from 0.02890
Epoch 7/20
Epoch 7: val_loss did not improve from 0.02890
Epoch 8/20
Epoch 8: val_loss did not improve from 0.02890
Epoch 9/20
Epoch 9: val_loss did not improve from 0.02890
Epoch 10/20
Epoch 10: val_loss improved from 0.02890 to 0.02820, saving model to lnn_autoencoder_best.weights.h5
Epoch 11/20
Epoch 11: val_loss did not improve from 0.02820
Epoch 12/20
Epoch 12: val_loss improved from 0.02820 to 0.02764, saving model to lnn_autoencoder_