In [None]:
import numpy as np
import h5py
import tensorflow as tf

def normalize_events(events):
    """Scale the whole input array by its single global maximum.

    Every element is divided by the same number, ``events.max()``, so the
    relative amplitudes between different samples are preserved.

    Parameters
    ----------
    events : numpy.ndarray
        Array to scale. Must be non-empty, because ``max`` of an empty
        array raises ``ValueError``.

    Returns
    -------
    numpy.ndarray
        ``events / events.max()``. When the maximum is exactly zero the
        values are returned unscaled (as floats) instead of NaN/inf.
    """
    peak = events.max()
    # A zero maximum would make the division produce NaN/inf everywhere,
    # which then silently poisons training. Dividing by 1 keeps the data finite.
    scale = peak if peak != 0 else 1
    return events / scale

# Normalize truths independently
def normalize_truths(truths):
    """Scale every sample by its own maximum.

    The maximum is taken over axes 1, 2 and 3 separately for each entry
    along axis 0, so each sample ends up with a peak value of 1.

    Parameters
    ----------
    truths : numpy.ndarray
        Array with at least four dimensions; axis 0 indexes the samples.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as ``truths``. Samples whose maximum is
        exactly zero are returned unscaled instead of becoming NaN.
    """
    truths_max = truths.max(axis=(1, 2, 3), keepdims=True)
    # An all-zero sample has a maximum of 0, and 0 / 0 would put NaNs into the
    # training targets (and from there into the loss). Divide those by 1.
    safe_max = np.where(truths_max == 0, 1, truths_max)
    return truths / safe_max


# Load data
# NOTE: absolute, machine-specific location -- edit DATA_PATH to run elsewhere.
DATA_PATH = '/home/filippo/DUNE/data/numu-CC-QE/lightweight_dataset_30cm.h5'

with h5py.File(DATA_PATH, 'r') as f:
    events = f['inputs'][:]    # Load all inputs into memory
    truths = f['targets'][:]   # Load all targets into memory

# ----------------------------
# 1. Normalize inputs and targets
# ----------------------------
# Inputs are scaled by one global maximum; targets are scaled sample by sample.
normalized_events = normalize_events(events)
normalized_truths = normalize_truths(truths)

# Inputs: (N, 60)
X = normalized_events

# Targets: (N, 2, 4, 5), each sample scaled by its own maximum
# (values lie in [0, 1] provided the raw truths are non-negative)
y = normalized_truths

# ---- Build the model ----
input_dim = 60
output_shape = (2, 4, 5)  #30cm
#output_shape = (3, 6, 7)  #20cm
#output_shape = (4, 8, 9)  #15cm

# Fail early, with a readable message, if the hard-coded model dimensions do
# not match the file that was actually loaded (e.g. after switching to the
# 20cm / 15cm dataset without updating output_shape).
assert len(X) == len(y), f"inputs/targets length mismatch: {len(X)} vs {len(y)}"
assert X.shape[1:] == (input_dim,), (
    f"expected inputs of shape (N, {input_dim}), got {X.shape}"
)
assert y.shape[1:] == output_shape, (
    f"expected targets of shape (N, {output_shape}), got {y.shape}"
)

# model = tf.keras.Sequential([
#     tf.keras.layers.Input(shape=(input_dim,)),
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.Dense(64, activation='relu'),
#     tf.keras.layers.Dense(np.prod(output_shape), activation='sigmoid'),
#     tf.keras.layers.Reshape(output_shape)
# ])

# model = tf.keras.Sequential([
#     tf.keras.layers.Input(shape=(input_dim,)),
    
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.Dropout(0.2),
    
#     tf.keras.layers.Dense(128, activation='relu'),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.Dropout(0.2),
    
#     tf.keras.layers.Dense(64, activation='relu'),
    
#     tf.keras.layers.Dense(np.prod(output_shape), activation='sigmoid'),
#     tf.keras.layers.Reshape(output_shape)
# ])

def normalize_output(x):
    """Rescale each sample of a batched tensor by its own maximum.

    The maximum is reduced over axes 1, 2 and 3 (kept as size-1 dimensions so
    it broadcasts against ``x``), and a small constant is added to the divisor
    so an all-zero sample does not trigger a division by zero.
    """
    per_sample_peak = tf.reduce_max(x, axis=[1, 2, 3], keepdims=True)
    denominator = per_sample_peak + 1e-6
    return x / denominator

# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation, dropout masks and batch shuffling start
# from the same state on every Restart & Run All.
SEED = 42
tf.keras.utils.set_random_seed(SEED)

# Training hyperparameters
EPOCHS = 50
BATCH_SIZE = 32
VALIDATION_SPLIT = 0.3

# Fully connected network: two (Dense 128 -> BatchNorm -> Dropout 0.2) stages,
# a Dense 64 stage, then one non-negative (ReLU) unit per output voxel. The
# flat output is reshaped to `output_shape` and finally passed through
# `normalize_output`.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(input_dim,)),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.2),

    tf.keras.layers.Dense(64, activation='relu'),

    # np.prod returns a NumPy scalar; hand Keras a plain Python int as `units`.
    tf.keras.layers.Dense(int(np.prod(output_shape)), activation='relu'),
    tf.keras.layers.Reshape(output_shape),
    tf.keras.layers.Lambda(normalize_output)
])


model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.summary()

# ---- Train the model ----

# NOTE(review): Keras takes the validation fraction from the END of X / y,
# before shuffling -- confirm the HDF5 file is not ordered in a way that makes
# the last 30% unrepresentative.
history = model.fit(
    X,
    y,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
)

In [None]:
import matplotlib.pyplot as plt

# ---- Plot loss and MAE curves ----

# One panel per tracked quantity, side by side: MSE loss on the left, MAE on
# the right. Each panel overlays the training and the validation curve.
fig, (loss_ax, mae_ax) = plt.subplots(1, 2, figsize=(12, 6))

curve_panels = [
    # (axes, train key, val key, train label, val label, y label, title)
    (loss_ax, 'loss', 'val_loss', 'Train Loss (MSE)', 'Val Loss (MSE)',
     'MSE Loss', 'Training and Validation MSE Loss'),
    (mae_ax, 'mae', 'val_mae', 'Train MAE', 'Val MAE',
     'MAE', 'Training and Validation MAE'),
]

for ax, train_key, val_key, train_label, val_label, y_label, panel_title in curve_panels:
    ax.plot(history.history[train_key], label=train_label, lw=2)
    ax.plot(history.history[val_key], label=val_label, lw=2)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True)

fig.tight_layout()
plt.show()


In [None]:
# Collapse every value of normalized_truths into a single 1-D array
all_values = normalized_truths.flatten()

# Histogram of those values with a logarithmic count axis
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(all_values, bins=50, color='skyblue', edgecolor='black', log=True)
ax.set_xlabel('Normalized Value')
ax.set_ylabel('Frequency')
ax.set_title('Histogram of All Normalized Truth Values')
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
def plot_truth_vs_pred(truth, pred, sample_idx=0):
    """Show truth, prediction and their absolute difference, channel by channel.

    Draws a 3-row grid with one column per entry along the first axis of
    ``truth``: row 0 is the truth, row 1 the prediction, row 2
    ``|truth - pred|``. Every image uses a fixed 0-1 colour scale so panels
    can be compared directly.

    Parameters
    ----------
    truth : numpy.ndarray
        Reference array; ``truth[i]`` is drawn with ``imshow``
        (assumed layout (channels, rows, cols) -- TODO confirm).
    pred : numpy.ndarray
        Predicted array, indexed the same way as ``truth``.
    sample_idx : int, optional
        Only used in the figure title.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    n_channels = truth.shape[0]
    # squeeze=False keeps `axes` two-dimensional even when there is a single
    # channel; otherwise axes[row, col] indexing raises IndexError.
    fig, axes = plt.subplots(3, n_channels, figsize=(18, 9), squeeze=False)

    for i in range(n_channels):
        # (row label, image, colormap) -- all on a fixed 0–1 scale
        rows = (
            ('Truth', truth[i], 'viridis'),
            ('Prediction', pred[i], 'viridis'),
            ('Abs Diff', np.abs(truth[i] - pred[i]), 'hot'),
        )
        for ax, (label, image, cmap) in zip(axes[:, i], rows):
            im = ax.imshow(image, cmap=cmap, aspect='auto', vmin=0, vmax=1)
            ax.set_title(f'{label} - Channel {i}')
            fig.colorbar(im, ax=ax)

    fig.suptitle(f'Truth vs Prediction with Fixed Color Scales — Sample #{sample_idx}', fontsize=16)
    fig.tight_layout()
    plt.show()

# Pick one event, run it through the model and compare it with its truth
sample_idx = 110
truth_sample = y[sample_idx]
# Slice rather than index so the model still receives a leading batch axis;
# [0] then drops that axis from the prediction again.
sample_batch = X[sample_idx:sample_idx + 1]
predicted = model.predict(sample_batch)[0]
plot_truth_vs_pred(truth_sample, predicted, sample_idx)

In [None]:
from tensorflow.keras.utils import plot_model

# Draw the layer graph of `model`, annotated with tensor shapes and layer names.
# `to_file` is not passed, so plot_model uses its default output path; add
# to_file="model_architecture.png" to choose the file name explicitly.
plot_model(model, show_shapes=True, show_layer_names=True, dpi=96)
