# In [None]:
import argparse
import os
import pickle
from pennylane import numpy as np
import pennylane as qml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import pandas as pd
import json

def get_args():
    """Parse the command-line arguments of the script.

    Two string options are registered, both mandatory:

    * ``--dataset-path``: path to the dataset.
    * ``--output-dir``: path to the output directory.

    Returns:
        argparse.Namespace: exposes ``dataset_path`` and ``output_dir``.
    """
    cli = argparse.ArgumentParser(description="Parse the script arguments.")

    # Every option shares the same type/required settings; only the flag
    # name and the help text differ.
    required_options = (
        ("--dataset-path", "Path to the dataset."),
        ("--output-dir", "Path to the output directory."),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, type=str, required=True, help=help_text)

    return cli.parse_args()

def ingest_dataset(path, n_pkts=10, n_features=4):
    """Load biflows and labels from a pickle file and crop the biflows.

    The file must contain two consecutive pickled objects: first the
    biflows, then the labels. The biflows are converted to an array and
    indexed on three axes, keeping at most ``n_pkts`` entries on the second
    axis and ``n_features`` on the third.

    NOTE: ``pickle.load`` can execute arbitrary code while deserialising;
    only use this function on files from a trusted source.

    Args:
        path: Path of the pickle file to read.
        n_pkts: Maximum number of entries kept along axis 1.
        n_features: Maximum number of entries kept along axis 2.

    Returns:
        tuple: ``(biflows, labels)`` where ``biflows`` is the cropped array
        and ``labels`` is the second pickled object, returned unchanged.
    """
    with open(path, "rb") as handle:
        # Order matters: the biflows were dumped first, the labels second.
        raw_biflows = pickle.load(handle)
        labels = pickle.load(handle)

    pkt_window = slice(None, n_pkts)
    feature_window = slice(None, n_features)
    cropped = np.array(raw_biflows)[:, pkt_window, feature_window]
    return cropped, labels

if __name__ == "__main__":
    # End-to-end script: load the dataset, train a hybrid classical/quantum
    # Keras model and write the model summary, training history, test
    # predictions and label mapping into the output directory.

    # Argument parsing
    args = get_args()
    dataset_path = args.dataset_path
    output_dir = args.output_dir

    # Per-biflow input window and random seed
    n_pkts = 10
    n_features = 4
    seed = 2025

    # Size of the quantum circuit
    n_qubits = 5
    n_layers = 3

    # Reproducibility
    np.random.seed(seed)                    # NumPy
    tf.random.set_seed(seed)                # TensorFlow
    tf.keras.utils.set_random_seed(seed)    # Keras

    # Dataset loading
    X, y = ingest_dataset(dataset_path, n_pkts=n_pkts, n_features=n_features)
    num_classes = len(np.unique(y))

    # Label encoding
    le = LabelEncoder()
    y = le.fit_transform(y)

    # Stratified train/test split (80/20); the training part is split again
    # into train/validation (80/20) further below.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=seed, stratify=y,
    )

    # Min-max scaling per feature: samples are flattened to
    # (n_samples * n_pkts, n_features), scaled, then reshaped back.
    # NOTE(review): the scaler is fitted before the validation split, so the
    # validation samples contribute to the min/max statistics.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(np.reshape(X_train, [-1, n_features]))
    res_samples_train = scaler.transform(np.reshape(X_train, [-1, n_features]))
    res_samples_test = scaler.transform(np.reshape(X_test, [-1, n_features]))
    X_train = np.reshape(res_samples_train, [-1, n_pkts, n_features])
    X_test = np.reshape(res_samples_test, [-1, n_pkts, n_features])

    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train, y_train, test_size=0.2, random_state=seed, stratify=y_train,
    )

    # One-hot targets for the categorical cross-entropy loss
    ohe_y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_classes)
    ohe_y_valid = tf.keras.utils.to_categorical(y_valid, num_classes=num_classes)

    # Quantum circuit definition
    dev = qml.device("default.qubit", seed=seed, wires=n_qubits)

    @qml.qnode(dev, interface="tf")
    def qnode(inputs, weights):
        # Feature map
        qml.AmplitudeEmbedding(inputs, wires=range(n_qubits), normalize=True, pad_with=0.0)

        # Ansatz
        qml.StronglyEntanglingLayers(weights, wires=range(n_qubits))

        # Measurement: probabilities of all 2**n_qubits basis states
        return qml.probs(wires=range(n_qubits))

    # Shape of the ansatz weights. The KerasLayer below is given only this
    # shape dictionary, so no weight tensor is created by hand here.
    weight_shapes = {"weights": (n_layers, n_qubits, 3)}

    # Model definition
    model = Sequential(name='ampe_fixed_probs')
    model.add(Flatten())
    model.add(Dense(2**n_qubits, activation='sigmoid', input_dim=n_pkts*n_features))
    model.add(qml.qnn.KerasLayer(qnode, weight_shapes, output_dim=2**n_qubits))  # <-- quantum part
    model.add(Dense(num_classes, activation='softmax'))
    model.build(np.shape(X_train))

    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    # NOTE(review): with epochs=1 below, a patience of 10 can never trigger
    # early stopping; confirm whether epochs=1 is a leftover debug setting.
    earlystop = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=10, verbose=1, mode='auto')
    callbacks = [earlystop]

    # Create the output directory once; every artefact below is written into it.
    os.makedirs(output_dir, exist_ok=True)

    with open(os.path.join(output_dir, "model_summary.txt"), "w", encoding="utf-8") as f:
        model.summary(print_fn=lambda x: f.write(x + "\n"))

    history = model.fit(X_train, ohe_y_train, validation_data=(X_valid, ohe_y_valid),
                        epochs=1, batch_size=50,
                        callbacks=callbacks, verbose=2)

    df_history = pd.DataFrame(history.history)
    df_history.to_csv(os.path.join(output_dir, "training_history.csv"), index=False)

    y_pred_probs = model.predict(X_test)
    y_pred = np.argmax(y_pred_probs, axis=1)

    # Saving the results
    soft_values = [",".join(map(str, probs)) for probs in y_pred_probs]
    df_soft = pd.DataFrame({
        "Actual": y_test,
        "soft_values": soft_values
    })
    df_pred = pd.DataFrame({
        "Actual": y_test,
        "Predicted": y_pred
    })

    df_soft.to_csv(os.path.join(output_dir, "soft_values.dat"), sep="\t", index=False)
    df_pred.to_csv(os.path.join(output_dir, "predictions.dat"), sep="\t", index=False)

    # Encoded label -> original label. LabelEncoder encodes each class as its
    # index in classes_, so enumerate() reproduces the encoding. tolist()
    # converts NumPy scalars to native Python values, which json.dump can
    # serialise (a raw NumPy integer label would raise TypeError).
    labels_map = {
        str(encoded): original
        for encoded, original in enumerate(le.classes_.tolist())
    }
    with open(os.path.join(output_dir, "labels_map.json"), "w", encoding="utf-8") as f:
        json.dump(labels_map, f)