In [18]:
import numpy as np
from conllu_reader import ConlluReader
from algorithm import ArcEager
import pickle

# --- 1. LOAD DATA (Use the TRAIN file, not test) ---
print("--- STEP 1: Data Loading ---")
reader = ConlluReader()
# Ensure the filename matches your specific training file path
train_sentences = reader.read_conllu_file("en_partut-ud-train_clean.conllu")

# Non-projective trees are filtered out because Arc-Eager cannot handle them.
train_sentences = reader.remove_non_projective_trees(train_sentences)
print(f" Loaded {len(train_sentences)} valid projective sentences for training.\n")


# --- 2. OBTAIN RAW SAMPLES (Oracle Execution) ---
print("--- STEP 2: Generating Samples with the Oracle ---")
arc_eager = ArcEager()
raw_samples = []
# Number of sentences dropped because the oracle raised AssertionError.
# Tracked so that silent data loss is visible in the cell output.
skipped_sentences = 0

for sent in train_sentences:
    try:
        # The oracle returns a list of Sample objects (State + Transition) for this sentence
        samples = arc_eager.oracle(sent)
    except AssertionError:
        # The oracle asserts when it cannot reproduce the gold tree; skip that sentence.
        skipped_sentences += 1
        continue
    raw_samples.extend(samples)

print(f"Total samples (game states) generated: {len(raw_samples)}")
if skipped_sentences:
    print(f"Sentences skipped because the oracle failed: {skipped_sentences}")

# VISUALIZATION: Let's see what a raw sample looks like
if raw_samples:
    print("Example of Raw Sample (Index 0):")
    print(f"   State: {raw_samples[0].state}")
    print(f"   Correct Action: {raw_samples[0].transition}\n")

    

# --- 3. FEATURE EXTRACTION (From State to List of Strings) ---
# Each oracle sample is turned into a list of string features (taken from the
# stack and the buffer) plus an (action, dependency label) target pair.
print("--- STEP 3: Feature Extraction (Translation to Text) ---")

# One pass over the samples: the feature list and the target are produced
# together so both outputs stay aligned index by index.
# nbuffer_feats=2 and nstack_feats=2 is the suggested configuration.
_feature_target_pairs = [
    (
        current.state_to_feats(nbuffer_feats=2, nstack_feats=2),
        (current.transition.action, current.transition.dependency),
    )
    for current in raw_samples
]

X_raw = [feats for feats, _ in _feature_target_pairs]     # Input features (lists of strings)
Y_raw = [target for _, target in _feature_target_pairs]   # Outputs: (action, dependency label)

# VISUALIZATION: What do the lists contain now?
print(f" Example of Input (X_raw[0]): {X_raw[0]}")
print(f"   (This is what the network 'sees': words and tags)")
print(f"Example of Output (Y_raw[0]): {Y_raw[0]}")
print(f"   (This is what the network must predict: Action and Label)\n")




# --- 4. PREPARATION FOR KERAS (Vocabularies and Numerical Conversion) ---
# The network consumes integer IDs, so every string gets mapped to a number.
print("--- STEP 4: Numerical Conversion (For Keras) ---")

# 4.1 Text -> ID dictionaries. IDs are handed out in first-seen order.
words_vocab = {'<PAD>': 0, '<UNK>': 1}
upos_vocab = {'<PAD>': 0, '<UNK>': 1}
actions_vocab = {}          # E.g., 'SHIFT': 0, 'LEFT-ARC': 1...
deprels_vocab = {None: 0}   # E.g., 'nsubj': 1, 'det': 2...

# Feature layout assumed here: the first half of each list holds word forms,
# the second half holds the matching UPOS tags.
for feats in X_raw:
    half = len(feats) // 2
    for word_form in feats[:half]:
        # setdefault leaves existing entries alone and assigns the next free ID otherwise.
        words_vocab.setdefault(word_form, len(words_vocab))
    for tag in feats[half:]:
        upos_vocab.setdefault(tag, len(upos_vocab))

for action, label in Y_raw:
    actions_vocab.setdefault(action, len(actions_vocab))
    deprels_vocab.setdefault(label, len(deprels_vocab))

print(f"Vocabulary Sizes:")
print(f"   Unique words: {len(words_vocab)}")
print(f"   Unique UPOS tags: {len(upos_vocab)}")
print(f"   Possible actions: {len(actions_vocab)} {actions_vocab}")
print(f"   Dependency relations: {len(deprels_vocab)}\n")

# 4.2 Convert everything to Numbers (Matrices for Keras)
# X_train will have shape (Num_Samples, Num_Features)
X_train_numerical = []
Y_train_actions = []
Y_train_deprels = []

for features, (act, dep) in zip(X_raw, Y_raw):
    # INPUT: the first half of the feature list are words, the second half UPOS tags.
    num_words = len(features) // 2
    word_ids = [words_vocab.get(w, words_vocab['<UNK>']) for w in features[:num_words]]
    tag_ids = [upos_vocab.get(u, upos_vocab['<UNK>']) for u in features[num_words:]]
    X_train_numerical.append(word_ids + tag_ids)

    # OUTPUT: action ID and dependency ID.
    Y_train_actions.append(actions_vocab[act])
    # Use 0 if the dependency is None (e.g., for SHIFT or REDUCE)
    Y_train_deprels.append(deprels_vocab.get(dep, 0))

# Convert to Numpy arrays (The actual input format Keras expects)
X_train = np.array(X_train_numerical)
y_act = np.array(Y_train_actions)
y_dep = np.array(Y_train_deprels)

print("DATA")
print(f"Final numerical example (X_train[0]): {X_train[0]}")
print(f"   (Notice how words are now IDs)")
# Reverse map (ID -> action name), used only to display the first target.
id_to_action_name = {action_id: name for name, action_id in actions_vocab.items()}
act_name = id_to_action_name[int(y_act[0])]
print(f"Target Action (y_act[0]): {y_act[0]} -> Corresponds to '{act_name}'")

# Persist the matrices and the vocabularies so the training and inference cells
# can reload exactly the same ID mappings.
np.savez("training_data.npz", X=X_train, y_act=y_act, y_dep=y_dep)
with open("vocabs.pkl", "wb") as f:
    pickle.dump((words_vocab, upos_vocab, actions_vocab, deprels_vocab), f)
print("Data saved to 'training_data.npz' and 'vocabs.pkl'")

--- STEP 1: Data Loading ---
 Loaded 1748 valid projective sentences for training.

--- STEP 2: Generating Samples with the Oracle ---
Total samples (game states) generated: 81182
Example of Raw Sample (Index 0):
   State: Stack (size=1): (0, ROOT, ROOT_UPOS)
Buffer (size=13): (1, Distribution, NOUN) | (2, of, ADP) | (3, this, DET) | (4, license, NOUN) | (5, does, AUX) | (6, not, PART) | (7, create, VERB) | (8, an, DET) | (9, attorney, NOUN) | (10, -, PUNCT) | (11, client, NOUN) | (12, relationship, NOUN) | (13, ., PUNCT)
Arcs (size=0): set()

   Correct Action: SHIFT

--- STEP 3: Feature Extraction (Translation to Text) ---
 Example of Input (X_raw[0]): ['<PAD>', 'ROOT', 'Distribution', 'of', '<PAD>', 'ROOT_UPOS', 'NOUN', 'ADP']
   (This is what the network 'sees': words and tags)
Example of Output (Y_raw[0]): ('SHIFT', None)
   (This is what the network must predict: Action and Label)

--- STEP 4: Numerical Conversion (For Keras) ---
Vocabulary Sizes:
   Unique words: 6872
   Unique 

In [8]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import pickle

# --- 1. LOAD PREPARED DATA ---
print("--- STEP 1: Loading Training Data and Vocabularies ---")

# Raise instead of calling exit(): exit() is meant for the interactive shell and
# would take the notebook kernel down instead of reporting a normal cell error.
try:
    # The context manager closes the underlying .npz archive once the arrays are read.
    with np.load("training_data.npz") as data:
        X_train_full = data['X']           # Feature IDs, one row per sample (words then tags)
        y_train_act_full = data['y_act']   # Action IDs
        y_train_dep_full = data['y_dep']   # Dependency IDs
except FileNotFoundError as err:
    raise FileNotFoundError(
        "Error: 'training_data.npz' not found. Run the data-preparation cell first."
    ) from err

# Load vocabularies
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this notebook.
try:
    with open("vocabs.pkl", "rb") as f:
        words_vocab, upos_vocab, actions_vocab, deprels_vocab = pickle.load(f)
except FileNotFoundError as err:
    raise FileNotFoundError(
        "Error: 'vocabs.pkl' not found. Run the data-preparation cell first."
    ) from err

# Inputs and both target vectors must describe the same samples.
assert len(X_train_full) == len(y_train_act_full) == len(y_train_dep_full), \
    "feature matrix and target vectors have different lengths"

# --- Data Splitting ---
# Sequential 90/10 split: the last 10% of the samples become the validation set.
split_idx = int(len(X_train_full) * 0.9)

# Inputs
X_train, X_val = X_train_full[:split_idx], X_train_full[split_idx:]

# Outputs (two sets of targets: actions and dependency labels)
y_train_act, y_val_act = y_train_act_full[:split_idx], y_train_act_full[split_idx:]
y_train_dep, y_val_dep = y_train_dep_full[:split_idx], y_train_dep_full[split_idx:]

print(f"Training samples: {len(X_train)}")
print(f"Validation samples: {len(X_val)}")

# --- 2. SEPARATE INPUTS (Words vs Tags) ---
# X contains [Word_S2, Word_S1, Word_B1, Word_B2, Tag_S2, Tag_S1, Tag_B1, Tag_B2]
num_features_total = X_train.shape[1]
num_word_feats = num_features_total // 2

X_train_words = X_train[:, :num_word_feats]
X_train_tags  = X_train[:, num_word_feats:]

X_val_words = X_val[:, :num_word_feats]
X_val_tags  = X_val[:, num_word_feats:]

--- STEP 1: Loading Training Data and Vocabularies ---
Training samples: 73063
Validation samples: 8119


In [9]:
# --- 3. DEFINE HYPERPARAMETERS ---
# These three sizes configure the baseline model built in this cell only; the
# grid-search cell passes its own values to build_and_train_parser.
WORD_EMBED_DIM = 32
POS_EMBED_DIM = 10
HIDDEN_UNITS = 100

# Embedding table sizes. The vocabularies assign IDs 0..len-1, so the "+ 1"
# leaves one extra (unused) row in each embedding table.
NUM_WORDS = len(words_vocab) + 1
NUM_TAGS = len(upos_vocab) + 1
NUM_ACTIONS = len(actions_vocab)  # Output 1 size (e.g., 4: SHIFT, REDUCE, LA, RA)
NUM_DEPRELS = len(deprels_vocab)  # Output 2 size (e.g., 44 dependency labels)

print(f"Output 1 (Actions): {NUM_ACTIONS} classes")
print(f"Output 2 (Labels): {NUM_DEPRELS} classes")


# --- 4. BUILD THE MODEL (Multi-Output) ---
# NOTE: this cell only builds the graph and prints its summary; the model is
# neither compiled nor trained here.
print("--- STEP 2: Building Multi-Output Neural Network ---")

# A. Input Layers
# Both inputs have the same width: one ID per word feature and one per tag feature.
input_words = layers.Input(shape=(num_word_feats,), name="input_words")
input_tags  = layers.Input(shape=(num_word_feats,), name="input_tags")

# B. Embedding Layers
# Each ID is mapped to a dense vector; words and tags use separate tables.
embed_words = layers.Embedding(input_dim=NUM_WORDS, output_dim=WORD_EMBED_DIM, name="embed_words")(input_words)
embed_tags  = layers.Embedding(input_dim=NUM_TAGS, output_dim=POS_EMBED_DIM, name="embed_tags")(input_tags)

# C. Flatten & Concatenate
# The per-position embeddings are flattened and joined into one feature vector.
flat_words = layers.Flatten()(embed_words)
flat_tags  = layers.Flatten()(embed_tags)
merged = layers.Concatenate(name="concat_features")([flat_words, flat_tags])

# D. Shared Hidden Layers
# This layer learns features relevant for BOTH tasks (action and label prediction)
hidden = layers.Dense(HIDDEN_UNITS, activation='relu', name="hidden_shared")(merged)
hidden = layers.Dropout(0.2)(hidden)

# E. Output Layers (The Two Heads)
# Head 1: Predicts the transition action (SHIFT, REDUCE, etc.)
output_action = layers.Dense(NUM_ACTIONS, activation='softmax', name="action_output")(hidden)

# Head 2: Predicts the dependency label (nsubj, det, etc.)
output_label = layers.Dense(NUM_DEPRELS, activation='softmax', name="label_output")(hidden)

# Create Model with 2 inputs and 2 outputs
# The output order (action first, label second) is the order in which
# predictions are returned by the model.
model = models.Model(
    inputs=[input_words, input_tags], 
    outputs=[output_action, output_label], 
    name="ArcEager_MultiOutput_Parser"
)

model.summary()

Output 1 (Actions): 4 classes
Output 2 (Labels): 44 classes
--- STEP 2: Building Multi-Output Neural Network ---


In [10]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping

def build_and_train_parser(
    # Input Data
    X_train_words, X_train_tags, y_train_act, y_train_dep,
    X_val_words, X_val_tags, y_val_act, y_val_dep,
    # Fixed Dimensions (Vocabularies)
    num_words, num_tags, num_actions, num_deprels,
    # Hyperparameters (Variables)
    word_embed_dim=100,  # default raised
    pos_embed_dim=25,    # default raised
    hidden_units=200,    # default raised
    learning_rate=0.001,
    dropout_rate=0.3,
    batch_size=64,
    epochs=30,           # upper bound; early stopping may end training sooner
    model_name="Parser_Model"
):
    """
    Build, compile and fit a two-headed feed-forward parser model.

    The network embeds word IDs and tag IDs separately, flattens and
    concatenates them, passes the result through two ReLU hidden layers
    (the second has half the units of the first, with a floor of 50) and
    ends in two softmax heads: ``action_output`` and ``label_output``.

    Args:
        X_train_words, X_train_tags: training input matrices (word IDs / tag IDs).
        y_train_act, y_train_dep: training targets (action IDs / dependency IDs).
        X_val_words, X_val_tags, y_val_act, y_val_dep: validation counterparts.
        num_words, num_tags: ``input_dim`` of the word / tag embedding tables.
        num_actions, num_deprels: number of classes of each output head.
        word_embed_dim, pos_embed_dim: embedding sizes.
        hidden_units: width of the first hidden layer.
        learning_rate: Adam learning rate.
        dropout_rate: dropout after each hidden layer; no Dropout layer is
            added when it is not greater than 0.
        batch_size, epochs: passed to ``model.fit``.
        model_name: name given to the Keras model and used in log messages.

    Returns:
        Tuple ``(model, history)``: the fitted Keras model and the History
        object returned by ``model.fit``.
    """
    separator = '=' * 60
    print(f"\n{separator}")
    print(f"TRAINING MODEL: {model_name}")
    print(f"Params: WordEmb={word_embed_dim}, PosEmb={pos_embed_dim}, Hidden={hidden_units}, LR={learning_rate}, Drop={dropout_rate}, Batch={batch_size}")
    print(f"{separator}\n")

    # --- 1. Architecture ---

    # One integer ID per feature position on each input.
    words_in = layers.Input(shape=(X_train_words.shape[1],), name="input_words")
    tags_in = layers.Input(shape=(X_train_tags.shape[1],), name="input_tags")

    # Separate embedding tables for words and tags.
    words_emb = layers.Embedding(input_dim=num_words, output_dim=word_embed_dim, name="embed_words")(words_in)
    tags_emb = layers.Embedding(input_dim=num_tags, output_dim=pos_embed_dim, name="embed_tags")(tags_in)

    # Flatten both embedding blocks and join them into one feature vector.
    words_flat = layers.Flatten(name="flatten_words")(words_emb)
    tags_flat = layers.Flatten(name="flatten_tags")(tags_emb)
    features = layers.Concatenate(name="concat_features")([words_flat, tags_flat])

    # Funnel-shaped stack: full width first, then half of it (never below 50 units).
    layer_widths = (hidden_units, max(hidden_units // 2, 50))
    for depth, width in enumerate(layer_widths, start=1):
        features = layers.Dense(width, activation='relu', name=f"hidden_{depth}")(features)
        if dropout_rate > 0:
            features = layers.Dropout(dropout_rate, name=f"dropout_{depth}")(features)

    # Two softmax heads sharing the same hidden representation.
    action_head = layers.Dense(num_actions, activation='softmax', name="action_output")(features)
    label_head = layers.Dense(num_deprels, activation='softmax', name="label_output")(features)

    model = models.Model(
        inputs=[words_in, tags_in],
        outputs=[action_head, label_head],
        name=model_name
    )
    model.summary()

    # --- 2. Compilation ---
    head_names = ("action_output", "label_output")
    model.compile(
        optimizer=optimizers.Adam(learning_rate=learning_rate),
        loss={head: "sparse_categorical_crossentropy" for head in head_names},
        metrics={head: ["accuracy"] for head in head_names}
    )

    # --- 3. Callbacks ---
    # Stop when validation action accuracy has not improved for 4 epochs.
    stopper = EarlyStopping(
        monitor='val_action_output_accuracy',
        mode='max',
        patience=4,
        restore_best_weights=True,
        verbose=1
    )

    # --- 4. Training ---
    print("\nStarting Training...")
    train_inputs = [X_train_words, X_train_tags]
    train_targets = [y_train_act, y_train_dep]
    validation = ([X_val_words, X_val_tags], [y_val_act, y_val_dep])
    history = model.fit(
        x=train_inputs,
        y=train_targets,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=validation,
        callbacks=[stopper],
        verbose=1
    )

    print(f"--- Training Finished for {model_name} ---")
    return model, history

# --- HYPERPARAMETER GRID DEFINITION (IMPROVED) ---
# Each entry is one configuration; the keys match the keyword arguments of
# build_and_train_parser.
hyperparameter_grid = [
    # 1. A balanced but reasonably large configuration.
    dict(
        word_embed_dim=100, pos_embed_dim=25, hidden_units=256,
        learning_rate=0.001, batch_size=64, dropout_rate=0.3,
        model_name="Balanced_Deep_Model",
    ),
    # 2. High capacity: very large embeddings and hidden layer (mind the RAM usage).
    dict(
        word_embed_dim=200, pos_embed_dim=50, hidden_units=512,
        learning_rate=0.001, batch_size=128, dropout_rate=0.4,
        model_name="High_Capacity_Model",
    ),
    # 3. Fine tuning: lower learning rate and smaller batches.
    dict(
        word_embed_dim=100, pos_embed_dim=25, hidden_units=300,
        learning_rate=0.0005, batch_size=32, dropout_rate=0.3,
        model_name="Fine_Tuned_Slow_Learner",
    ),
    # 4. Heavy regularization: useful when training accuracy is ~99% but
    #    validation accuracy stays low.
    dict(
        word_embed_dim=100, pos_embed_dim=25, hidden_units=256,
        learning_rate=0.001, batch_size=64, dropout_rate=0.5,
        model_name="High_Regularization_Model",
    ),
]

# --- EXECUTION LOOP ---
# Trains one model per grid entry and keeps the one with the highest
# validation accuracy on the action head.

all_histories = {}        # model_name -> Keras history dict
best_val_accuracy = 0.0
best_model = None
best_model_name = ""

print(f"Starting Hyperparameter Search over {len(hyperparameter_grid)} models...")

for params in hyperparameter_grid:
    run_name = params["model_name"]

    # Every grid key is forwarded as a keyword argument, so a key added to the
    # grid can never be silently ignored. epochs defaults to 25 unless an entry
    # overrides it.
    model, history = build_and_train_parser(
        X_train_words, X_train_tags, y_train_act, y_train_dep,
        X_val_words, X_val_tags, y_val_act, y_val_dep,
        NUM_WORDS, NUM_TAGS, NUM_ACTIONS, NUM_DEPRELS,
        **{"epochs": 25, **params}
    )

    all_histories[run_name] = history.history

    # Evaluate performance based on Action Accuracy
    best_epoch_acc = max(history.history['val_action_output_accuracy'])
    print(f"Result {run_name}: Best Validation Action Accuracy = {best_epoch_acc:.4f}")

    if best_epoch_acc > best_val_accuracy:
        print(f" >> New Best Model Found! (Previous best: {best_val_accuracy:.4f})")
        best_val_accuracy = best_epoch_acc
        best_model = model
        best_model_name = run_name

print(f"\n{'='*60}")
print(f"SEARCH COMPLETE")
print(f"Best Model: '{best_model_name}' with Action Accuracy: {best_val_accuracy:.4f}")
print(f"{'='*60}\n")

# --- SAVE BEST MODEL ---
# Explicit None check: "no model selected" is the only case to skip, and it
# should not depend on how the model object evaluates in a boolean context.
if best_model is not None:
    save_filename = f"{best_model_name}_best.keras"
    print(f"Saving best model to: {save_filename}")
    best_model.save(save_filename)

Starting Hyperparameter Search over 4 models...

TRAINING MODEL: Balanced_Deep_Model
Params: WordEmb=100, PosEmb=25, Hidden=256, LR=0.001, Drop=0.3, Batch=64




Starting Training...
Epoch 1/25
[1m1142/1142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 8ms/step - action_output_accuracy: 0.8232 - action_output_loss: 0.4558 - label_output_accuracy: 0.7616 - label_output_loss: 0.8474 - loss: 1.3031 - val_action_output_accuracy: 0.8797 - val_action_output_loss: 0.3112 - val_label_output_accuracy: 0.8379 - val_label_output_loss: 0.4611 - val_loss: 0.7726
Epoch 2/25
[1m1142/1142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - action_output_accuracy: 0.9126 - action_output_loss: 0.2362 - label_output_accuracy: 0.8688 - label_output_loss: 0.3997 - loss: 0.6358 - val_action_output_accuracy: 0.8763 - val_action_output_loss: 0.3305 - val_label_output_accuracy: 0.8540 - val_label_output_loss: 0.4250 - val_loss: 0.7559
Epoch 3/25
[1m1142/1142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - action_output_accuracy: 0.9472 - action_output_loss: 0.1481 - label_output_accuracy: 0.9055 - label_output_loss: 0.2789 - l


Starting Training...
Epoch 1/25
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 16ms/step - action_output_accuracy: 0.8263 - action_output_loss: 0.4504 - label_output_accuracy: 0.7690 - label_output_loss: 0.8250 - loss: 1.2756 - val_action_output_accuracy: 0.8827 - val_action_output_loss: 0.3076 - val_label_output_accuracy: 0.8495 - val_label_output_loss: 0.4362 - val_loss: 0.7464
Epoch 2/25
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - action_output_accuracy: 0.9181 - action_output_loss: 0.2267 - label_output_accuracy: 0.8735 - label_output_loss: 0.3807 - loss: 0.6074 - val_action_output_accuracy: 0.8859 - val_action_output_loss: 0.3164 - val_label_output_accuracy: 0.8483 - val_label_output_loss: 0.4281 - val_loss: 0.7473
Epoch 3/25
[1m571/571[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 16ms/step - action_output_accuracy: 0.9518 - action_output_loss: 0.1352 - label_output_accuracy: 0.9150 - label_output_loss: 0.2545 - loss


Starting Training...
Epoch 1/25
[1m2284/2284[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 7ms/step - action_output_accuracy: 0.8225 - action_output_loss: 0.4632 - label_output_accuracy: 0.7614 - label_output_loss: 0.8629 - loss: 1.3262 - val_action_output_accuracy: 0.8770 - val_action_output_loss: 0.3208 - val_label_output_accuracy: 0.8364 - val_label_output_loss: 0.4699 - val_loss: 0.7913
Epoch 2/25
[1m2284/2284[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - action_output_accuracy: 0.9081 - action_output_loss: 0.2511 - label_output_accuracy: 0.8625 - label_output_loss: 0.4237 - loss: 0.6748 - val_action_output_accuracy: 0.8802 - val_action_output_loss: 0.3230 - val_label_output_accuracy: 0.8517 - val_label_output_loss: 0.4282 - val_loss: 0.7518
Epoch 3/25
[1m2284/2284[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 7ms/step - action_output_accuracy: 0.9429 - action_output_loss: 0.1592 - label_output_accuracy: 0.9019 - label_output_loss: 0.2981 -


Starting Training...
Epoch 1/25
[1m1142/1142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 9ms/step - action_output_accuracy: 0.8057 - action_output_loss: 0.5140 - label_output_accuracy: 0.7247 - label_output_loss: 0.9961 - loss: 1.5103 - val_action_output_accuracy: 0.8706 - val_action_output_loss: 0.3379 - val_label_output_accuracy: 0.8303 - val_label_output_loss: 0.5088 - val_loss: 0.8470
Epoch 2/25
[1m1142/1142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - action_output_accuracy: 0.8927 - action_output_loss: 0.2956 - label_output_accuracy: 0.8391 - label_output_loss: 0.5119 - loss: 0.8074 - val_action_output_accuracy: 0.8768 - val_action_output_loss: 0.3294 - val_label_output_accuracy: 0.8423 - val_label_output_loss: 0.4506 - val_loss: 0.7803
Epoch 3/25
[1m1142/1142[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 8ms/step - action_output_accuracy: 0.9265 - action_output_loss: 0.2088 - label_output_accuracy: 0.8756 - label_output_loss: 0.3873 - l

## 5. Inference (Decoding) and Evaluation

Una vez que tenemos nuestro modelo entrenado (y guardado como `.keras`), llega el momento de usarlo para procesar datos nuevos (Test Set) y construir los árboles sintácticos.

Para hacer esto de manera eficiente, implementaremos una estrategia de Decodificación Vertical (Vertical Decoding). En lugar de procesar una oración completa hasta el final antes de pasar a la siguiente (Horizontal), procesamos un lote (batch) de oraciones simultáneamente.

1. Extraemos el estado actual de todas las oraciones activas en el lote.

2. La red neuronal predice la siguiente acción para todo el lote en una sola operación matricial (mucho más rápido para la GPU/CPU).

3. Aplicamos las transiciones correspondientes a cada oración.

4. Si una oración termina, sale del lote activo.
    
5. Repetimos hasta que todas las oraciones del lote hayan terminado.

In [11]:
import os
import pickle
import numpy as np
import tensorflow as tf
from conllu_reader import ConlluReader
from algorithm import ArcEager, Transition
from postprocessor import PostProcessor

# --- 1. RESOURCE LOADING ---
print("--- STEP 1: Loading Resources for Inference ---")

# 1. TEST data: not used for training, only for evaluation.
reader = ConlluReader()
# Make sure the file name is correct
test_sentences = reader.read_conllu_file("en_partut-ud-test_clean.conllu")
print(f"Loaded {len(test_sentences)} test sentences.")

# 2. Vocabularies: they must be the SAME ones used during training.
# NOTE(review): pickle.load can execute arbitrary code; only load files produced by this notebook.
with open("vocabs.pkl", "rb") as f:
    vocab_bundle = pickle.load(f)
words_vocab, upos_vocab, actions_vocab, deprels_vocab = vocab_bundle

# Reverse lookups (ID -> text), needed to decode the network's predictions.
id_to_action = dict(zip(actions_vocab.values(), actions_vocab.keys()))
id_to_deprel = dict(zip(deprels_vocab.values(), deprels_vocab.keys()))

# 3. Best trained model.
# The file name is the one written by the grid-search cell for the winning
# configuration; if it is missing, fall back to a generic file name.
model_path = "High_Capacity_Model_best.keras"
model_path = model_path if os.path.exists(model_path) else "Parser_Model.keras"

print(f"Loading model from: {model_path}")
model = tf.keras.models.load_model(model_path)
print("Model loaded successfully.")

--- STEP 1: Loading Resources for Inference ---
Loaded 153 test sentences.
Loading model from: High_Capacity_Model_best.keras
Model loaded successfully.


### Funciones Auxiliares para Inferencia

Necesitamos una función que tome una lista de objetos State y los convierta en las matrices numéricas X_words y X_tags que espera el modelo.

Debemos usar el mismo tamaño de ventana (`nbuffer_feats` y `nstack_feats`) que usamos durante el entrenamiento.

In [12]:
def get_features_from_states(states, words_vocab, upos_vocab, nbuffer=2, nstack=2):
    """
    Turn a batch of parser states into two integer ID matrices.

    Args:
        states: iterable of parser states to featurize.
        words_vocab: mapping word string -> ID; must contain '<UNK>'.
        upos_vocab: mapping UPOS tag string -> ID; must contain '<UNK>'.
        nbuffer: number of buffer positions passed to ``state_to_feats``.
        nstack: number of stack positions passed to ``state_to_feats``.

    Returns:
        Tuple ``(word_ids, tag_ids)`` of numpy arrays with one row per state.
    """
    # The feature extraction logic lives on Sample, so each state is wrapped in
    # a throw-away Sample (with no transition, since it is not known yet).
    from algorithm import Sample

    word_rows, tag_rows = [], []
    for current in states:
        feats = Sample(current, None).state_to_feats(
            nbuffer_feats=nbuffer, nstack_feats=nstack
        )

        # First half of the feature list: words. Second half: tags.
        half = len(feats) // 2

        # Strings missing from a vocabulary are mapped to its '<UNK>' ID.
        word_rows.append(
            [words_vocab.get(form, words_vocab['<UNK>']) for form in feats[:half]]
        )
        tag_rows.append(
            [upos_vocab.get(tag, upos_vocab['<UNK>']) for tag in feats[half:]]
        )

    return np.array(word_rows), np.array(tag_rows)

### Ejecución del Bucle Vertical

A continuación, procesamos todas las oraciones del test set. Para no saturar la memoria, procesamos en lotes grandes (por ejemplo, 256 oraciones a la vez).

Dentro del bucle, implementamos una lógica de seguridad: la red neuronal devuelve probabilidades para todas las acciones. Nosotros ordenamos esas acciones de mayor a menor probabilidad y elegimos la primera que sea válida según las reglas del Arc-Eager (funciones `LA_is_valid`, `RA_is_valid` y `REDUCE_is_valid`, más la comprobación de que el buffer no esté vacío para SHIFT). Esto evita que el parser se rompa intentando hacer un LEFT-ARC cuando no debe.

In [13]:
print("\n--- STEP 2: Running Vertical Decoding ---")

arc_eager = ArcEager()
final_trees = []  # Not filled in this cell (see the NOTE at the end of the loop)

# Configuration
BATCH_SIZE = 256  # Number of sentences processed in parallel
# IMPORTANT: must match the window size used when the training features were built
WINDOW_SIZE = 2

idx = 0
total_sents = len(test_sentences)

while idx < total_sents:
    # Simple progress line
    end_idx = min(idx + BATCH_SIZE, total_sents)
    print(f"Processing sentences {idx} to {end_idx}...", end="\r")

    # 1. Prepare the initial batch
    batch_sentences = test_sentences[idx : end_idx]
    active_states = []

    for sent in batch_sentences:
        # Reset every token to "attached to root". This overwrites whatever
        # head/dep values the tokens of test_sentences carried before.
        for token in sent:
            token.head = 0
            token.dep = "root"

        active_states.append(arc_eager.create_initial_state(sent))

    # 2. Loop until every state of this batch has finished
    while active_states:

        # A. Extract features for all active states
        X_words, X_tags = get_features_from_states(
            active_states, words_vocab, upos_vocab,
            nbuffer=WINDOW_SIZE, nstack=WINDOW_SIZE
        )

        # B. Batch prediction. The model returns [action_probs, label_probs].
        preds = model.predict([X_words, X_tags], verbose=0)
        pred_actions_probs = preds[0]
        pred_labels_probs = preds[1]

        # C. Apply one transition per state and keep the unfinished ones
        next_active_states = []

        for i, state in enumerate(active_states):
            act_probs = pred_actions_probs[i]
            lbl_probs = pred_labels_probs[i]

            # Actions ranked from most to least probable
            best_act_indices = np.argsort(act_probs)[::-1]

            # Label: simply the most probable one
            best_label_id = np.argmax(lbl_probs)
            pred_label = id_to_deprel[best_label_id]

            # Apply the most probable action whose preconditions hold
            transition_applied = False

            for act_id in best_act_indices:
                act_name = id_to_action[act_id]

                is_valid = False
                if act_name == "SHIFT" and len(state.B) > 0:
                    is_valid = True
                elif act_name == "LEFT-ARC" and arc_eager.LA_is_valid(state):
                    is_valid = True
                elif act_name == "RIGHT-ARC" and arc_eager.RA_is_valid(state):
                    is_valid = True
                elif act_name == "REDUCE" and arc_eager.REDUCE_is_valid(state):
                    is_valid = True

                if is_valid:
                    transition = Transition(act_name, pred_label)
                    arc_eager.apply_transition(state, transition)
                    transition_applied = True
                    break

            if not transition_applied:
                # Emergency fallback: no predicted action was valid.
                if len(state.B) > 0:
                    arc_eager.apply_transition(state, Transition("SHIFT"))
                else:
                    # Stuck state: nothing can be applied, so the state would
                    # never change. Drop it instead of re-queuing it, otherwise
                    # this loop would never terminate.
                    continue

            # D. Keep the state only if the sentence is not finished yet
            if not arc_eager.final_state(state):
                next_active_states.append(state)

        active_states = next_active_states

    # NOTE: finished states are simply discarded here. Their arcs (state.A) are
    # NOT written back to the tokens, because after filtering, the position of a
    # state in active_states no longer matches its position in batch_sentences.
    # The corrected loop in the next cell keeps (state, sentence) pairs to solve this.

    idx += BATCH_SIZE

print("\nInference completed.")


--- STEP 2: Running Vertical Decoding ---
Processing sentences 0 to 153...
Inference completed.


### Corrección del Bucle de Inferencia (Manejo de Referencias)

Para asegurarnos de asignar los padres a la oración correcta durante el procesamiento por lotes, usamos tuplas (state, sentence) en la lista activa.

In [15]:
# --- CORRECTED AND COMPLETE INFERENCE LOOP ---
# Decodes the test sentences in batches: every sentence of a batch advances by
# one transition per model call. Each parser state travels together with its
# own token list as a (state, tokens) pair, so the predicted arcs can be
# written back to the right sentence even after other states of the batch
# have finished and left the active list.
final_trees = []
idx = 0

while idx < total_sents:
    end_idx = min(idx + BATCH_SIZE, total_sents)
    print(f"Processing sentences {idx} to {end_idx}...", end="\r")

    batch_sentences = test_sentences[idx : end_idx]

    # (state, original token list) pairs that are still being parsed.
    active_pairs = []

    for sent in batch_sentences:
        # Reset head/dep to the "_" placeholder before parsing.
        for token in sent:
            token.head = "_"
            token.dep = "_"
        state = arc_eager.create_initial_state(sent)
        active_pairs.append((state, sent))

    while active_pairs:
        # Feature extraction only needs the states, not the token lists.
        current_states = [pair[0] for pair in active_pairs]

        X_words, X_tags = get_features_from_states(
            current_states, words_vocab, upos_vocab,
            nbuffer=WINDOW_SIZE, nstack=WINDOW_SIZE
        )

        # The model has two outputs: action scores and dependency-label scores.
        preds = model.predict([X_words, X_tags], verbose=0)
        pred_actions = preds[0]
        pred_labels = preds[1]

        next_active_pairs = []

        for i, (state, sent_tokens) in enumerate(active_pairs):
            act_probs = pred_actions[i]
            lbl_probs = pred_labels[i]
            # Candidate actions from most to least probable.
            best_act_indices = np.argsort(act_probs)[::-1]
            best_label_id = np.argmax(lbl_probs)
            pred_label = id_to_deprel[best_label_id]

            # Apply the most probable action that is valid in this state.
            transition_applied = False
            for act_id in best_act_indices:
                act_name = id_to_action[act_id]
                is_valid = (
                    (act_name == "SHIFT" and len(state.B) > 0)
                    or (act_name == "LEFT-ARC" and arc_eager.LA_is_valid(state))
                    or (act_name == "RIGHT-ARC" and arc_eager.RA_is_valid(state))
                    or (act_name == "REDUCE" and arc_eager.REDUCE_is_valid(state))
                )

                if is_valid:
                    arc_eager.apply_transition(state, Transition(act_name, pred_label))
                    transition_applied = True
                    break

            # Fallback: if nothing was applicable, consume the buffer with SHIFT.
            if not transition_applied and len(state.B) > 0:
                arc_eager.apply_transition(state, Transition("SHIFT"))
                transition_applied = True

            # A state stays active only if it changed in this step and is not
            # final. A state for which no transition could be applied would
            # be identical on the next iteration, so re-queuing it would make
            # `while active_pairs` loop forever; it is retired instead and the
            # arcs built so far are kept.
            if transition_applied and not arc_eager.final_state(state):
                next_active_pairs.append((state, sent_tokens))
            else:
                # --- WRITE RESULTS BACK TO THE TOKENS ---
                # state.A is iterated as (head_id, label, dep_id) triples.
                for (head_id, label, dep_id) in state.A:
                    # Assumes sent_tokens[0] is ROOT (id 0), so that
                    # sent_tokens[dep_id] is the dependent token -- TODO confirm
                    # against ConlluReader's sentence layout.
                    if dep_id < len(sent_tokens):
                        sent_tokens[dep_id].head = head_id
                        sent_tokens[dep_id].dep = label

        active_pairs = next_active_pairs

    # Tokens were updated in place, so the batch now carries the predictions.
    final_trees.extend(batch_sentences)
    idx += BATCH_SIZE

print("\nDecoding finished.")

Processing sentences 0 to 153...
Decoding finished.


## 6. Post-processing and Saving

Los árboles generados por un parser "greedy" (codicioso) como este pueden tener pequeños defectos estructurales, como tener múltiples raíces o nodos sin padre. Usamos el PostProcessor para aplicar reglas heurísticas y arreglar estos árboles antes de guardar.

In [16]:
# Saves the predicted trees, runs them through PostProcessor, and overwrites
# the same file with the corrected trees.
print("--- STEP 3: Post-processing and Saving ---")

output_filename = "system_prediction.conllu"

# 1. Save the raw prediction. This must happen first: postprocess() below is
#    given the file path, not the in-memory trees.
reader.write_conllu_file(output_filename, final_trees)

# 2. Apply corrections (Single Root, Connected Tree)
postprocessor = PostProcessor()
corrected_trees = postprocessor.postprocess(output_filename)

# 3. Overwrite the file with the corrected version
reader.write_conllu_file(output_filename, corrected_trees)

print(f"Final predictions saved to: {output_filename}")

--- STEP 3: Post-processing and Saving ---
Final predictions saved to: system_prediction.conllu


## 7. Official Evaluation

Finalmente, usamos el script conll18_ud_eval.py para comparar nuestra predicción (system_prediction.conllu) contra el archivo de referencia (en_partut-ud-test_clean.conllu).

Las métricas principales son:

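    UAS (Unlabeled Attachment Score): porcentaje de palabras a las que se les asigna el padre (head) correcto.

    LAS (Labeled Attachment Score): porcentaje de palabras a las que se les asigna el padre Y la etiqueta de dependencia correctos (la métrica más importante).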

In [17]:
# Scores the system output against the gold file with the official
# conll18_ud_eval.py script, run as a child process.
print("--- STEP 4: Official Evaluation ---")

import os  # not used here; kept so any later cell relying on it still runs
import subprocess
import sys

# Define files
gold_file = "en_partut-ud-test_clean.conllu"
system_file = "system_prediction.conllu"

# Run the evaluation script.
# The command is an argument list, so no shell is involved and file names are
# passed verbatim. sys.executable is used instead of a bare "python" so the
# script runs with the same interpreter/environment as this kernel.
command_args = [sys.executable, "conll18_ud_eval.py", gold_file, system_file, "-v"]
command = " ".join(command_args)  # human-readable form, for display only

print(f"Running command: {command}\n")
eval_run = subprocess.run(
    command_args,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # show script errors together with its output
    universal_newlines=True,
)
print(eval_run.stdout)

# Exit status of the evaluation script (0 means success), shown as cell output.
eval_run.returncode

--- STEP 4: Official Evaluation ---
Running command: python conll18_ud_eval.py en_partut-ud-test_clean.conllu system_prediction.conllu -v

Metric     | Precision |    Recall |  F1 Score | AligndAcc
-----------+-----------+-----------+-----------+-----------
Tokens     |    100.00 |    100.00 |    100.00 |
Sentences  |    100.00 |    100.00 |    100.00 |
Words      |    100.00 |    100.00 |    100.00 |
UPOS       |    100.00 |    100.00 |    100.00 |    100.00
XPOS       |    100.00 |    100.00 |    100.00 |    100.00
UFeats     |    100.00 |    100.00 |    100.00 |    100.00
AllTags    |    100.00 |    100.00 |    100.00 |    100.00
Lemmas     |    100.00 |    100.00 |    100.00 |    100.00
UAS        |     78.52 |     78.52 |     78.52 |     78.52
LAS        |     69.01 |     69.01 |     69.01 |     69.01
CLAS       |     57.82 |     54.58 |     56.15 |     54.58
MLAS       |     56.51 |     53.34 |     54.88 |     53.34
BLEX       |     57.82 |     54.58 |     56.15 |     54.58


0