<h1>Predicting morphological features with an LSTM</h1>

In [None]:
from baseline_LSTM_MORPH import *
import copy

In [None]:
# Read the training split of the UD French-Sequoia treebank. The result is
# unpacked into the sentences and their matching morphological annotations.
sentences, morphs = load_data(
    "UD_French-Sequoia/fr_sequoia-ud-train.conllu"
)

In [None]:
# Build vocabulary
# Word vocabulary: index 0 is reserved for the '<PAD>' token, so real words
# are numbered from 1 in the order word_counts yields them.
# NOTE(review): there is no '<UNK>' entry; words that only occur in another
# split must be handled by MORPHDataset -- confirm.
word_counts = Counter(word for sentence in sentences for word in sentence)
word_to_ix = {word: i + 1 for i, word in enumerate(word_counts)}  # +1 for padding
word_to_ix['<PAD>'] = 0

# Morphological trait vocabulary: '_' is excluded from the set and pinned to
# index 0; every other trait is numbered from 1.
morph_counts = set(trait for trait_list in morphs for trait in trait_list if trait != '_')
# Enumerate the traits in sorted order: the iteration order of a set of
# strings depends on per-process hash randomisation, so enumerating the raw
# set would assign different label indices on every kernel restart.
morph_to_ix = {trait: i + 1 for i, trait in enumerate(sorted(morph_counts))}
morph_to_ix['_'] = 0

In [None]:
# Hyperparameters shared by the cells below.
embedding_dim, hidden_dim = 64, 128   # layer sizes handed to MORPH_Model
epochs, batch_size = 50, 16           # training passes / sentences per batch
morphs_size = len(morph_to_ix)        # number of entries in the trait vocabulary

In [None]:
# Wrap the training corpus and both vocabularies in a dataset, then serve it
# in shuffled mini-batches assembled by collate_fn.
dataset = MORPHDataset(sentences, morphs, word_to_ix, morph_to_ix)
data_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    collate_fn=collate_fn,
    shuffle=True,
)

In [None]:
# Training objective: cross-entropy that skips targets equal to -1.
# NOTE(review): presumably collate_fn pads label sequences with -1 -- confirm,
# since '_' (not padding) is what maps to index 0 in morph_to_ix.
loss_function = nn.CrossEntropyLoss(ignore_index=-1)

# Network sized from the two vocabularies and the configured layer widths.
model = MORPH_Model(
    len(word_to_ix),
    embedding_dim,
    hidden_dim,
    len(morph_to_ix),
)

# SGD with momentum over all of the model's parameters.
optimizer = optim.SGD(model.parameters(), momentum=0.9, lr=0.01)

<h2>Training the model</h2>

In [None]:
# Train the model: one optimisation step per mini-batch, reporting the mean
# batch loss at the end of every epoch.
for epoch in range(epochs):
    # Put the model in training mode explicitly. If an evaluation call has
    # switched it to eval mode, re-running this cell would otherwise train
    # with train-only layers (e.g. dropout) disabled.
    model.train()
    total_loss = 0
    for batch in data_loader:
        sentence_batch, morphs_batch = batch
        # Clear gradients accumulated by the previous step.
        model.zero_grad()
        morph_scores = model(sentence_batch)
        # Flatten scores to (N, number of labels) and targets to (N,) so the
        # loss is computed over every token position at once.
        loss = loss_function(morph_scores.view(-1, len(morph_to_ix)), morphs_batch.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}")

In [None]:
# Score the trained model on the data it was trained on and report the three
# metrics returned by evaluate_model, one per line.
loss, accuracy, f1 = evaluate_model(
    model, data_loader, loss_function, morph_to_ix
)
print(
    f"Train Accuracy : {accuracy:.4f}",
    f"Train loss : {loss:.4f}",
    f"Train F1 score : {f1:.4f}",
    sep="\n",
)

<h3>Testing the model</h3>

In [None]:
# Read the test split of the UD French-Sequoia treebank.
sentences_test, morphs_test = load_data(
    "UD_French-Sequoia/fr_sequoia-ud-test.conllu"
)

In [None]:
# Index the test corpus with the same word and trait vocabularies that are
# passed to the training dataset.
# NOTE(review): words or traits that appear only in the test file have no
# entry in word_to_ix / morph_to_ix -- confirm MORPHDataset handles them.
dataset_test = MORPHDataset(sentences_test, morphs_test, word_to_ix, morph_to_ix)
# Evaluation gains nothing from shuffling; a fixed order keeps the batches
# (and therefore the reported metrics) reproducible from run to run.
data_loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

In [None]:
# Score the trained model on the test loader and report the three metrics
# returned by evaluate_model, one per line.
loss, accuracy, f1 = evaluate_model(
    model, data_loader_test, loss_function, morph_to_ix
)
print(
    f"Test Accuracy : {accuracy:.4f}",
    f"Test loss : {loss:.4f}",
    f"Test F1 score : {f1:.4f}",
    sep="\n",
)