In [None]:
from baseline_transformer_LEM_conv import *
import copy

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())  # True when a CUDA GPU is available

# Seed torch's global RNG so that torch-driven randomness (e.g. DataLoader
# shuffling) is repeatable across "Restart & Run All". Only torch is seeded
# here; this alone does not guarantee bit-identical results on GPU.
seed = 42
torch.manual_seed(seed)

# Training configuration shared by the cells below.
batch_size=16
epochs=300

In [None]:
# Read the raw training split (sentences and their lemmas) with load_data;
# the vocabularies below are built from the training data only.
sentences, lemmas = load_data("UD_French-Sequoia/fr_sequoia-ud-train.conllu")

# Every word of the training split, flattened once and reused below.
train_words = [word for sentence in sentences for word in sentence]

# Character vocabulary: ids 0 and 1 are reserved for padding and unknown
# characters, so the characters seen in training are numbered from 2.
char_counts = Counter(char for word in train_words for char in word)
char_to_ix = dict(zip(char_counts, range(2, len(char_counts) + 2)))
char_to_ix.update({'<PAD>': 0, '<UNK>': 1})

# Lemma vocabulary: id 0 is reserved for padding, so lemmas are numbered from 1.
lemma_counts = Counter(lemma for lemma_list in lemmas for lemma in lemma_list)
lemma_to_ix = {lemma: idx for idx, lemma in enumerate(lemma_counts, start=1)}
lemma_to_ix['<PAD>'] = 0

# Length of the longest training word, passed on to preprocessing and datasets.
max_word_len = max(map(len, train_words))

# Load the train and dev splits again, this time through load_data_preprocess,
# which receives the character vocabulary and the maximum word length.
train_path = "UD_French-Sequoia/fr_sequoia-ud-train.conllu"
dev_path = "UD_French-Sequoia/fr_sequoia-ud-dev.conllu"
train_sentences, train_lem = load_data_preprocess(train_path, char_to_ix, max_word_len)
validation_sentences, validation_lem = load_data_preprocess(dev_path, char_to_ix, max_word_len)

# Training data: wrapped in a dataset and served in shuffled mini-batches.
dataset = LEMDataset(train_sentences, train_lem, lemma_to_ix, max_word_len, char_to_ix)
data_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
)

# Validation data: same wrapping, but iterated in a fixed order.
validation_dataset = LEMDataset(validation_sentences, validation_lem, lemma_to_ix, max_word_len, char_to_ix)
validation_data_loader = DataLoader(
    validation_dataset,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fn,
)

In [None]:
# Model hyperparameters. The input and output sizes come from the two
# vocabularies; the remaining values are fixed architecture settings that are
# passed positionally to LEMTransformerModel.
num_chars, tagset_size = len(char_to_ix), len(lemma_to_ix)
char_embedding_dim = 512
num_filters = 1024
kernel_size = 6
nhead, nhid, nlayers = 2, 1024, 1

model = LEMTransformerModel(
    num_chars,
    char_embedding_dim,
    num_filters,
    kernel_size,
    nhead,
    nhid,
    nlayers,
    tagset_size,
)

# Cross-entropy loss that skips target entries equal to -1, optimised with
# SGD plus momentum.
# NOTE(review): lemma '<PAD>' is mapped to 0 while the ignored index is -1;
# confirm which value the dataset/collate code uses for padded targets.
loss_function = nn.CrossEntropyLoss(ignore_index=-1)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)


<h2>Training</h2>

In [None]:
# Early-stopping configuration: stop after `patience` consecutive epochs
# without an improvement of the validation accuracy.
patience = 20
best_val_accuracy = 0
epochs_no_improve = 0

# Move the model to the target device once, before training starts.
model.to(device)

# Make sure `best_model` is always defined for the evaluation cells below,
# even if no epoch ever beats the initial `best_val_accuracy`.
best_model = copy.deepcopy(model)

# Training
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for sentence_in, targets in data_loader:
        # Move the batch to the same device as the model.
        sentence_in, targets = sentence_in.to(device), targets.to(device)
        optimizer.zero_grad()
        tag_scores = model(sentence_in)
        # Flatten scores to (N, number of lemmas) and targets to (N,) for the loss.
        loss = loss_function(tag_scores.view(-1, len(lemma_to_ix)), targets.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Validation loss, accuracy and F1 as returned by evaluate_model.
    val_loss, val_accuracy, f1 = evaluate_model(model, validation_data_loader, loss_function,device, lemma_to_ix)
    print(f"Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}, f1_score: {f1:.4f}")
    if val_accuracy > best_val_accuracy:
        # New best epoch: snapshot the model and reset the patience counter.
        best_val_accuracy = val_accuracy
        best_model = copy.deepcopy(model)
        print(f"best model accuracy: {best_val_accuracy:.4f}")
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1

    # Report the mean training loss for every epoch, including the one that
    # triggers early stopping.
    print(f"Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}")

    # Early stopping when the validation accuracy has stopped improving.
    if epochs_no_improve >= patience:
        print("Arrêt précoce : l'accuracy de validation ne s'améliore plus")
        break

In [None]:
# Score the best checkpoint on the training data and report the three metrics
# returned by evaluate_model.
train_metrics = evaluate_model(best_model, data_loader, loss_function, device, lemma_to_ix)
loss, accuracy, f1 = train_metrics
print(
    f"Train Accuracy : {accuracy:.4f}",
    f"Train loss : {loss:.4f}",
    f"Train F1 score : {f1:.4f}",
    sep="\n",
)

<h2>Testing</h2>

In [None]:
# Preprocess the held-out test split with the character vocabulary and the
# maximum word length computed on the training data.
test_path = "UD_French-Sequoia/fr_sequoia-ud-test.conllu"
sentences_test, lem_test = load_data_preprocess(test_path, char_to_ix, max_word_len)

In [None]:
# Wrap the preprocessed test split in a dataset, reusing the training vocabularies.
dataset_test = LEMDataset(sentences_test, lem_test, lemma_to_ix, max_word_len,char_to_ix)
# Evaluation only: iterate in a fixed order (no shuffling), as is done for the
# validation loader, so that test runs are repeatable.
data_loader_test = DataLoader(dataset_test, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)

In [None]:
# Test-set metrics for `model`, i.e. the weights as they were when training
# ended (not necessarily the best validation epoch).
base_metrics = evaluate_model(model, data_loader_test, loss_function, device, lemma_to_ix)
loss, accuracy, f1 = base_metrics
print(
    f"Test Accuracy base model: {accuracy:.4f}",
    f"Test loss base model: {loss:.4f}",
    f"Test F1 score base model: {f1:.4f}",
    sep="\n",
)

In [None]:
# Test-set metrics for `best_model`, the snapshot taken at the epoch with the
# highest validation accuracy.
best_metrics = evaluate_model(best_model, data_loader_test, loss_function, device, lemma_to_ix)
loss, accuracy, f1 = best_metrics
print(
    f"Test Accuracy best model: {accuracy:.4f}",
    f"Test loss best model: {loss:.4f}",
    f"Test F1 score best model: {f1:.4f}",
    sep="\n",
)