In [11]:
from baseline_transformer_POS_conv import *
import copy
import itertools
import time

In [12]:
# Query CUDA once: the answer selects the compute device and is echoed so the
# notebook output records whether a GPU was available (True) or not (False).
cuda_available = torch.cuda.is_available()
device = torch.device("cuda") if cuda_available else torch.device("cpu")
print(cuda_available)

# Mini-batch size used by every DataLoader, and the default epoch budget.
batch_size, epochs = 16, 100

True


In [13]:
def _make_loader(encoded_sentences, encoded_tags, shuffle):
    """Wrap one data split in a POSDataset plus a batched DataLoader.

    Returns the ``(dataset, loader)`` pair. The mappings, ``max_word_len``,
    ``batch_size`` and ``collate_fn`` are read from the notebook namespace
    at call time.
    """
    split_dataset = POSDataset(encoded_sentences, encoded_tags, tag_to_ix, max_word_len,char_to_ix)
    split_loader = DataLoader(split_dataset, batch_size=batch_size, shuffle=shuffle, collate_fn=collate_fn)
    return split_dataset, split_loader


# First pass over the training file with load_data_1: its output is only used
# to build the vocabularies and to measure the longest word.
sentences, pos_tags = load_data_1("UD_French-Sequoia/fr_sequoia-ud-train.conllu")

# Character vocabulary: count every character of every training word, then
# number the distinct characters from 2 upwards in first-seen order.
char_counts = Counter()
for sentence in sentences:
    for word in sentence:
        char_counts.update(word)
char_to_ix = dict(zip(char_counts, range(2, 2 + len(char_counts))))
# Indices 0 and 1 are reserved for padding and unknown characters.
char_to_ix['<PAD>'] = 0
char_to_ix['<UNK>'] = 1

# Tag vocabulary: distinct POS tags numbered from 0 in first-seen order.
tag_counts = Counter()
for tags in pos_tags:
    tag_counts.update(tags)
tag_to_ix = dict(zip(tag_counts, range(len(tag_counts))))

# Longest word (in characters) seen in the training sentences.
max_word_len = max(map(len, (word for sentence in sentences for word in sentence)))

# Second pass with load_data, which receives the character mapping and the
# maximum word length computed above.
train_sentences, train_pos_tags = load_data("UD_French-Sequoia/fr_sequoia-ud-train.conllu", char_to_ix, max_word_len)
validation_sentences, validation_pos_tags = load_data("UD_French-Sequoia/fr_sequoia-ud-dev.conllu", char_to_ix, max_word_len)

# Training batches are shuffled; validation and test batches keep file order.
dataset, data_loader = _make_loader(train_sentences, train_pos_tags, True)
validation_dataset, validation_data_loader = _make_loader(validation_sentences, validation_pos_tags, False)

test_sentences, test_pos_tags = load_data("UD_French-Sequoia/fr_sequoia-ud-test.conllu", char_to_ix, max_word_len)
test_dataset, test_data_loader = _make_loader(test_sentences, test_pos_tags, False)



In [14]:

# Sizes derived from the vocabularies: number of character indices and number
# of distinct POS tags.
num_chars, tagset_size = len(char_to_ix), len(tag_to_ix)

# Default hyperparameters for a single model.
char_embedding_dim, num_filters, kernel_size = 512, 256, 3
nhead, nhid, nlayers = 4, 512, 3

# Candidate values for each hyperparameter (one list per axis).
char_embedding_dim_tab = [512, 256, 128]
num_filters_tab = [256, 128]
kernel_size_tab = [2, 3, 4]
nhead_tab = [4]
nhid_tab = [512, 256, 128]
nlayers_tab = list(range(1, 6))



In [15]:
# Grid search over the hyperparameter lists. Each configuration is trained
# with early stopping on validation accuracy, its best checkpoint is scored
# on the test set, and one summary dict per configuration goes into `results`.
patience = 2   # consecutive epochs without validation-accuracy improvement tolerated
epochs=50      # upper bound on epochs per configuration
results = []

# itertools.product advances the right-most axis fastest, i.e. exactly the
# order of the equivalent six nested for-loops.
hyperparameter_grid = itertools.product(
    char_embedding_dim_tab,
    num_filters_tab,
    kernel_size_tab,
    nhead_tab,
    nhid_tab,
    nlayers_tab,
)
for char_embedding_dim, num_filters, kernel_size, nhead, nhid, nlayers in hyperparameter_grid:
    start_time = time.time()
    best_val_accuracy = 0
    epochs_no_improve = 0
    final_epoch = 0
    # Reset per configuration so a checkpoint from a previous configuration
    # can never be evaluated in place of this one.
    best_model = None

    model = POSTransformerModel(num_chars, char_embedding_dim, num_filters, kernel_size, nhead, nhid, nlayers, tagset_size)
    # Move the model to the target device once, before the optimizer is built.
    model.to(device)
    loss_function = nn.CrossEntropyLoss(ignore_index=-1)
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Training
    for epoch in range(epochs):
        final_epoch = epoch + 1  # 1-based count of epochs actually started
        # Re-enter training mode every epoch (evaluate_model may have switched
        # the mode -- TODO confirm against its implementation).
        model.train()
        total_loss = 0
        for sentence_in, targets in data_loader:
            # Move the batch to the same device as the model.
            sentence_in, targets = sentence_in.to(device), targets.to(device)
            optimizer.zero_grad()
            tag_scores = model(sentence_in)
            # Flatten scores to (N, tagset_size) and targets to (N,) for the loss.
            loss = loss_function(tag_scores.view(-1, tagset_size), targets.view(-1))
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Validation loss and accuracy after this epoch.
        val_loss, val_accuracy = evaluate_model(model, validation_data_loader, loss_function,device, tag_to_ix)
        print(f"Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.4f}")
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            # Snapshot the weights: `model` keeps training after this point.
            best_model = copy.deepcopy(model)
            print(f"best model accuracy: {best_val_accuracy:.4f}")
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        # Report the mean training loss before any early exit, so the final
        # epoch is logged too.
        print(f"Epoch {epoch+1}, Loss: {total_loss / len(data_loader)}")

        # Early stopping: the criterion is validation accuracy, not loss.
        if epochs_no_improve >= patience:
            print("Arrêt précoce : L'accuracy de validation ne s'améliore plus")
            break

    end_time = time.time()
    elapsed_time = end_time - start_time

    # If validation accuracy never rose above 0, no snapshot exists; fall back
    # to the model in its final state instead of a stale or undefined one.
    if best_model is None:
        best_model = model
    loss, accuracy = evaluate_model(best_model, test_data_loader, loss_function, device, tag_to_ix)
    print(f"Test Accuracy : {accuracy:.4f}")

    # Store the results for this configuration.
    result = {
        'char_embedding_dim': char_embedding_dim,
        'num_filters': num_filters,
        'kernel_size': kernel_size,
        'nhead': nhead,
        'nhid': nhid,
        'nlayers': nlayers,
        'best_val_accuracy': best_val_accuracy,
        'final_epoch': final_epoch,
        'elapsed_time': elapsed_time,
        'test_accuracy': accuracy
    }
    results.append(result)
                        



Epoch 1, Validation Loss: 0.9867, Validation Accuracy: 0.6801
best model accuracy: 0.6801
Epoch 1, Loss: 1.5117519634110588
Epoch 2, Validation Loss: 0.7947, Validation Accuracy: 0.7355
best model accuracy: 0.7355
Epoch 2, Loss: 0.9995766976049968
Epoch 3, Validation Loss: 0.7221, Validation Accuracy: 0.7535
best model accuracy: 0.7535
Epoch 3, Loss: 0.8602390847035817
Epoch 4, Validation Loss: 0.6617, Validation Accuracy: 0.7720
best model accuracy: 0.7720
Epoch 4, Loss: 0.7863857235227313
Epoch 5, Validation Loss: 0.6227, Validation Accuracy: 0.7866
best model accuracy: 0.7866
Epoch 5, Loss: 0.7353454798460006
Epoch 6, Validation Loss: 0.5863, Validation Accuracy: 0.8011
best model accuracy: 0.8011
Epoch 6, Loss: 0.6944177363600049
Epoch 7, Validation Loss: 0.5606, Validation Accuracy: 0.8113
best model accuracy: 0.8113
Epoch 7, Loss: 0.6654419977750097
Epoch 8, Validation Loss: 0.5409, Validation Accuracy: 0.8192
best model accuracy: 0.8192
Epoch 8, Loss: 0.6358740908758981
Epoch 9,

KeyboardInterrupt: 