In [2]:
# Switch the kernel's working directory to the project folder; presumably this
# is what lets the relative `Data\...` paths and the `Code.*` imports in the
# following cells resolve.
# NOTE(review): absolute, machine-specific Windows path — adjust before running
# this notebook on another machine.
%cd D:\Project\Toolkit_for_Preprocessing_MXH\ViHOS_chunking

from IPython.display import clear_output
# Discard whatever this cell has printed so far so it renders without output.
clear_output()

In [2]:
import tensorflow as tf
import torch

# Begin from a clean state: reset the Keras backend session, then ask PyTorch
# to drop its cached GPU memory.
tf.keras.backend.clear_session()
torch.cuda.empty_cache()
# Hide anything the imports / resets printed.
clear_output()

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
# The bare expression on the last line displays the chosen device.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [3]:
# Locations of the chunking CSVs, relative to the current working directory.
train_path = r"Data\Chunking_data\train.csv"
dev_path = r"Data\Chunking_data\dev.csv"
test_path = r"Data\Chunking_data\test.csv"

# Index forwarded to `split_path`; None is the default value.
test_index = 50

# Data-loading settings.
batch_size = 64
max_len = 64
shuffle = False

In [4]:
from transformers import (
    AutoModel,
    AutoTokenizer,
    XLMRobertaModel,  # no longer used in this cell; import kept so any other reference still resolves
)

# "vinai/phobert-base" is a checkpoint of type `roberta`. Loading it through
# XLMRobertaModel makes transformers warn "You are using a model of type
# roberta to instantiate a model of type xlm-roberta. This is not supported for
# all configurations of models and can yield errors." AutoModel instead picks
# the model class from the checkpoint's own config, so the architecture always
# matches the weights.
input_model = AutoModel.from_pretrained("vinai/phobert-base")
tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")

# Keep the embedding matrix the same size as the tokenizer's vocabulary.
input_model.resize_token_embeddings(len(tokenizer))

# Hide the download / loading messages.
clear_output()

In [5]:
from Code.Dataset import split_path, create_dataloader

# Optionally re-split the data. When `test_index` is None this cell does
# nothing and the configured paths are used unchanged.
# NOTE(review): the three path variables are rebound to whatever `split_path`
# returns, so re-running this cell feeds the already-split paths back in —
# confirm `split_path` tolerates that, or re-run the config cell first.
if test_index is not None:
    if test_index > 3:
        # Load the data
        train_path, dev_path, test_path = split_path(test_path, test_index, train_path, dev_path, test_path)
    else:
        print("Test index out of range. Please provide a valid interger index greater than 3.")


Training set: 46 samples
Test set: 11 samples
Development set: 15 samples


In [6]:
# Build one dataloader per split with the shared batch size, tokenizer and
# maximum sequence length from the config cell.
# The training loader honours the `shuffle` setting from the config cell
# (previously that setting was defined but silently ignored). The dev and test
# loaders are never shuffled so evaluation order stays fixed.
train_dataloader = create_dataloader(train_path, batch_size=batch_size, tokenizer=tokenizer, max_len=max_len, shuffle=shuffle)
dev_dataloader = create_dataloader(dev_path, batch_size=batch_size, tokenizer=tokenizer, max_len=max_len, shuffle=False)
test_dataloader = create_dataloader(test_path, batch_size=batch_size, tokenizer=tokenizer, max_len=max_len, shuffle=False)

Columns: Index(['index', 'Chunk', 'Tag', 'sentence_id', 'Spans'], dtype='object')
Columns: Index(['index', 'Chunk', 'Tag', 'sentence_id', 'Spans'], dtype='object')
Columns: Index(['index', 'Chunk', 'Tag', 'sentence_id', 'Spans'], dtype='object')


In [7]:
from Code.Model import setup_model, MultiTaskModel, train, test

# Set up the model and training components
model, criterion_span, optimizer_spans, device, num_epochs = setup_model(
    input_model=input_model,
    model_class=MultiTaskModel,
    lr=5e-6,
    weight_decay=1e-5,
    num_epochs=2
)


In [8]:
# Run training with the objects returned by `setup_model`, using the dev
# dataloader for validation. Per the recorded cell output, each epoch reports
# the training loss, the validation loss and a span macro F1-score.
train(
    model=model,
    train_dataloader=train_dataloader,
    dev_dataloader=dev_dataloader,
    criterion_span=criterion_span,
    optimizer_spans=optimizer_spans,
    device=device,
    num_epochs=num_epochs
)

Epoch: 1


                                                         

Training Loss: 0.7193
Validation Loss: 0.7762
Span Macro F1-Score: 0.2103
Epoch: 2


                                                         

Training Loss: 0.6946
Validation Loss: 0.7561
Span Macro F1-Score: 0.2103




In [9]:
# Evaluate the trained model on the test dataloader.


# `test` returns the span predictions and the span targets; both are converted
# with `.tolist()` in the cell that saves the results.
span_preds, span_targets = test(
    model=model,
    test_dataloader=test_dataloader,
    device=device
)


Testing: 100%|██████████| 1/1 [00:00<00:00,  4.74it/s]

Span F1 Score: 0.3333





In [17]:
import json

# Persist the test predictions together with their targets as JSON.
test_results = {
    "predictions": span_preds.tolist(),
    "targets": span_targets.tolist(),
}
with open('result.json', 'w') as f:
    json.dump(test_results, f, indent=4)

# Save the trained model. This is deliberately outside the `with` block above:
# writing the checkpoint has nothing to do with the JSON file, and the JSON
# handle should be closed (and flushed) before the next step starts.
model_save_path = "trained_model.pth"
torch.save(model.state_dict(), model_save_path)
print(f"Model saved to {model_save_path}")

Model saved to trained_model.pth


In [3]:
# Run the pipeline through the `main.py` command-line entry point, which per
# the recorded output splits, trains, tests and saves in one go.
# NOTE(review): this passes `--max_len 128` while the notebook's config cell
# sets `max_len = 64`, so the scores of the two runs are not directly comparable.
!python main.py --train_path "Data\Chunking_data\train.csv" --dev_path "Data\Chunking_data\dev.csv" --test_path "Data\Chunking_data\test.csv" --batch_size 64 --max_len 128 --lr 5e-6 --num_epochs 2 --output_json "test_results.json" --output_dir "output" --test_index 50

Using device: cuda
Training set: 46 samples
Test set: 11 samples
Development set: 15 samples
Columns: Index(['index', 'Chunk', 'Tag', 'sentence_id', 'Spans'], dtype='object')
Columns: Index(['index', 'Chunk', 'Tag', 'sentence_id', 'Spans'], dtype='object')
Columns: Index(['index', 'Chunk', 'Tag', 'sentence_id', 'Spans'], dtype='object')
Epoch: 1
Training Loss: 0.6914
Validation Loss: 0.8323
Span Macro F1-Score: 0.1765
Epoch: 2
Training Loss: 0.7274
Validation Loss: 0.8061
Span Macro F1-Score: 0.1765
Span F1 Score: 0.3750
Test results saved to test_results.json
Model saved to output\trained_model.pth


You are using a model of type roberta to instantiate a model of type xlm-roberta. This is not supported for all configurations of models and can yield errors.
  return self.fget.__get__(instance, owner)()

Training:   0%|          | 0/1 [00:00<?, ?it/s]
Training: 100%|██████████| 1/1 [00:37<00:00, 37.61s/it]
                                                       

Validation:   0%|          | 0/1 [00:00<?, ?it/s]
Validation: 100%|██████████| 1/1 [00:02<00:00,  2.57s/it]
                                                         

Training:   0%|          | 0/1 [00:00<?, ?it/s]
Training: 100%|██████████| 1/1 [00:31<00:00, 31.43s/it]
                                                       

Validation:   0%|          | 0/1 [00:00<?, ?it/s]
Validation: 100%|██████████| 1/1 [00:03<00:00,  3.46s/it]
                                                         

Testing:   0%|          | 0/1 [00:00<?, ?it/s]
Testing: 100%|██████████| 1/1 [00:02<00:00,  2.01s/it]
Testing: 100%|██████████| 1/1 [00:02