In [6]:
model_checkpoint="models/marian/marian_output/checkpoint-9"
model_checkpoint_ct="models/marian/ct/checkpoint-9"

In [7]:
import tqdm as notebook_tqdm

In [None]:
### Eval original model 
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
from sacrebleu.metrics import BLEU
from tqdm import tqdm

def translate(model, tokenizer, text, device):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=150).to(device)
    translated = model.generate(**inputs)
    return tokenizer.batch_decode(translated, skip_special_tokens=True)

def calculate_bleu(references, hypotheses):
    bleu = BLEU()
    return bleu.corpus_score(hypotheses, [references]).score

def validate_model(model_name, dataset_name, use_gpu=False):
    device = torch.device("cuda" if torch.cuda.is_available() and use_gpu else "cpu")
    print(f"Using device: {device}")

    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

    # Load dataset
    dataset = load_dataset(dataset_name, split="validation")

    # Translate and calculate BLEU score
    hypotheses = []
    references = []

    for batch in tqdm(dataset, desc="Translating"):
        source_text = batch['translation']['dyu']  # Adjust this based on your dataset's column names
        reference = batch['translation']['fr']  # Adjust this based on your dataset's column names
        
        translation = translate(model, tokenizer, source_text, device)[0]
        print(reference)
        print(translation)
        
        hypotheses.append(translation)
        references.append(reference)
        # break

    bleu_score = calculate_bleu(references, hypotheses)
    print(f"BLEU Score: {bleu_score:.2f}")

# Run validation with GPU
# validate_model(model_checkpoint, "uvci/Koumankan_mt_dyu_fr", use_gpu=True)

# Run validation without GPU
validate_model(model_checkpoint, "uvci/Koumankan_mt_dyu_fr", use_gpu=False)

In [25]:
### Eval ct model
# import torch
# from transformers import AutoTokenizer
import sentencepiece as spm
import ctranslate2
from datasets import load_dataset
from sacrebleu.metrics import BLEU
from tqdm import tqdm


device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

sp_source_model = spm.SentencePieceProcessor(model_file=model_checkpoint_ct+'/source.spm')
sp_target_model = spm.SentencePieceProcessor(model_file=model_checkpoint_ct+'/target.spm')


def translate(translator, text, device):
    tokens = sp_source_model.encode(text, out_type=str)
    try:
        results = translator.translate_batch([tokens])
        # The translated results are token strings, so we need to convert them to IDs before decoding
        translations = []
        for translation in results:
            decoded_text = sp_target_model.decode(translation.hypotheses[0])
            translations.append(decoded_text)
    except Exception as e:
        print(f"Translation error: {e}")
        translations = [""]  # Return empty string if translation fails
    return translations

def calculate_bleu(references, hypotheses):
    bleu = BLEU()
    return bleu.corpus_score(hypotheses, [references]).score

def validate_model(model_path, dataset_name):
    # Load CTranslate2 model
    translator = ctranslate2.Translator(model_path, device=device)
    # Load dataset
    dataset = load_dataset(dataset_name, split="validation")
    # Translate and calculate BLEU score
    hypotheses = []
    references = []

    for batch in tqdm(dataset, desc="Translating"):
        source_text = batch['translation']['dyu']  # Adjust this based on your dataset's column names
        reference = batch['translation']['fr']  # Adjust this based on your dataset's column names
        
        # translation = translate(translator, tokenizer, source_text, device)[0]
        tokenizer=None
        translation = translate(translator, source_text, device)[0]
        hypotheses.append(translation)
        references.append(reference)
        # print(hypotheses,"----", reference)
        break

    bleu_score = calculate_bleu(references, hypotheses)
    print(f"BLEU Score: {bleu_score:.2f}")

validate_model(model_checkpoint_ct, "uvci/Koumankan_mt_dyu_fr")

Using device: cpu


Translating:   0%|          | 0/1471 [00:00<?, ?it/s]

['▁I', '▁t', 'ɔ', 'g', 'ɔ', '▁bi', '▁co', 'god', 'ɔ']


Translating:   0%|          | 0/1471 [00:05<?, ?it/s]

Il I I I I I I I tseg z z-------- je j'ai z zh zh zhhh zhhhggggg de z b- cougogogo b- le dieu de la codiesse zsssie zhhhhishsa zhhhhhhhhhhhhhhhhhhhhkakakah b b- cojuju dieu de codia dieu de  codi dieu de ég codiif, le dieu co dieu de ég coa dieu de ég codia le dieu de z co le dieu z, le dieu co le dieu de z, le dieu de z
["Il I I I I I I I tseg z z-------- je j'ai z zh zh zhhh zhhhggggg de z b- cougogogo b- le dieu de la codiesse zsssie zhhhhishsa zhhhhhhhhhhhhhhhhhhhhkakakah b b- cojuju dieu de codia dieu de  codi dieu de ég codiif, le dieu co dieu de ég coa dieu de ég codia le dieu de z co le dieu z, le dieu co le dieu de z, le dieu de z"] ---- Tu portes un nom de fantaisie.
BLEU Score: 0.47



