In [2]:
pip install sacrebleu rouge-score

Collecting sacrebleu
  Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting portalocker (from sacrebleu)
  Downloading portalocker-2.10.1-py3-none-any.whl.metadata (8.5 kB)
Collecting lxml (from sacrebleu)
  Downloading lxml-5.3.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.8 kB)
Collecting nltk (from rouge-score)
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Downloading sacrebleu-2.4.3-py3-none-any.whl (103 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.0/104.0 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lxml-5.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (5.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m65.1 MB/s[0m eta

In [11]:
pip install -U transformers

Collecting transformers
  Downloading transformers-4.45.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.45.2-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m58.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading tokenizers-0.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m104.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.19.1
    Uninstalling tokenizers-0.19.1:
      Successfully uninsta

In [None]:
import os
import json
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from sacrebleu import corpus_bleu
from rouge_score import rouge_scorer
from google.cloud import storage

# Where the fine-tuned checkpoint lives in GCS, and where it is mirrored locally
local_checkpoint_path = '/tmp/checkpoint-72000'
checkpoint_path = 'gs://translation-datasets/nllb-finetuned/nllb-finetuned/checkpoint-72000'

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Function to download model files from GCS to local storage
def download_model_from_gcs(gcs_path, local_path):
    """Download every object under a GCS folder into a local directory.

    Objects are written below ``local_path`` at their path relative to the
    folder, so files with the same basename in different sub-folders do not
    overwrite each other.

    Args:
        gcs_path: Folder location of the form ``gs://<bucket>/<folder>``.
        local_path: Local directory to download into; created if missing.

    Raises:
        ValueError: If ``gcs_path`` has no ``/`` after the bucket name, because
            the bucket/folder unpacking fails.
    """
    client = storage.Client()
    bucket_name, folder_path = gcs_path.replace('gs://', '').split('/', 1)
    bucket = client.bucket(bucket_name)

    # End the prefix with '/' so that e.g. "checkpoint-72000" does not also
    # match a sibling folder such as "checkpoint-720000/".
    folder_path = folder_path.rstrip('/')
    prefix = f"{folder_path}/" if folder_path else ''

    os.makedirs(local_path, exist_ok=True)

    for blob in bucket.list_blobs(prefix=prefix):
        relative_name = blob.name[len(prefix):]

        # Zero-byte "directory placeholder" objects (names ending in '/') have
        # no file content; trying to download them onto a directory path fails.
        if not relative_name or relative_name.endswith('/'):
            continue

        local_file_path = os.path.join(local_path, *relative_name.split('/'))
        os.makedirs(os.path.dirname(local_file_path), exist_ok=True)
        blob.download_to_filename(local_file_path)
        print(f"Downloaded {blob.name} to {local_file_path}")

# Mirror the checkpoint folder from GCS onto local disk
download_model_from_gcs(gcs_path=checkpoint_path, local_path=local_checkpoint_path)

# Restore the tokenizer and the fine-tuned weights from the local copy, then
# place the model on the selected device ('cuda' if a GPU is available, else 'cpu')
tokenizer = AutoTokenizer.from_pretrained(local_checkpoint_path)
model = AutoModelForSeq2SeqLM.from_pretrained(local_checkpoint_path).to(device)

# Function to load the JSONL test data from GCS, limited to max_examples records (default 30)
def load_jsonl(file_path, max_examples=30):
    """Load records from a JSON Lines object stored in GCS.

    Args:
        file_path: Object location of the form ``gs://<bucket>/<object>``.
        max_examples: Maximum number of records to return. Pass ``None`` to
            load every record in the file.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        ValueError: If ``file_path`` has no ``/`` after the bucket name, or
            (as ``json.JSONDecodeError``) if a non-blank line is not valid JSON.
    """
    test_data = []
    client = storage.Client()
    bucket_name, blob_name = file_path.replace('gs://', '').split('/', 1)
    bucket = client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    content = blob.download_as_text(encoding='utf-8')

    for line in content.splitlines():
        if max_examples is not None and len(test_data) >= max_examples:
            break
        # Blank or whitespace-only lines are not JSON documents; skip them
        # instead of letting json.loads raise.
        if not line.strip():
            continue
        test_data.append(json.loads(line))
    return test_data

#def load_jsonl(file_path):
#    test_data = []
#    client = storage.Client()
#    bucket_name, blob_name = file_path.replace('gs://', '').split('/', 1)
#    bucket = client.bucket(bucket_name)
#    blob = bucket.blob(blob_name)
#    content = blob.download_as_text(encoding='utf-8')

    # Process the content and load all examples
#    for line in content.splitlines():
#        test_data.append(json.loads(line))
    
#    return test_data

# Function to perform translation
def translate_text(model, tokenizer, text, source_lang_code, target_lang_code):
    """Translate a single text with a seq2seq model and its tokenizer.

    Args:
        model: Model used for generation. Assumed to expose ``.device`` and
            ``.generate`` like a Hugging Face ``PreTrainedModel``.
        tokenizer: Tokenizer matching ``model``; its ``src_lang`` attribute is
            set to ``source_lang_code`` as a side effect.
        text: Source text to translate.
        source_lang_code: Language code assigned to ``tokenizer.src_lang``.
        target_lang_code: Language-code token forced as the first generated
            token.

    Returns:
        The first decoded sequence, with special tokens removed.

    Raises:
        ValueError: If ``target_lang_code`` is not a token the tokenizer knows.
    """
    # An unknown code would resolve to the unknown-token id and be silently
    # forced as the first generated token; fail loudly instead.
    forced_bos_token_id = tokenizer.convert_tokens_to_ids(target_lang_code)
    if forced_bos_token_id is None or forced_bos_token_id == tokenizer.unk_token_id:
        raise ValueError(f"Unknown target language code: {target_lang_code!r}")

    tokenizer.src_lang = source_lang_code
    # Put the inputs on the device of the model that was passed in, rather
    # than relying on a module-level `device` variable staying in sync.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True).to(model.device)

    # Generate translation
    generated_tokens = model.generate(**inputs, forced_bos_token_id=forced_bos_token_id)
    translated_text = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

    return translated_text

# Pull the evaluation examples from GCS
test_data_path = 'gs://translation-datasets/test.jsonl'
test_data = load_jsonl(test_data_path, max_examples=30)

# Translate each example, collecting references and model outputs side by side
reference_texts, generated_texts = [], []

for sample in test_data:
    source_text, target_text = sample['source_text'], sample['target_text']
    source_lang_code, target_lang_code = sample['source_lang'], sample['target_lang']

    translated_text = translate_text(
        model,
        tokenizer,
        source_text,
        source_lang_code,
        target_lang_code,
    )

    # Keep both lists aligned index-by-index for the metric functions
    reference_texts.append(target_text)
    generated_texts.append(translated_text)

# Function to evaluate BLEU score
def evaluate_bleu(reference_texts, generated_texts):
    """Return the corpus-level BLEU score of the generated texts.

    Args:
        reference_texts: Reference translations, one per example.
        generated_texts: Model outputs, in the same order as the references.

    Returns:
        The ``score`` attribute of the result returned by ``corpus_bleu``.
    """
    # corpus_bleu takes a list of reference streams; there is a single one here.
    reference_streams = [reference_texts]
    return corpus_bleu(generated_texts, reference_streams).score

# Function to evaluate ROUGE scores
def evaluate_rouge(reference_texts, generated_texts):
    """Average ROUGE-1, ROUGE-2 and ROUGE-L F-measures over paired texts.

    Args:
        reference_texts: Reference translations, one per example.
        generated_texts: Model outputs, in the same order as the references.

    Returns:
        A dict with keys ``"ROUGE-1"``, ``"ROUGE-2"`` and ``"ROUGE-L"`` mapping
        to the mean F-measure across all pairs. Every value is ``0.0`` when
        there are no pairs to score.

    Raises:
        ValueError: If the two inputs have different lengths.
    """
    reference_texts = list(reference_texts)
    generated_texts = list(generated_texts)

    # zip() would silently drop the unmatched tail and skew the averages.
    if len(reference_texts) != len(generated_texts):
        raise ValueError(
            f"Got {len(reference_texts)} references but "
            f"{len(generated_texts)} generated texts"
        )

    # rouge_score metric name -> label used in the returned dict
    labels = {"rouge1": "ROUGE-1", "rouge2": "ROUGE-2", "rougeL": "ROUGE-L"}

    # Nothing to average: avoid dividing by zero.
    if not reference_texts:
        return {label: 0.0 for label in labels.values()}

    scorer = rouge_scorer.RougeScorer(list(labels), use_stemmer=True)
    totals = dict.fromkeys(labels, 0.0)

    for ref, gen in zip(reference_texts, generated_texts):
        score = scorer.score(ref, gen)
        for metric in totals:
            totals[metric] += score[metric].fmeasure

    count = len(reference_texts)
    return {label: totals[metric] / count for metric, label in labels.items()}

# Corpus-level BLEU over all translated examples
bleu_score = evaluate_bleu(reference_texts, generated_texts)
print(f"BLEU score: {bleu_score}")

# Averaged ROUGE F-measures, reported one metric per line
rouge_scores = evaluate_rouge(reference_texts, generated_texts)
for metric in rouge_scores:
    score = rouge_scores[metric]
    print(f"{metric}: {score}")

In [5]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Local copy of the fine-tuned checkpoint (must already exist on disk)
local_checkpoint_path = '/tmp/checkpoint-72000'

# Use the GPU when CUDA is available, otherwise the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the tokenizer and the model, placing the model on the chosen device
tokenizer = AutoTokenizer.from_pretrained(local_checkpoint_path)
model = AutoModelForSeq2SeqLM.from_pretrained(local_checkpoint_path).to(device)

# Language codes for the demo translation below
source_lang, target_lang = "eng_Latn", "fra_Latn"

def translate_batch_nllb(texts, source_lang, target_lang):
    """Translate a batch of texts with the module-level ``model`` and ``tokenizer``.

    Args:
        texts: Texts to translate (a list of strings).
        source_lang: Language code assigned to ``tokenizer.src_lang``.
        target_lang: Language-code token forced as the first generated token.

    Returns:
        A list of decoded translations with special tokens removed, in the
        same order as ``texts``. An empty batch returns ``[]``.

    Raises:
        ValueError: If ``target_lang`` is not a token the tokenizer knows.
    """
    # An empty batch has nothing to tokenize or generate from.
    if not isinstance(texts, str) and len(texts) == 0:
        return []

    # An unknown code would resolve to the unknown-token id and be silently
    # forced as the first generated token; fail loudly instead.
    forced_bos_token_id = tokenizer.convert_tokens_to_ids(target_lang)
    if forced_bos_token_id is None or forced_bos_token_id == tokenizer.unk_token_id:
        raise ValueError(f"Unknown target language code: {target_lang!r}")

    tokenizer.src_lang = source_lang

    # Follow the model's own device so inputs and weights always match
    # (assumes `model` exposes `.device` like a Hugging Face PreTrainedModel).
    inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True).to(model.device)

    generated_tokens = model.generate(**inputs, forced_bos_token_id=forced_bos_token_id)

    translations = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)
    return translations
 
# A few English sentences for a quick smoke test of the checkpoint
texts = [
    "Hello, how are you?",
    "This is a test of the NLLB model.",
    "Let's see how well it performs.",
]

translated_texts = translate_batch_nllb(texts, source_lang, target_lang)

# Show each source sentence next to its translation
for i in range(len(translated_texts)):
    translation = translated_texts[i]
    print(f"Original: {texts[i]}")
    print(f"Translated: {translation}\n")

Original: Hello, how are you?
Translated: Bonjour, comment allez-vous?

Original: This is a test of the NLLB model.
Translated: Ceci est un test du modèle NLLB.

Original: Let's see how well it performs.
Translated: Voyons si ça marche.

