In [None]:
pip install datasets transformers[sentencepiece] sacrebleu evaluate

Collecting sacrebleu
  Downloading sacrebleu-2.5.1-py3-none-any.whl.metadata (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.8/51.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting portalocker (from sacrebleu)
  Downloading portalocker-3.1.1-py3-none-any.whl.metadata (8.6 kB)
Collecting colorama (from sacrebleu)
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Downloading sacrebleu-2.5.1-py3-none-any.whl (104 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m104.1/104.1 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Downloading portalocker-3.1.1-py3-none-any.whl (19 kB)
Installing collected packag

In [None]:
# Authenticate with the Hugging Face Hub. The training cell further down sets
# push_to_hub=True and calls trainer.push_to_hub(), so a login is needed first.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Choose a base model for English to French translation.
# This checkpoint id is reused below to load the tokenizer and the model, and
# its last path component becomes the prefix of the training output directory.
model_checkpoint = "Helsinki-NLP/opus-mt-en-fr"

In [None]:
# Create a minimal custom dataset
from datasets import Dataset

# Sample translation pairs (index-aligned: fr_texts[i] translates en_texts[i])
en_texts = ["Hello, how are you?", "I love learning languages.", "The weather is nice today.", "What time is it?", "Paris is the capital of France."]
fr_texts = ["Bonjour, comment allez-vous ?", "J'adore apprendre des langues.", "Le temps est beau aujourd'hui.", "Quelle heure est-il ?", "Paris est la capitale de la France."]

# A parallel corpus only makes sense when both sides have the same length.
assert len(en_texts) == len(fr_texts), "en_texts and fr_texts must be index-aligned"

# Create dataset dictionary
dataset_dict = {
    "en": en_texts,
    "fr": fr_texts
}

# Create a Hugging Face dataset
custom_dataset = Dataset.from_dict(dataset_dict)

# Split the dataset into train and validation (fixed seed so the split is
# the same on every run). `raw_datasets` is assigned exactly once, here.
train_val_split = custom_dataset.train_test_split(test_size=0.2, seed=42)
raw_datasets = {
    "train": train_val_split["train"],
    "validation": train_val_split["test"]
}

In [None]:
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
# Add this after defining model_checkpoint but before the data collator
from transformers import AutoTokenizer

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Define preprocessing function (if needed for custom dataset)
def preprocess_function(examples):
    """Tokenize a batch of English/French pairs for seq2seq fine-tuning.

    Args:
        examples: Batch mapping with an "en" list (source sentences) and an
            "fr" list (target sentences), as supplied by
            ``Dataset.map(..., batched=True)``.

    Returns:
        The tokenizer output for the source sentences with the tokenized
        target ids stored under the "labels" key. Both sides are truncated
        to at most 128 tokens.
    """
    inputs = examples["en"]
    targets = examples["fr"]
    # `text_target` tokenizes the targets as target-language text and stores
    # their ids under "labels". It replaces the deprecated
    # `with tokenizer.as_target_tokenizer():` context manager.
    model_inputs = tokenizer(inputs, text_target=targets, max_length=128, truncation=True)
    return model_inputs
# Apply preprocessing to each split in the dataset dictionary
tokenized_datasets = {}
for split in raw_datasets.keys():
    tokenized_datasets[split] = raw_datasets[split].map(preprocess_function, batched=True)

# Load the model BEFORE building the data collator: the collator is created
# with `model=model`, so the name must already exist on a fresh kernel
# (Restart Kernel -> Run All would otherwise raise NameError).
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)

# Data collator (built once, after the model is available)
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Define language variables before setting up training arguments
source_lang = "en"
target_lang = "fr"

# Training configuration; the first positional argument is the output
# directory, which later cells reuse as the model path.
batch_size = 16
args = Seq2SeqTrainingArguments(
    f"{model_checkpoint.split('/')[-1]}-finetuned-{source_lang}-to-{target_lang}",
    eval_strategy="epoch",  # Changed from evaluation_strategy to eval_strategy
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    predict_with_generate=True,
    push_to_hub=True,
)

# Load the evaluation metric (numpy is used by compute_metrics below)
import numpy as np
import evaluate
metric = evaluate.load("sacrebleu")

def postprocess_text(preds, labels):
    """Strip surrounding whitespace from predictions and references.

    Each reference is additionally wrapped in a one-element list, so the
    result pairs every prediction with a list of references.

    Returns:
        A ``(preds, labels)`` tuple: a list of stripped strings and a list of
        single-item lists of stripped strings.
    """
    cleaned_preds = []
    for prediction in preds:
        cleaned_preds.append(prediction.strip())

    wrapped_refs = []
    for reference in labels:
        wrapped_refs.append([reference.strip()])

    return cleaned_preds, wrapped_refs

def compute_metrics(eval_preds):
    """Compute BLEU and mean generated length for an evaluation pass.

    Args:
        eval_preds: A ``(predictions, labels)`` pair of token-id arrays. If
            the predictions arrive as a tuple, its first element is used.

    Returns:
        Dict with "bleu" (the sacrebleu score) and "gen_len" (mean number of
        non-padding tokens per prediction), both rounded to 4 decimals.
    """
    preds, labels = eval_preds
    if isinstance(preds, tuple):
        preds = preds[0]

    pad_id = tokenizer.pad_token_id

    # -100 marks ignored positions and cannot be decoded. Replace it with the
    # pad id in BOTH arrays: predictions can also be padded with -100 when
    # batches of different lengths are gathered, which would otherwise break
    # batch_decode and inflate gen_len.
    preds = np.where(preds != -100, preds, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Post-processing
    decoded_preds, decoded_labels = postprocess_text(decoded_preds, decoded_labels)

    result = metric.compute(predictions=decoded_preds, references=decoded_labels)
    result = {"bleu": result["score"]}

    prediction_lens = [np.count_nonzero(pred != pad_id) for pred in preds]
    result["gen_len"] = np.mean(prediction_lens)
    result = {k: round(v, 4) for k, v in result.items()}
    return result

# Create trainer
# `processing_class` is the current name for the argument formerly called
# `tokenizer`; passing `tokenizer=` emits a deprecation warning on recent
# transformers releases.
trainer = Seq2SeqTrainer(
    model,
    args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    data_collator=data_collator,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

# Train model
trainer.train()

# Push to Hub
trainer.push_to_hub()

Map:   0%|          | 0/4 [00:00<?, ? examples/s]

Map:   0%|          | 0/1 [00:00<?, ? examples/s]

Downloading builder script:   0%|          | 0.00/8.15k [00:00<?, ?B/s]

  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss,Bleu,Gen Len
1,No log,0.126937,100.0,9.0
2,No log,0.122284,100.0,9.0
3,No log,0.119761,100.0,9.0




CommitInfo(commit_url='https://huggingface.co/Yazanref/opus-mt-en-fr-finetuned-en-to-fr/commit/9a58982b62a58101cadde7f3027324222507b3e3', commit_message='End of training', commit_description='', oid='9a58982b62a58101cadde7f3027324222507b3e3', pr_url=None, repo_url=RepoUrl('https://huggingface.co/Yazanref/opus-mt-en-fr-finetuned-en-to-fr', endpoint='https://huggingface.co', repo_type='model', repo_id='Yazanref/opus-mt-en-fr-finetuned-en-to-fr'), pr_revision=None, pr_num=None)

In [None]:
from transformers import pipeline

# Load your fine-tuned model
# The name is built with the same f-string that the training cell passes to
# Seq2SeqTrainingArguments as its output directory.
# NOTE(review): presumably this resolves to that local directory rather than
# a Hub repo id — confirm.
model_name = f"{model_checkpoint.split('/')[-1]}-finetuned-{source_lang}-to-{target_lang}"
translator = pipeline("translation", model=model_name)

# Translate text
text = "i will make a new software project and i will send it to you as soon as possible"
translated = translator(text)
# The pipeline output is indexed as a list of dicts; print the first result's text.
print(translated[0]['translation_text'])

Device set to use cpu


Je vais faire un nouveau projet de logiciel et je vous l'enverrai dès que possible


In [None]:
from transformers import pipeline
import pandas as pd
import numpy as np
import os

# Load your fine-tuned model
model_name = f"{model_checkpoint.split('/')[-1]}-finetuned-{source_lang}-to-{target_lang}"
translator = pipeline("translation", model=model_name)

# Create a more comprehensive English-French dataset
english_sentences = [
    # Greetings and basic conversation
    "Hello, how are you?",
    "My name is John.",
    "Nice to meet you.",
    "Where do you live?",
    "I live in Paris.",
    "What time is it?",
    "It's three o'clock.",
    "How old are you?",
    "I am twenty-five years old.",

    # Travel
    "Where is the train station?",
    "How much does this cost?",
    "I need a hotel room.",
    "Can you help me find my way?",
    "Is there a restaurant nearby?",
    "What time does the museum open?",
    "I'd like to rent a car.",
    "How far is the airport?",

    # Food
    "I would like to order a coffee.",
    "This food is delicious.",
    "Could I have the bill please?",
    "Do you have vegetarian options?",
    "What's the specialty of this restaurant?",

    # Work and education
    "I'm working on a new project.",
    "The meeting starts at 9 AM.",
    "I studied computer science at university.",
    "She is a software engineer.",
    "We need to finish this report by tomorrow.",

    # Descriptions
    "The weather is beautiful today.",
    "This book is very interesting.",
    "The building is tall and modern.",
    "She has long brown hair.",
    "His car is red and fast.",

    # Longer sentences
    "I'm planning to travel to France next summer to visit museums and try local cuisine.",
    "The technological advancements in artificial intelligence have transformed many industries over the past decade.",
    "Environmental protection is becoming increasingly important as we face climate change challenges.",
    "Learning a new language opens doors to different cultures and perspectives around the world.",
    "The company announced a new strategic partnership that will expand their operations globally.",
]

french_sentences = [
    # These would ideally be professional translations, but for this example I'll provide them
    # Greetings and basic conversation
    "Bonjour, comment allez-vous ?",
    "Je m'appelle John.",
    "Ravi de vous rencontrer.",
    "Où habitez-vous ?",
    "J'habite à Paris.",
    "Quelle heure est-il ?",
    "Il est trois heures.",
    "Quel âge avez-vous ?",
    "J'ai vingt-cinq ans.",

    # Travel
    "Où est la gare ?",
    "Combien ça coûte ?",
    "J'ai besoin d'une chambre d'hôtel.",
    "Pouvez-vous m'aider à trouver mon chemin ?",
    "Y a-t-il un restaurant à proximité ?",
    "À quelle heure ouvre le musée ?",
    "Je voudrais louer une voiture.",
    "À quelle distance est l'aéroport ?",

    # Food
    "Je voudrais commander un café.",
    "Cette nourriture est délicieuse.",
    "Pourrais-je avoir l'addition s'il vous plaît ?",
    "Avez-vous des options végétariennes ?",
    "Quelle est la spécialité de ce restaurant ?",

    # Work and education
    "Je travaille sur un nouveau projet.",
    "La réunion commence à 9 heures.",
    "J'ai étudié l'informatique à l'université.",
    "Elle est ingénieure en logiciel.",
    "Nous devons terminer ce rapport d'ici demain.",

    # Descriptions
    "Le temps est magnifique aujourd'hui.",
    "Ce livre est très intéressant.",
    "Le bâtiment est grand et moderne.",
    "Elle a de longs cheveux bruns.",
    "Sa voiture est rouge et rapide.",

    # Longer sentences
    "Je prévois de voyager en France l'été prochain pour visiter des musées et goûter à la cuisine locale.",
    "Les avancées technologiques en intelligence artificielle ont transformé de nombreuses industries au cours de la dernière décennie.",
    "La protection de l'environnement devient de plus en plus importante face aux défis du changement climatique.",
    "Apprendre une nouvelle langue ouvre des portes vers différentes cultures et perspectives à travers le monde.",
    "L'entreprise a annoncé un nouveau partenariat stratégique qui élargira leurs opérations à l'échelle mondiale.",
]

# Create a DataFrame
df = pd.DataFrame({
    'English': english_sentences,
    'French': french_sentences
})

# Save to Excel
excel_file_path = 'en_fr_translation_dataset.xlsx'
df.to_excel(excel_file_path, index=False)
print(f"Dataset saved to {excel_file_path}")

# Test loading the Excel file
def load_excel_dataset(file_path):
    """Read the Excel workbook at ``file_path`` and return it as a DataFrame."""
    return pd.read_excel(file_path)

# Load the dataset
test_df = load_excel_dataset(excel_file_path)
print(f"Loaded dataset with {len(test_df)} sentences")

# Test the model on 5 random sentences from the Excel file.
# A seeded generator makes the displayed sample identical on every
# Restart & Run All; the size is capped so a short file does not raise.
rng = np.random.default_rng(42)
sample_indices = rng.choice(len(test_df), size=min(5, len(test_df)), replace=False)
for idx in sample_indices:
    en_text = test_df.iloc[idx]['English']
    fr_text_reference = test_df.iloc[idx]['French']

    # Translate with the model
    translated = translator(en_text)
    fr_text_model = translated[0]['translation_text']

    print(f"\nEnglish: {en_text}")
    print(f"Reference French: {fr_text_reference}")
    print(f"Model Translation: {fr_text_model}")

# Function to evaluate the model on the entire dataset
def evaluate_translations(model, dataset, output_path='translation_evaluation_results.xlsx'):
    """Translate every English sentence in ``dataset`` and collect the results.

    Args:
        model: Callable translator; ``model(text)`` must return a sequence
            whose first item is a dict with a 'translation_text' key.
        dataset: DataFrame with 'English' and 'French' columns.
        output_path: Excel file to write the results to. Pass ``None`` to
            skip writing. Defaults to the file name the notebook always used.

    Returns:
        DataFrame with columns 'English', 'Reference', 'Model_Translation',
        one row per input row (the columns are present even when empty).
    """
    result_columns = ['English', 'Reference', 'Model_Translation']

    results = [
        {
            'English': en_text,
            'Reference': fr_reference,
            'Model_Translation': model(en_text)[0]['translation_text'],
        }
        for en_text, fr_reference in zip(dataset['English'], dataset['French'])
    ]

    # Passing `columns` keeps the schema stable for an empty dataset.
    results_df = pd.DataFrame(results, columns=result_columns)

    if output_path is not None:
        results_df.to_excel(output_path, index=False)
        print(f"Evaluation results saved to '{output_path}'")

    return results_df

# Uncomment to run full evaluation
# evaluation_results = evaluate_translations(translator, test_df)

Device set to use cpu


Dataset saved to en_fr_translation_dataset.xlsx
Loaded dataset with 37 sentences

English: I would like to order a coffee.
Reference French: Je voudrais commander un café.
Model Translation: Je voudrais commander un café.

English: Is there a restaurant nearby?
Reference French: Y a-t-il un restaurant à proximité ?
Model Translation: Y a-t-il un restaurant à proximité ?

English: I live in Paris.
Reference French: J'habite à Paris.
Model Translation: Je vis à Paris.

English: The building is tall and modern.
Reference French: Le bâtiment est grand et moderne.
Model Translation: Le bâtiment est grand et moderne.

English: Learning a new language opens doors to different cultures and perspectives around the world.
Reference French: Apprendre une nouvelle langue ouvre des portes vers différentes cultures et perspectives à travers le monde.
Model Translation: L'apprentissage d'une nouvelle langue ouvre des portes à différentes cultures et perspectives à travers le monde.


In [None]:
# Load your own Excel file
# NOTE(review): absolute '/content/...' path — looks Colab-specific; confirm
# before running elsewhere. `pd`, `np` and `translator` come from earlier cells.
your_file_path = '/content/en_fr_translation_dataset.xlsx'
your_df = pd.read_excel(your_file_path)

# Adjust column names if needed
# The keys below are placeholders: replace them with the real header names in
# your file so the loop can read the 'English' and 'French' columns.
your_df = your_df.rename(columns={'English_Column_Name': 'English', 'French_Column_Name': 'French'})

# Test your model on this dataset
# Unseeded draw of 5 distinct row positions, so the sample differs per run.
sample_indices = np.random.choice(len(your_df), 5, replace=False)
for idx in sample_indices:
    en_text = your_df.iloc[idx]['English']
    fr_text_reference = your_df.iloc[idx]['French']

    # Translate with the model
    translated = translator(en_text)
    fr_text_model = translated[0]['translation_text']

    print(f"\nEnglish: {en_text}")
    print(f"Reference French: {fr_text_reference}")
    print(f"Model Translation: {fr_text_model}")


English: My name is John.
Reference French: Je m'appelle John.
Model Translation: Mon nom est John.

English: I live in Paris.
Reference French: J'habite à Paris.
Model Translation: Je vis à Paris.

English: What time is it?
Reference French: Quelle heure est-il ?
Model Translation: Quelle heure est-il ?

English: Where do you live?
Reference French: Où habitez-vous ?
Model Translation: Où habitez-vous ?

English: How much does this cost?
Reference French: Combien ça coûte ?
Model Translation: Combien cela coûte-t-il ?


In [None]:
import pandas as pd
import numpy as np
import time
from tqdm.notebook import tqdm
from transformers import pipeline

# Load your fine-tuned model
model_name = f"{model_checkpoint.split('/')[-1]}-finetuned-{source_lang}-to-{target_lang}"
translator = pipeline("translation", model=model_name)

# Step 1: Load the CSV file
def load_translation_csv(file_path, encoding='utf-8'):
    """Load an English/French CSV into a DataFrame.

    A two-column file whose headers are not exactly 'English' and 'French'
    has its columns renamed to those names. If the first read raises, the
    file is read again with ``header=None`` and the columns are named
    'English' and 'French'.

    NOTE(review): ``read_csv`` takes the first line as the header by default,
    so for a genuinely headerless two-column file the first data row is used
    up as column names before they are overwritten — confirm this is intended.

    Args:
        file_path: Path of the CSV file.
        encoding: Text encoding passed to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or ``None`` when both attempts fail.
    """
    expected_names = ['English', 'French']

    # First attempt: let pandas infer the header row.
    try:
        frame = pd.read_csv(file_path, encoding=encoding)

        has_expected_headers = 'English' in frame.columns and 'French' in frame.columns
        if len(frame.columns) == 2 and not has_expected_headers:
            frame.columns = expected_names

        print(f"Loaded dataset with {len(frame)} rows and {len(frame.columns)} columns")
        print(f"Column names: {frame.columns.tolist()}")
        return frame
    except Exception as e:
        print(f"Error loading CSV: {e}")

    # Fallback: Try loading without headers
    try:
        frame = pd.read_csv(file_path, encoding=encoding, header=None)
        frame.columns = expected_names
        print(f"Loaded dataset without headers, with {len(frame)} rows")
        return frame
    except Exception as e2:
        print(f"Fallback loading also failed: {e2}")
        return None

# Step 2: Sample the dataset (processing all 175K+ sentences would take too long for demonstration)
def sample_dataset(df, n=10, random_state=42):
    """Return ``n`` randomly chosen rows of ``df``.

    The frame itself is returned unchanged when it has ``n`` rows or fewer.
    ``random_state`` seeds the draw so repeated calls pick the same rows.
    """
    needs_sampling = len(df) > n
    return df.sample(n, random_state=random_state) if needs_sampling else df

# Step 3: Translate and evaluate
def translate_and_compare(df, translator, sample_size=10):
    """Translate the 'English' column and pair each output with its reference.

    Args:
        df: DataFrame with 'English' and 'French' columns.
        translator: Callable whose result is indexed as
            ``result[0]['translation_text']``.
        sample_size: When truthy and smaller than ``len(df)``, that many rows
            are drawn with a fixed seed; otherwise every row is used.

    Returns:
        DataFrame with 'English', 'Reference_French' and 'Model_Translation'
        columns. A row whose translation raised keeps an "ERROR: ..." string
        in 'Model_Translation'.
    """
    should_sample = bool(sample_size) and len(df) > sample_size
    subset = df.sample(sample_size, random_state=42) if should_sample else df

    records = []
    for row_label, row in tqdm(subset.iterrows(), total=len(subset), desc="Translating"):
        source_text = row['English']
        reference_text = row['French']

        # A failed translation is recorded in place instead of aborting the run.
        try:
            output_text = translator(source_text)[0]['translation_text']
        except Exception as e:
            print(f"Error translating row {row_label}: {e}")
            output_text = f"ERROR: {str(e)}"

        records.append({
            'English': source_text,
            'Reference_French': reference_text,
            'Model_Translation': output_text
        })

    return pd.DataFrame(records)

# Step 4: Process in batches (for handling the full dataset)
def process_in_batches(df, translator, batch_size=100, output_file='translation_results.csv'):
    """Translate ``df`` in consecutive batches and write the results to CSV.

    Args:
        df: DataFrame with 'English' and 'French' columns.
        translator: Translation callable forwarded to ``translate_and_compare``.
        batch_size: Number of rows per batch.
        output_file: CSV path for the combined results.

    Returns:
        DataFrame with every batch's results concatenated under a fresh
        0..n-1 index. For an empty ``df`` an empty frame with the result
        columns is written and returned.

    Side effects:
        Writes ``interim_results_batch_{i}.csv`` after batch 0, after every
        10th batch and after the last batch, plus ``output_file`` at the end.
    """
    # Guard: with no rows there are no batches, and pd.concat([]) raises.
    if len(df) == 0:
        empty_results = pd.DataFrame(columns=['English', 'Reference_French', 'Model_Translation'])
        empty_results.to_csv(output_file, index=False)
        print(f"No rows to translate; wrote empty results to {output_file}")
        return empty_results

    num_batches = len(df) // batch_size + (1 if len(df) % batch_size > 0 else 0)
    all_results = []

    for i in tqdm(range(num_batches), desc="Processing batches"):
        start_idx = i * batch_size
        end_idx = min(start_idx + batch_size, len(df))
        batch = df.iloc[start_idx:end_idx]

        batch_results = translate_and_compare(batch, translator, sample_size=None)
        all_results.append(batch_results)

        # Save interim results
        if i % 10 == 0 or i == num_batches - 1:
            # ignore_index: each batch frame restarts at 0, so a plain concat
            # would produce duplicate index labels.
            interim_df = pd.concat(all_results, ignore_index=True)
            interim_df.to_csv(f"interim_results_batch_{i}.csv", index=False)

    # Combine all results
    final_results = pd.concat(all_results, ignore_index=True)
    final_results.to_csv(output_file, index=False)
    print(f"All translations saved to {output_file}")

    return final_results

# Main execution
if __name__ == "__main__":
    # Load the CSV file
    file_path = '/content/english_french.csv'
    df = load_translation_csv(file_path)

    # load_translation_csv returns None when the file could not be read.
    if df is not None:
        # Display a few examples from the dataset
        print("\nSample rows from the dataset:")
        print(df.head())

        # Process a small sample for quick demonstration
        print("\nTranslating a sample of 10 sentences...")
        sample_results = translate_and_compare(df, translator, sample_size=10)

        # Display results
        print("\nTranslation results:")
        for i, row in sample_results.iterrows():
            print(f"\nEnglish: {row['English']}")
            print(f"Reference: {row['Reference_French']}")
            print(f"Model: {row['Model_Translation']}")

        # Ask if user wants to process the entire dataset. The count comes
        # from the loaded frame so the prompt stays correct for any file.
        process_all = input(f"\nDo you want to process all {len(df):,} sentences? (yes/no): ").lower().strip()

        if process_all in ['yes', 'y']:
            print("\nProcessing the entire dataset. This will take a long time...")
            print("Results will be saved in batches to prevent data loss in case of interruptions.")
            all_results = process_in_batches(df, translator, batch_size=500,
                                            output_file='complete_translation_results.csv')
            print("Processing complete!")
        else:
            print("\nProcessing a larger sample of 100 sentences...")
            larger_sample = translate_and_compare(df, translator, sample_size=100)
            larger_sample.to_csv('sample_translation_results.csv', index=False)
            print("Sample results saved to 'sample_translation_results.csv'")

In [None]:
import pandas as pd
import numpy as np
import time
from tqdm import tqdm
from transformers import pipeline
import os

# Configuration - Update these according to your model
MODEL_CONFIG = {
    'model_name': 'Helsinki-NLP/opus-mt-en-fr',  # Default model, replace with your fine-tuned model
    'source_lang': 'en',
    'target_lang': 'fr'
}

# If you have a fine-tuned model, uncomment and modify this:
# MODEL_CONFIG['model_name'] = "your-username/your-finetuned-model-name"

def load_translation_model(model_name):
    """Load a translation pipeline for ``model_name``.

    The pipeline is placed on GPU 0 when CUDA is available, otherwise on CPU.
    If loading ``model_name`` fails, the stock "Helsinki-NLP/opus-mt-en-fr"
    model is tried instead.

    Args:
        model_name: Model identifier or path passed to ``pipeline``.

    Returns:
        The translation pipeline, or ``None`` when both attempts fail.
    """
    # Resolve the device up front. This cell never imported `torch`, so the
    # inline `torch.cuda.is_available()` raised NameError inside the try
    # block; the broad `except` swallowed it and the requested model was
    # silently replaced by the fallback on every call.
    try:
        import torch
        device = 0 if torch.cuda.is_available() else -1
    except ImportError:
        device = -1

    try:
        print(f"Loading model: {model_name}")
        translator = pipeline("translation", model=model_name, device=device)
        print("Model loaded successfully!")
        return translator
    except Exception as e:
        print(f"Error loading model {model_name}: {e}")
        print("Falling back to default Helsinki-NLP model...")
        try:
            translator = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr")
            return translator
        except Exception as e2:
            print(f"Fallback model also failed: {e2}")
            return None

def load_translation_csv(file_path, encoding='utf-8'):
    """Load and clean a CSV file of English-French translation pairs.

    For a two-column file, headers matching a known alias (case-insensitive,
    surrounding whitespace ignored) are renamed to 'English' / 'French'; if
    both columns cannot be mapped, the first is taken as English and the
    second as French. Rows with missing values are dropped, both text
    columns are stripped, and rows left with an empty string are removed.

    Args:
        file_path: Path of the CSV file.
        encoding: Text encoding passed to ``pd.read_csv``.

    Returns:
        The cleaned DataFrame, or ``None`` if anything raised along the way.
    """
    # Lower-cased header -> canonical column name.
    header_aliases = {
        'english': 'English', 'en': 'English', 'eng': 'English', 'source': 'English',
        'french': 'French', 'fr': 'French', 'fra': 'French', 'target': 'French',
    }

    try:
        frame = pd.read_csv(file_path, encoding=encoding)

        original_headers = frame.columns.tolist()
        print(f"Original columns: {original_headers}")

        # Standardize column names (two-column files only)
        if len(frame.columns) == 2:
            renames = {}
            for header in original_headers:
                canonical = header_aliases.get(header.lower().strip())
                if canonical is not None:
                    renames[header] = canonical

            if len(renames) == 2:
                frame = frame.rename(columns=renames)
            else:
                # Unrecognized headers: assume English first, French second.
                frame.columns = ['English', 'French']

        # Clean the data: drop missing values, then trim both text columns.
        frame = frame.dropna()
        for name in ('English', 'French'):
            frame[name] = frame[name].astype(str).str.strip()

        # Remove rows where either side is an empty string after trimming.
        non_empty = (frame['English'] != '') & (frame['French'] != '')
        frame = frame[non_empty]

        print(f"Loaded dataset with {len(frame)} rows and {len(frame.columns)} columns")
        print(f"Final column names: {frame.columns.tolist()}")
        print(f"Sample data:")
        print(frame.head(3))

        return frame
    except Exception as e:
        print(f"Error loading CSV: {e}")
        return None

def sample_dataset(df, n=10, random_state=42):
    """Draw ``n`` seeded random rows for quick testing.

    A frame with ``n`` rows or fewer is returned as-is; otherwise the sample
    is returned with a fresh 0..n-1 index.
    """
    if len(df) > n:
        drawn = df.sample(n, random_state=random_state)
        return drawn.reset_index(drop=True)
    return df

def translate_batch(texts, translator, max_length=512):
    """Translate each text in ``texts`` and return the outputs in order.

    Args:
        texts: Iterable of strings to translate.
        translator: Callable applied to one text at a time.
        max_length: Maximum number of CHARACTERS kept from each text before
            it is sent to the translator.

    Returns:
        List with one string per input: the first result's
        'translation_text' when the translator returns a non-empty list,
        ``str(result)`` for any other result, or "ERROR: <message>" when
        processing that text raised.
    """
    outputs = []
    for item in texts:
        try:
            # Clip over-long inputs (character-based, not token-based).
            item = item if len(item) <= max_length else item[:max_length]

            raw = translator(item)
            has_hits = isinstance(raw, list) and len(raw) > 0
            outputs.append(raw[0]['translation_text'] if has_hits else str(raw))
        except Exception as e:
            print(f"Error translating '{item}': {e}")
            outputs.append(f"ERROR: {str(e)}")
    return outputs

def translate_and_compare(df, translator, sample_size=10):
    """Translate rows of ``df`` and collect them next to their references.

    Args:
        df: DataFrame with 'English' and 'French' columns.
        translator: Translation callable forwarded to ``translate_batch``.
        sample_size: When truthy and smaller than ``len(df)``, a sample of
            that size is taken via ``sample_dataset``; otherwise a copy of
            the whole frame is processed.

    Returns:
        DataFrame with one row per processed sentence: 'Index', 'English',
        'Reference_French', 'Model_Translation' and the character lengths
        'Length_English', 'Length_Reference', 'Length_Model' (0 when the
        translation is an "ERROR..." string).
    """
    if sample_size and len(df) > sample_size:
        subset = sample_dataset(df, sample_size)
        print(f"Using sample of {len(subset)} sentences")
    else:
        subset = df.copy()

    records = []

    print("Starting translation...")
    for row_label, row in tqdm(subset.iterrows(), total=len(subset), desc="Translating"):
        source_text = row['English']
        reference_text = row['French']

        try:
            translation = translate_batch([source_text], translator)[0]
            # Failed translations come back as "ERROR: ..." and count as length 0.
            model_length = 0 if translation.startswith('ERROR') else len(translation)
            record = {
                'Index': row_label,
                'English': source_text,
                'Reference_French': reference_text,
                'Model_Translation': translation,
                'Length_English': len(source_text),
                'Length_Reference': len(reference_text),
                'Length_Model': model_length
            }
        except Exception as e:
            print(f"Error processing row {row_label}: {e}")
            record = {
                'Index': row_label,
                'English': source_text,
                'Reference_French': reference_text,
                'Model_Translation': f"ERROR: {str(e)}",
                'Length_English': len(source_text),
                'Length_Reference': len(reference_text),
                'Length_Model': 0
            }

        records.append(record)

    return pd.DataFrame(records)

def calculate_basic_metrics(results_df):
    """Summarize a translation run.

    Rows whose 'Model_Translation' starts with 'ERROR' count as failures and
    are excluded from the length averages.

    Returns:
        Dict with 'total_sentences', 'successful_translations', 'error_rate'
        (percentage) and the mean 'Length_*' values of the successful rows,
        or an empty dict when no row succeeded.
    """
    failed = results_df['Model_Translation'].str.startswith('ERROR')
    valid_results = results_df[~failed]

    total = len(results_df)
    succeeded = len(valid_results)

    if succeeded == 0:
        print("No valid translations to evaluate!")
        return {}

    metrics = {
        'total_sentences': total,
        'successful_translations': succeeded,
        'error_rate': (total - succeeded) / total * 100,
    }

    # Mean character lengths over the successful rows only.
    length_columns = (
        ('avg_length_english', 'Length_English'),
        ('avg_length_reference', 'Length_Reference'),
        ('avg_length_model', 'Length_Model'),
    )
    for metric_name, column in length_columns:
        metrics[metric_name] = valid_results[column].mean()

    return metrics

def process_in_batches(df, translator, batch_size=50, output_file='translation_results.csv'):
    """Process the entire dataset in batches.

    Args:
        df: DataFrame with 'English' and 'French' columns.
        translator: Translation callable forwarded to ``translate_and_compare``.
        batch_size: Number of rows per batch.
        output_file: CSV path for the combined results.

    Returns:
        DataFrame with every batch's results concatenated under a fresh
        index. For an empty ``df`` an empty frame with the result columns is
        written and returned.

    Side effects:
        Writes ``interim_results_batch_{k}.csv`` after every 10th batch and
        after the last one, plus ``output_file`` at the end.
    """
    # Guard: with no rows there are no batches, and pd.concat([]) raises.
    if len(df) == 0:
        empty_results = pd.DataFrame(columns=[
            'Index', 'English', 'Reference_French', 'Model_Translation',
            'Length_English', 'Length_Reference', 'Length_Model',
        ])
        empty_results.to_csv(output_file, index=False)
        print(f"No sentences to process; wrote empty results to {output_file}")
        return empty_results

    num_batches = len(df) // batch_size + (1 if len(df) % batch_size > 0 else 0)
    all_results = []

    print(f"Processing {len(df)} sentences in {num_batches} batches of {batch_size}")

    for i in tqdm(range(num_batches), desc="Processing batches"):
        start_idx = i * batch_size
        end_idx = min(start_idx + batch_size, len(df))
        batch = df.iloc[start_idx:end_idx].copy()

        # Reset index for the batch (the 'Index' column of the results is
        # therefore the position within the batch, not within `df`).
        batch = batch.reset_index(drop=True)

        batch_results = translate_and_compare(batch, translator, sample_size=None)
        all_results.append(batch_results)

        # Save interim results every 10 batches
        if (i + 1) % 10 == 0 or i == num_batches - 1:
            interim_df = pd.concat(all_results, ignore_index=True)
            interim_file = f"interim_results_batch_{i+1}.csv"
            interim_df.to_csv(interim_file, index=False)
            print(f"Saved interim results to {interim_file}")

    # Combine all results
    final_results = pd.concat(all_results, ignore_index=True)
    final_results.to_csv(output_file, index=False)
    print(f"All translations saved to {output_file}")

    return final_results

def display_sample_results(results_df, n=5):
    """Print the first ``n`` rows of ``results_df`` as readable examples.

    Each example shows the English source, the reference translation and the
    model output. Examples are numbered from the row's index label plus one.
    """
    print(f"\nSample translation results (showing {n} examples):")
    print("=" * 80)

    for label, record in results_df.head(n).iterrows():
        example_lines = [
            f"\nExample {label + 1}:",
            f"English: {record['English']}",
            f"Reference: {record['Reference_French']}",
            f"Model: {record['Model_Translation']}",
            "-" * 60,
        ]
        print("\n".join(example_lines))

def _print_metrics(metrics, heading):
    """Print ``heading`` followed by one ``key: value`` line per metric.

    Float values are formatted to two decimal places; everything else is
    printed as-is.
    """
    print(f"\n{heading}")
    for key, value in metrics.items():
        if isinstance(value, float):
            print(f"{key}: {value:.2f}")
        else:
            print(f"{key}: {value}")


def _evaluate_sample(df, translator, sample_size):
    """Translate ``sample_size`` sentences, save them to CSV and print a report."""
    output_file = f'sample_{sample_size}_translation_results.csv'

    print(f"\nProcessing {sample_size} sentences...")
    results = translate_and_compare(df, translator, sample_size=sample_size)
    results.to_csv(output_file, index=False)
    display_sample_results(results, n=3)
    _print_metrics(calculate_basic_metrics(results),
                   f"Evaluation Metrics ({sample_size} sentences):")
    print(f"Results saved to '{output_file}'")


# Main execution
def main(file_path='/content/en_fr_dataset.txt'):
    """Run the interactive English-French translation evaluation.

    Loads the translation model named in ``MODEL_CONFIG`` and the dataset at
    ``file_path``, evaluates a quick 10-sentence sample, then repeatedly
    prompts the user to evaluate 100, 500 or all sentences until they exit
    (or the full-dataset run completes).

    Args:
        file_path: Path of the dataset file handed to ``load_translation_csv``.
            Defaults to the original hard-coded location.

    Returns:
        None. Returns early, after printing a message, if either the model or
        the dataset fails to load.
    """
    print("English-French Translation Model Evaluation")
    print("=" * 50)

    # Load the translation model
    translator = load_translation_model(MODEL_CONFIG['model_name'])
    if translator is None:
        print("Failed to load translation model. Exiting.")
        return

    # Load the CSV file
    print(f"\nLoading dataset from: {file_path}")
    df = load_translation_csv(file_path)
    if df is None:
        print("Failed to load dataset. Please check the file path and format.")
        return

    print(f"\nDataset loaded successfully with {len(df)} sentences.")

    # Process a small sample for quick demonstration
    print("\n" + "="*50)
    print("QUICK SAMPLE EVALUATION (10 sentences)")
    print("="*50)

    sample_results = translate_and_compare(df, translator, sample_size=10)
    display_sample_results(sample_results, n=5)
    _print_metrics(calculate_basic_metrics(sample_results), "Sample Evaluation Metrics:")

    # Ask if user wants to process more
    print(f"\nThe dataset contains {len(df)} sentences total.")

    # Menu choices that run a fixed-size sample evaluation.
    sample_sizes = {'1': 100, '2': 500}
    menu = ("\nChoose an option:\n1. Process 100 sentences\n2. Process 500 sentences\n"
            "3. Process all sentences\n4. Exit\nEnter choice (1-4): ")

    while True:
        choice = input(menu).strip()

        if choice in sample_sizes:
            _evaluate_sample(df, translator, sample_sizes[choice])

        elif choice == '3':
            confirm = input(f"This will process all {len(df)} sentences and may take a long time. Continue? (yes/no): ").lower().strip()
            if confirm not in ['yes', 'y']:
                # Declining returns to the menu rather than exiting.
                print("Cancelled processing all sentences.")
                continue

            print(f"\nProcessing all {len(df)} sentences...")
            print("Results will be saved in batches to prevent data loss.")
            all_results = process_in_batches(df, translator, batch_size=50,
                                             output_file='complete_translation_results.csv')
            display_sample_results(all_results, n=3)
            _print_metrics(calculate_basic_metrics(all_results), "Final Evaluation Metrics:")
            print("Processing complete! Results saved to 'complete_translation_results.csv'")
            break

        elif choice == '4':
            print("Exiting...")
            break

        else:
            print("Invalid choice. Please enter 1, 2, 3, or 4.")

# Entry point: start the interactive evaluation when this code runs as the
# top-level module (the captured output below shows it executing in the notebook).
if __name__ == "__main__":
    main()

English-French Translation Model Evaluation
Loading model: Helsinki-NLP/opus-mt-en-fr
Error loading model Helsinki-NLP/opus-mt-en-fr: name 'torch' is not defined
Falling back to default Helsinki-NLP model...


Device set to use cpu



Loading dataset from: /content/en_fr_dataset.txt
Original columns: ['english', 'french']
Loaded dataset with 992 rows and 2 columns
Final column names: ['English', 'French']
Sample data:
                 English                         French
0    Hello, how are you?  Bonjour, comment allez-vous ?
1  I am fine, thank you.           Je vais bien, merci.
2     What is your name?    Comment vous appelez-vous ?

Dataset loaded successfully with 992 sentences.

QUICK SAMPLE EVALUATION (10 sentences)
Using sample of 10 sentences
Starting translation...


Translating: 100%|██████████| 10/10 [00:04<00:00,  2.11it/s]



Sample translation results (showing 5 examples):

Example 1:
English: He's good at basketball.
Reference: Il est bon au basket-ball.
Model: Il est bon au basket.
------------------------------------------------------------

Example 2:
English: You have good taste.
Reference: Vous avez bon goût.
Model: Vous avez bon goût.
------------------------------------------------------------

Example 3:
English: Feelings don't lie.
Reference: Les sentiments ne mentent pas.
Model: Les sentiments ne mentent pas.
------------------------------------------------------------

Example 4:
English: When can we start?
Reference: Quand pouvons-nous commencer ?
Model: Quand pouvons-nous commencer ?
------------------------------------------------------------

Example 5:
English: I'm wearing a red shirt.
Reference: Je porte une chemise rouge.
Model: Je porte une chemise rouge.
------------------------------------------------------------

Sample Evaluation Metrics:
total_sentences: 10
successful_translations

Processing batches:   0%|          | 0/20 [00:00<?, ?it/s]

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:21,  2.25it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:20,  2.38it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:19,  2.46it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:19,  2.35it/s][A
Translating:  10%|█         | 5/50 [00:02<00:19,  2.33it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:18,  2.38it/s][A
Translating:  14%|█▍        | 7/50 [00:02<00:18,  2.33it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:19,  2.15it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:21,  1.91it/s][A
Translating:  20%|██        | 10/50 [00:04<00:19,  2.03it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:18,  2.15it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:19,  1.99it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:18,  1.96it/s][A
Translating:  28%|██▊       | 14/50 [00:06<00:20,  1.76it/s][A
Translating:  30%|███       | 15/50 [00:07<00:19,  1.83it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:18,  2.64it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:20,  2.36it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:20,  2.32it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:21,  2.18it/s][A
Translating:  10%|█         | 5/50 [00:02<00:19,  2.32it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:23,  1.90it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:19,  2.16it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:16,  2.48it/s][A
Translating:  18%|█▊        | 9/50 [00:03<00:16,  2.49it/s][A
Translating:  20%|██        | 10/50 [00:04<00:15,  2.52it/s][A
Translating:  22%|██▏       | 11/50 [00:04<00:16,  2.34it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:15,  2.40it/s][A
Translating:  26%|██▌       | 13/50 [00:05<00:14,  2.55it/s][A
Translating:  28%|██▊       | 14/50 [00:05<00:14,  2.44it/s][A
Translating:  30%|███       | 15/50 [00:06<00:14,  2.49it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:26,  1.85it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:23,  2.03it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:23,  2.02it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:21,  2.11it/s][A
Translating:  10%|█         | 5/50 [00:02<00:23,  1.95it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:21,  2.03it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:23,  1.84it/s][A
Translating:  16%|█▌        | 8/50 [00:04<00:20,  2.01it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:20,  2.01it/s][A
Translating:  20%|██        | 10/50 [00:04<00:18,  2.21it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:17,  2.24it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:16,  2.24it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:17,  2.16it/s][A
Translating:  28%|██▊       | 14/50 [00:06<00:17,  2.05it/s][A
Translating:  30%|███       | 15/50 [00:07<00:17,  2.04it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:31,  1.58it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:26,  1.82it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:22,  2.07it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:23,  1.98it/s][A
Translating:  10%|█         | 5/50 [00:02<00:21,  2.12it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:19,  2.25it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:19,  2.16it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:20,  2.05it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:21,  1.90it/s][A
Translating:  20%|██        | 10/50 [00:04<00:19,  2.07it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:18,  2.12it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:17,  2.15it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:18,  2.05it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:19,  1.85it/s][A
Translating:  30%|███       | 15/50 [00:07<00:20,  1.73it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:21,  2.29it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:25,  1.85it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:24,  1.94it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:22,  2.04it/s][A
Translating:  10%|█         | 5/50 [00:02<00:20,  2.21it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:20,  2.12it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:19,  2.25it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:20,  2.07it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:20,  1.97it/s][A
Translating:  20%|██        | 10/50 [00:05<00:22,  1.80it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:18,  2.06it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:17,  2.19it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:16,  2.27it/s][A
Translating:  28%|██▊       | 14/50 [00:06<00:17,  2.06it/s][A
Translating:  30%|███       | 15/50 [00:07<00:18,  1.92it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:24,  1.98it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:25,  1.89it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:25,  1.86it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:22,  2.01it/s][A
Translating:  10%|█         | 5/50 [00:02<00:22,  2.02it/s][A
Translating:  12%|█▏        | 6/50 [00:03<00:21,  2.02it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:22,  1.95it/s][A
Translating:  16%|█▌        | 8/50 [00:04<00:22,  1.85it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:24,  1.64it/s][A
Translating:  20%|██        | 10/50 [00:05<00:23,  1.69it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:21,  1.83it/s][A
Translating:  24%|██▍       | 12/50 [00:06<00:17,  2.18it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:18,  1.98it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:17,  2.04it/s][A
Translating:  30%|███       | 15/50 [00:07<00:18,  1.88it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:24,  2.03it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:25,  1.88it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:23,  2.04it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:22,  2.02it/s][A
Translating:  10%|█         | 5/50 [00:02<00:22,  1.99it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:20,  2.16it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:18,  2.33it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:16,  2.50it/s][A
Translating:  18%|█▊        | 9/50 [00:03<00:16,  2.53it/s][A
Translating:  20%|██        | 10/50 [00:04<00:18,  2.19it/s][A
Translating:  22%|██▏       | 11/50 [00:04<00:15,  2.57it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:16,  2.31it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:20,  1.78it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:23,  1.52it/s][A
Translating:  30%|███       | 15/50 [00:07<00:21,  1.64it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:27,  1.80it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:26,  1.83it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:23,  1.99it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:23,  1.93it/s][A
Translating:  10%|█         | 5/50 [00:02<00:22,  2.02it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:17,  2.45it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:20,  2.11it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:20,  2.08it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:21,  1.93it/s][A
Translating:  20%|██        | 10/50 [00:04<00:19,  2.02it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:22,  1.76it/s][A
Translating:  24%|██▍       | 12/50 [00:06<00:21,  1.78it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:21,  1.74it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:20,  1.72it/s][A
Translating:  30%|███       | 15/50 [00:08<00:21,  1.61it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:34,  1.40it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:26,  1.81it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:27,  1.72it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:26,  1.77it/s][A
Translating:  10%|█         | 5/50 [00:02<00:27,  1.66it/s][A
Translating:  12%|█▏        | 6/50 [00:03<00:27,  1.58it/s][A
Translating:  14%|█▍        | 7/50 [00:04<00:24,  1.79it/s][A
Translating:  16%|█▌        | 8/50 [00:04<00:23,  1.81it/s][A
Translating:  18%|█▊        | 9/50 [00:05<00:21,  1.93it/s][A
Translating:  20%|██        | 10/50 [00:05<00:24,  1.64it/s][A
Translating:  22%|██▏       | 11/50 [00:06<00:24,  1.57it/s][A
Translating:  24%|██▍       | 12/50 [00:07<00:24,  1.53it/s][A
Translating:  26%|██▌       | 13/50 [00:07<00:23,  1.56it/s][A
Translating:  28%|██▊       | 14/50 [00:08<00:23,  1.52it/s][A
Translating:  30%|███       | 15/50 [00:09<00:22,  1.59it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:24,  2.02it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:18,  2.66it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:16,  2.79it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:22,  2.01it/s][A
Translating:  10%|█         | 5/50 [00:02<00:23,  1.88it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:23,  1.86it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:24,  1.77it/s][A
Translating:  16%|█▌        | 8/50 [00:04<00:23,  1.81it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:23,  1.77it/s][A
Translating:  20%|██        | 10/50 [00:05<00:24,  1.66it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:20,  1.92it/s][A
Translating:  24%|██▍       | 12/50 [00:06<00:19,  1.90it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:18,  2.00it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:18,  1.94it/s][A
Translating:  30%|███       | 15/50 [00:07<00:19,  1.80it

Saved interim results to interim_results_batch_10.csv
Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:19,  2.56it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:20,  2.33it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:26,  1.78it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:25,  1.80it/s][A
Translating:  10%|█         | 5/50 [00:02<00:24,  1.86it/s][A
Translating:  12%|█▏        | 6/50 [00:03<00:31,  1.39it/s][A
Translating:  14%|█▍        | 7/50 [00:04<00:31,  1.39it/s][A
Translating:  16%|█▌        | 8/50 [00:05<00:30,  1.37it/s][A
Translating:  18%|█▊        | 9/50 [00:05<00:26,  1.53it/s][A
Translating:  20%|██        | 10/50 [00:06<00:25,  1.57it/s][A
Translating:  22%|██▏       | 11/50 [00:06<00:21,  1.79it/s][A
Translating:  24%|██▍       | 12/50 [00:07<00:19,  1.95it/s][A
Translating:  26%|██▌       | 13/50 [00:07<00:19,  1.86it/s][A
Translating:  28%|██▊       | 14/50 [00:08<00:18,  1.95it/s][A
Translating:  30%|███       | 15/50 [00:08<00:17,  2.04it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:16,  3.03it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:17,  2.71it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:18,  2.48it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:19,  2.37it/s][A
Translating:  10%|█         | 5/50 [00:02<00:18,  2.45it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:16,  2.68it/s][A
Translating:  14%|█▍        | 7/50 [00:02<00:16,  2.60it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:18,  2.30it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:22,  1.80it/s][A
Translating:  20%|██        | 10/50 [00:05<00:26,  1.49it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:24,  1.61it/s][A
Translating:  24%|██▍       | 12/50 [00:06<00:22,  1.69it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:20,  1.83it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:20,  1.71it/s][A
Translating:  30%|███       | 15/50 [00:07<00:18,  1.90it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:23,  2.05it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:22,  2.14it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:21,  2.19it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:20,  2.20it/s][A
Translating:  10%|█         | 5/50 [00:02<00:26,  1.72it/s][A
Translating:  12%|█▏        | 6/50 [00:03<00:26,  1.66it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:24,  1.74it/s][A
Translating:  16%|█▌        | 8/50 [00:04<00:25,  1.66it/s][A
Translating:  18%|█▊        | 9/50 [00:05<00:24,  1.70it/s][A
Translating:  20%|██        | 10/50 [00:05<00:24,  1.66it/s][A
Translating:  22%|██▏       | 11/50 [00:06<00:23,  1.66it/s][A
Translating:  24%|██▍       | 12/50 [00:06<00:22,  1.71it/s][A
Translating:  26%|██▌       | 13/50 [00:07<00:21,  1.73it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:20,  1.76it/s][A
Translating:  30%|███       | 15/50 [00:08<00:18,  1.85it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:26,  1.86it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:23,  2.07it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:28,  1.62it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:30,  1.50it/s][A
Translating:  10%|█         | 5/50 [00:03<00:34,  1.29it/s][A
Translating:  12%|█▏        | 6/50 [00:04<00:33,  1.33it/s][A
Translating:  14%|█▍        | 7/50 [00:04<00:30,  1.42it/s][A
Translating:  16%|█▌        | 8/50 [00:05<00:28,  1.48it/s][A
Translating:  18%|█▊        | 9/50 [00:05<00:25,  1.62it/s][A
Translating:  20%|██        | 10/50 [00:06<00:21,  1.83it/s][A
Translating:  22%|██▏       | 11/50 [00:06<00:22,  1.77it/s][A
Translating:  24%|██▍       | 12/50 [00:07<00:19,  1.96it/s][A
Translating:  26%|██▌       | 13/50 [00:07<00:16,  2.25it/s][A
Translating:  28%|██▊       | 14/50 [00:08<00:16,  2.18it/s][A
Translating:  30%|███       | 15/50 [00:08<00:14,  2.46it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:32,  1.50it/s][A
Translating:   4%|▍         | 2/50 [00:01<00:28,  1.66it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:28,  1.63it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:25,  1.77it/s][A
Translating:  10%|█         | 5/50 [00:02<00:25,  1.78it/s][A
Translating:  12%|█▏        | 6/50 [00:03<00:22,  1.92it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:20,  2.07it/s][A
Translating:  16%|█▌        | 8/50 [00:04<00:19,  2.20it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:19,  2.14it/s][A
Translating:  20%|██        | 10/50 [00:05<00:19,  2.02it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:20,  1.89it/s][A
Translating:  24%|██▍       | 12/50 [00:06<00:22,  1.72it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:20,  1.85it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:24,  1.49it/s][A
Translating:  30%|███       | 15/50 [00:08<00:23,  1.47it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:19,  2.45it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:20,  2.34it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:24,  1.96it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:22,  2.08it/s][A
Translating:  10%|█         | 5/50 [00:02<00:20,  2.24it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:22,  1.93it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:23,  1.83it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:19,  2.16it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:18,  2.27it/s][A
Translating:  20%|██        | 10/50 [00:04<00:16,  2.42it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:16,  2.38it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:15,  2.44it/s][A
Translating:  26%|██▌       | 13/50 [00:05<00:15,  2.45it/s][A
Translating:  28%|██▊       | 14/50 [00:06<00:14,  2.48it/s][A
Translating:  30%|███       | 15/50 [00:06<00:15,  2.23it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:30,  1.58it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:20,  2.32it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:20,  2.30it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:18,  2.53it/s][A
Translating:  10%|█         | 5/50 [00:02<00:19,  2.32it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:18,  2.40it/s][A
Translating:  14%|█▍        | 7/50 [00:02<00:16,  2.67it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:16,  2.50it/s][A
Translating:  18%|█▊        | 9/50 [00:03<00:17,  2.33it/s][A
Translating:  20%|██        | 10/50 [00:04<00:16,  2.47it/s][A
Translating:  22%|██▏       | 11/50 [00:04<00:18,  2.15it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:15,  2.43it/s][A
Translating:  26%|██▌       | 13/50 [00:05<00:16,  2.21it/s][A
Translating:  28%|██▊       | 14/50 [00:06<00:16,  2.23it/s][A
Translating:  30%|███       | 15/50 [00:06<00:17,  1.96it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:30,  1.63it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:21,  2.19it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:28,  1.67it/s][A
Translating:   8%|▊         | 4/50 [00:02<00:23,  1.92it/s][A
Translating:  10%|█         | 5/50 [00:02<00:26,  1.68it/s][A
Translating:  12%|█▏        | 6/50 [00:03<00:22,  1.96it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:24,  1.73it/s][A
Translating:  16%|█▌        | 8/50 [00:04<00:20,  2.05it/s][A
Translating:  18%|█▊        | 9/50 [00:04<00:21,  1.90it/s][A
Translating:  20%|██        | 10/50 [00:05<00:22,  1.82it/s][A
Translating:  22%|██▏       | 11/50 [00:05<00:19,  1.99it/s][A
Translating:  24%|██▍       | 12/50 [00:06<00:18,  2.03it/s][A
Translating:  26%|██▌       | 13/50 [00:06<00:15,  2.32it/s][A
Translating:  28%|██▊       | 14/50 [00:07<00:16,  2.18it/s][A
Translating:  30%|███       | 15/50 [00:07<00:15,  2.20it

Starting translation...



Translating:   0%|          | 0/50 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/50 [00:00<00:22,  2.20it/s][A
Translating:   4%|▍         | 2/50 [00:00<00:21,  2.27it/s][A
Translating:   6%|▌         | 3/50 [00:01<00:19,  2.38it/s][A
Translating:   8%|▊         | 4/50 [00:01<00:21,  2.13it/s][A
Translating:  10%|█         | 5/50 [00:02<00:22,  2.00it/s][A
Translating:  12%|█▏        | 6/50 [00:02<00:20,  2.17it/s][A
Translating:  14%|█▍        | 7/50 [00:03<00:17,  2.46it/s][A
Translating:  16%|█▌        | 8/50 [00:03<00:15,  2.72it/s][A
Translating:  18%|█▊        | 9/50 [00:03<00:15,  2.67it/s][A
Translating:  20%|██        | 10/50 [00:04<00:13,  2.87it/s][A
Translating:  22%|██▏       | 11/50 [00:04<00:16,  2.42it/s][A
Translating:  24%|██▍       | 12/50 [00:05<00:16,  2.29it/s][A
Translating:  26%|██▌       | 13/50 [00:05<00:15,  2.33it/s][A
Translating:  28%|██▊       | 14/50 [00:05<00:15,  2.31it/s][A
Translating:  30%|███       | 15/50 [00:06<00:15,  2.19it

Starting translation...



Translating:   0%|          | 0/42 [00:00<?, ?it/s][A
Translating:   2%|▏         | 1/42 [00:00<00:23,  1.77it/s][A
Translating:   5%|▍         | 2/42 [00:00<00:18,  2.18it/s][A
Translating:   7%|▋         | 3/42 [00:01<00:16,  2.29it/s][A
Translating:  10%|▉         | 4/42 [00:01<00:17,  2.15it/s][A
Translating:  12%|█▏        | 5/42 [00:02<00:18,  2.00it/s][A
Translating:  14%|█▍        | 6/42 [00:02<00:18,  1.95it/s][A
Translating:  17%|█▋        | 7/42 [00:03<00:17,  2.03it/s][A
Translating:  19%|█▉        | 8/42 [00:03<00:17,  1.98it/s][A
Translating:  21%|██▏       | 9/42 [00:04<00:15,  2.14it/s][A
Translating:  24%|██▍       | 10/42 [00:04<00:15,  2.03it/s][A
Translating:  26%|██▌       | 11/42 [00:05<00:12,  2.42it/s][A
Translating:  29%|██▊       | 12/42 [00:05<00:13,  2.26it/s][A
Translating:  31%|███       | 13/42 [00:06<00:13,  2.11it/s][A
Translating:  33%|███▎      | 14/42 [00:06<00:12,  2.23it/s][A
Translating:  36%|███▌      | 15/42 [00:06<00:11,  2.31it

Saved interim results to interim_results_batch_20.csv
All translations saved to complete_translation_results.csv

Sample translation results (showing 3 examples):

Example 1:
English: Hello, how are you?
Reference: Bonjour, comment allez-vous ?
Model: Bonjour, comment allez-vous ?
------------------------------------------------------------

Example 2:
English: I am fine, thank you.
Reference: Je vais bien, merci.
Model: Je vais bien, merci.
------------------------------------------------------------

Example 3:
English: What is your name?
Reference: Comment vous appelez-vous ?
Model: Quel est votre nom ?
------------------------------------------------------------

Final Evaluation Metrics:
total_sentences: 992
successful_translations: 992
error_rate: 0.00
avg_length_english: 21.50
avg_length_reference: 25.89
avg_length_model: 25.52
Processing complete! Results saved to 'complete_translation_results.csv'



