In [None]:
%%capture
# Notebook setup cell: authenticate with the Hugging Face Hub using a token
# kept in Kaggle Secrets, then install extra packages with pip.
# The %%capture cell magic above captures this cell's output so it is not shown.
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
# Fetch the secret stored under the label "HF_TOKEN".
secret_value_0 = user_secrets.get_secret("HF_TOKEN")
from huggingface_hub import login
# NOTE(review): `write_permission` may be deprecated in newer huggingface_hub
# releases -- confirm against the installed version.
login(token=f"{secret_value_0}",write_permission=True)  # Token is passed explicitly from the secret above; no interactive prompt is expected
# NOTE(review): none of these packages are imported in the visible cells --
# confirm they are still needed.
!pip install sacrebleu pandas tqdm

In [None]:
import torch
from transformers import MarianMTModel, MarianTokenizer
from typing import List, Dict
import warnings
warnings.filterwarnings('ignore')

class TranslationPipeline:
    """Round-trip English -> Lojban -> English translation using MarianMT models.

    Attributes:
        device: "cuda" when a GPU is available, otherwise "cpu".
        en_loj_model_name / loj_en_model_name: checkpoint names for each direction.
        en_loj_tokenizer / en_loj_model: tokenizer and model used for English -> Lojban.
        loj_en_tokenizer / loj_en_model: tokenizer and model used for Lojban -> English.
    """

    # Checkpoint used for both directions unless the caller overrides it.
    DEFAULT_MODEL_NAME = "woctordho/lojban-translation"

    # Upper bound on generated sequence length, shared by both directions.
    MAX_LENGTH = 50

    # Beam-search settings tried, in this order, to obtain distinct Lojban variants.
    _BEAM_CONFIGS = (
        {"num_beams": 5, "length_penalty": 1.0, "no_repeat_ngram_size": 2},
        {"num_beams": 8, "length_penalty": 0.8, "no_repeat_ngram_size": 3},
        {"num_beams": 4, "length_penalty": 1.2, "no_repeat_ngram_size": 2},
    )

    # Beam-search settings for the single back-translation pass.
    _BACK_TRANSLATION_CONFIG = {
        "num_beams": 5,
        "length_penalty": 1.0,
        "no_repeat_ngram_size": 2,
    }

    def __init__(
        self,
        en_loj_model_name: str = DEFAULT_MODEL_NAME,
        loj_en_model_name: str = DEFAULT_MODEL_NAME,
    ):
        """Load the tokenizers and models and move the models to the chosen device.

        Args:
            en_loj_model_name: Checkpoint for the English -> Lojban direction.
            loj_en_model_name: Checkpoint for the Lojban -> English direction.
        """
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Initialize English -> Lojban model
        print("Loading English -> Lojban model...")
        self.en_loj_model_name = en_loj_model_name
        self.en_loj_tokenizer = MarianTokenizer.from_pretrained(self.en_loj_model_name)
        self.en_loj_model = MarianMTModel.from_pretrained(self.en_loj_model_name).to(self.device)

        # Initialize Lojban -> English model
        # NOTE(review): by default this is the *same* checkpoint as the forward
        # direction, not a dedicated reverse model -- confirm that the checkpoint
        # really handles Lojban -> English input.
        print("Loading Lojban -> English model...")
        self.loj_en_model_name = loj_en_model_name
        if self.loj_en_model_name == self.en_loj_model_name:
            # Same checkpoint: share the already-loaded objects instead of
            # holding a second copy of identical weights in memory.
            self.loj_en_tokenizer = self.en_loj_tokenizer
            self.loj_en_model = self.en_loj_model
        else:
            self.loj_en_tokenizer = MarianTokenizer.from_pretrained(self.loj_en_model_name)
            self.loj_en_model = MarianMTModel.from_pretrained(self.loj_en_model_name).to(self.device)

    def _encode(self, tokenizer, text: str):
        """Tokenize *text* into tensors placed on this pipeline's device."""
        return tokenizer(text, return_tensors="pt", padding=True).to(self.device)

    def _generate_text(self, model, tokenizer, inputs, generation_config) -> str:
        """Run generation with *generation_config* and decode the first sequence."""
        outputs = model.generate(
            **inputs,
            max_length=self.MAX_LENGTH,
            early_stopping=True,
            **generation_config
        )
        return tokenizer.decode(outputs[0], skip_special_tokens=True)

    def translate_to_lojban(self, text: str, num_variants: int = 3) -> List[str]:
        """Translate English text to Lojban with multiple variants.

        Args:
            text: English source text.
            num_variants: How many beam configurations to try. Values above the
                number of built-in configurations are capped; values below 1
                produce an empty list.

        Returns:
            The distinct translations, in the order the beam configurations
            produced them (duplicates removed, first occurrence kept).
        """
        # Clamp so that a negative count does not slice from the end of the
        # configuration list and silently yield variants.
        wanted = max(0, num_variants)
        if wanted == 0:
            return []

        # Tokenize once; the same inputs are reused for every configuration.
        inputs = self._encode(self.en_loj_tokenizer, text)
        candidates = [
            self._generate_text(self.en_loj_model, self.en_loj_tokenizer, inputs, config)
            for config in self._BEAM_CONFIGS[:wanted]
        ]

        # dict.fromkeys removes duplicates while keeping insertion order, so the
        # result is deterministic (a set would reorder it from run to run).
        return list(dict.fromkeys(candidates))

    def translate_to_english(self, text: str) -> str:
        """Translate Lojban text back to English.

        Args:
            text: Lojban source text.

        Returns:
            The decoded first sequence produced by beam search.
        """
        inputs = self._encode(self.loj_en_tokenizer, text)
        return self._generate_text(
            self.loj_en_model,
            self.loj_en_tokenizer,
            inputs,
            self._BACK_TRANSLATION_CONFIG,
        )

    def round_trip_translate(self, text: str) -> Dict[str, object]:
        """Perform round-trip translation: English -> Lojban -> English.

        Args:
            text: English source text.

        Returns:
            A dict with three keys: "original" (the input string),
            "lojban_variants" (list of Lojban translations) and
            "back_translations" (list of English strings, where entry i is the
            back-translation of Lojban variant i).
        """
        # Get Lojban translations
        lojban_translations = self.translate_to_lojban(text)

        # Translate each Lojban variant back to English
        back_translations = [
            self.translate_to_english(lojban_text)
            for lojban_text in lojban_translations
        ]

        return {
            "original": text,
            "lojban_variants": lojban_translations,
            "back_translations": back_translations,
        }

def demonstrate_pipeline():
    """Run a fixed set of English sentences through the round trip and print the results.

    For every sample sentence the original text, each Lojban variant and each
    back-translation are printed, with numbered lists and separator rules.
    """
    translator = TranslationPipeline()

    samples = (
        "Hello, how are you?",
        "The red book is on the big table",
        "I really love learning Lojban",
        "What time is it?",
        "The weather is nice today",
    )

    # Section headings paired with the result key whose entries they introduce.
    sections = (
        ("\nLojban variants:", "lojban_variants"),
        ("\nBack translations:", "back_translations"),
    )

    heavy_rule = "=" * 60
    light_rule = "-" * 60

    print("\nRunning translation demonstrations:")
    print(heavy_rule)

    for english in samples:
        outcome = translator.round_trip_translate(english)

        print(f"\nOriginal English: {outcome['original']}")
        for heading, key in sections:
            print(heading)
            for number, entry in enumerate(outcome[key], start=1):
                print(f"{number}. {entry}")
        print(light_rule)

# Entry point: run the demonstration only when this code executes as the main
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    demonstrate_pipeline()