In [1]:
# Step 1: Install minimal required packages
# Each %pip line runs in this order; the notebook's own output notes that a
# kernel restart may be needed before updated packages are picked up.
# torch is fetched from the index given by --index-url (the PyTorch "cpu" wheel index).
%pip install torch --index-url https://download.pytorch.org/whl/cpu
# transformers is pinned to an exact version; the other packages are not pinned.
%pip install transformers==4.36.2
# NOTE(review): sentencepiece / huggingface_hub are presumably needed for the
# tokenizer and for downloading the model — confirm, and consider pinning them too.
%pip install sentencepiece
%pip install huggingface_hub


Looking in indexes: https://download.pytorch.org/whl/cpu
Note: you may need to restart the kernel to use updated packages.
Collecting transformers==4.36.2
  Downloading transformers-4.36.2-py3-none-any.whl.metadata (126 kB)
Collecting tokenizers<0.19,>=0.14 (from transformers==4.36.2)
  Downloading tokenizers-0.15.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.7 kB)
Downloading transformers-4.36.2-py3-none-any.whl (8.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading tokenizers-0.15.2-cp312-cp312-macosx_11_0_arm64.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.21.1
    Uninstalling tokenizers-0.21.1:
      Successfully uninstalled tokenizers

In [2]:
# Step 2: Import libraries with careful error handling
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')

print("Importing libraries...")

try:
    import torch
    print(f"✅ PyTorch {torch.__version__} loaded successfully")
except ImportError as e:
    print(f"❌ Failed to import PyTorch: {e}")
    # Re-raise rather than calling exit(): in a notebook exit() shuts the
    # kernel down, while a raised error simply stops the cell and keeps the
    # traceback visible.
    raise

try:
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
    import transformers
    print(f"✅ Transformers {transformers.__version__} loaded successfully")
except ImportError as e:
    print(f"❌ Failed to import Transformers: {e}")
    print("Try: pip install transformers==4.36.2")
    # Same reasoning as above: fail the cell, do not kill the kernel.
    raise

import json
import os
print("✅ All libraries imported successfully!")


Importing libraries...


The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


✅ PyTorch 2.7.1 loaded successfully


0it [00:00, ?it/s]


✅ Transformers 4.36.2 loaded successfully
✅ All libraries imported successfully!


In [3]:
# Step 3: Load the translation model
model_name = "mbazaNLP/Nllb_finetuned_general_en_kin"

# Language codes used by the translation functions in later cells. They are
# plain constants, so they are defined outside the try block and exist even
# if loading fails part-way.
source_lang = "eng_Latn"  # English
target_lang = "kin_Latn"  # Kinyarwanda

print(f"Loading model: {model_name}")
print("This may take a few minutes...")

try:
    # Load tokenizer
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    print(f"✅ Tokenizer loaded. Vocab size: {len(tokenizer)}")

    # Load model
    print("Loading model...")
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name,
        torch_dtype=torch.float32,  # Use float32 for CPU compatibility
        device_map=None  # Keep on CPU
    )
    print("✅ Model loaded successfully!")
    print(f"Model parameters: {model.num_parameters():,}")

    print(f"Source language: {source_lang}")
    print(f"Target language: {target_lang}")

except Exception as e:
    print(f"❌ Error loading model: {e}")
    print("Make sure you have internet connection and sufficient disk space")
    # Re-raise after printing the hints: every later cell needs `tokenizer`
    # and `model`, so continuing would only produce NameErrors further down.
    raise


Loading model: mbazaNLP/Nllb_finetuned_general_en_kin
This may take a few minutes...
Loading tokenizer...
✅ Tokenizer loaded. Vocab size: 256204
Loading model...
✅ Model loaded successfully!
Model parameters: 1,370,638,336
Source language: eng_Latn
Target language: kin_Latn


In [4]:
# Step 4: Create translation function
def translate_english_to_kinyarwanda(text, max_length=128):
    """
    Translate English text to Kinyarwanda

    Uses the notebook-level `tokenizer`, `model`, `source_lang` and
    `target_lang` defined in the model-loading cell.

    Args:
        text: English text to translate. Only the first decoded sequence is
            returned, so pass one string per call.
        max_length: Token limit applied both when tokenizing the input (longer
            inputs are truncated) and when generating the output.

    Returns:
        The translated string. An empty or whitespace-only string returns ""
        without calling the model. If anything raises during tokenization,
        generation or decoding, the exception is not propagated; a string of
        the form "Translation error: <message>" is returned instead.
    """
    # Nothing to translate: skip the model, which would otherwise generate
    # output for an empty prompt.
    if isinstance(text, str) and not text.strip():
        return ""

    try:
        # Set source language
        tokenizer.src_lang = source_lang

        # Tokenize input text
        inputs = tokenizer(
            text,
            return_tensors="pt",
            max_length=max_length,
            truncation=True,
            padding=True
        )

        # Generate translation (no gradients needed for inference)
        with torch.no_grad():
            generated_tokens = model.generate(
                **inputs,
                # Force the first generated token to be the target language code
                forced_bos_token_id=tokenizer.convert_tokens_to_ids(target_lang),
                max_length=max_length,
                num_beams=5,
                early_stopping=True,
                no_repeat_ngram_size=2
            )

        # Decode the translation; keep only the first sequence of the batch
        translation = tokenizer.batch_decode(
            generated_tokens,
            skip_special_tokens=True
        )[0]

        return translation

    except Exception as e:
        # Callers receive the failure as text rather than as an exception.
        return f"Translation error: {str(e)}"

print("✅ Translation function created successfully!")


✅ Translation function created successfully!


In [5]:
# Step 5: Test the translation model
test_sentences = [
    "Hello, how are you?",
    "What is your name?",
    "I love learning Kinyarwanda.",
    "Good morning",
    "Thank you very much",
    "Where is the school?",
    "The weather is nice today.",
    "Can you help me?",
    "I am going to school.",
    "I want to learn more about Rwanda."
]

print("Testing English to Kinyarwanda translation:")
print("=" * 60)

for i, sentence in enumerate(test_sentences, 1):
    print(f"\n{i}. English: {sentence}")
    translation = translate_english_to_kinyarwanda(sentence)
    print(f"   Kinyarwanda: {translation}")

print("\n" + "=" * 60)
print("✅ Translation testing completed!")


Testing English to Kinyarwanda translation:

1. English: Hello, how are you?
   Kinyarwanda: Muraho, mumeze mute?

2. English: What is your name?
   Kinyarwanda: witwa nde

3. English: I love learning Kinyarwanda.
   Kinyarwanda: Nkunda kwiga ikinyarwanda.

4. English: Good morning
   Kinyarwanda: Igitondo cyiza

5. English: Thank you very much
   Kinyarwanda: Murakoze cyane

6. English: Where is the school?
   Kinyarwanda: Ishuri riri he 

7. English: The weather is nice today.
   Kinyarwanda: Ikirere ni cyiza uyumunsi. 

8. English: Can you help me?
   Kinyarwanda: Ushobora kumfasha? 

9. English: I am going to school.
   Kinyarwanda: ngiye ku ishuri. 

10. English: I want to learn more about Rwanda.
   Kinyarwanda: Ndashaka kumenya byinshi ku Rwanda. 

✅ Translation testing completed!


In [6]:
# Step 6: Interactive translation
def interactive_translator():
    """
    Interactive translation session

    Repeatedly prompts for English text and prints its translation using
    `translate_english_to_kinyarwanda`. The session ends when the user types
    'quit', 'exit' or 'q' (case-insensitive), presses Ctrl-C, or when input
    can no longer be read (end-of-file or any other error from `input()`).

    Returns:
        None
    """
    print("\n" + "=" * 50)
    print("INTERACTIVE ENGLISH TO KINYARWANDA TRANSLATOR")
    print("=" * 50)
    print("Type 'quit' to exit")

    while True:
        # Reading input is handled separately from translating: if input()
        # keeps failing, retrying in this loop would never terminate.
        try:
            english_text = input("\nEnter English text: ").strip()
        except (KeyboardInterrupt, EOFError):
            print("\nGoodbye! / Murabeho!")
            break
        except Exception as e:
            print(f"Error: {e}")
            break

        if english_text.lower() in ['quit', 'exit', 'q']:
            print("Goodbye! / Murabeho!")
            break

        if not english_text:
            print("Please enter some text.")
            continue

        try:
            print("Translating...")
            translation = translate_english_to_kinyarwanda(english_text)
            print(f"Kinyarwanda: {translation}")
        except KeyboardInterrupt:
            print("\nGoodbye! / Murabeho!")
            break
        except Exception as e:
            # A failed translation should not end the session.
            print(f"Error: {e}")

# Run interactive translator
print("Ready for interactive translation!")
print("Uncomment the line below to start interactive mode:")
print("# interactive_translator()")


Ready for interactive translation!
Uncomment the line below to start interactive mode:
# interactive_translator()


In [7]:
# Step 7: Save the model locally
def save_model_locally(save_path="../models/en_kin_translation_simple"):
    """
    Save the model and tokenizer locally

    Writes the notebook-level `model` and `tokenizer` into `save_path` with
    their `save_pretrained` methods, then stores the translation settings
    (model name, language codes, max length) next to them.

    Args:
        save_path: Target directory; created if it does not exist.

    Returns:
        `save_path` on success, or None if any step raised (the error is
        printed, not propagated).
    """
    try:
        os.makedirs(save_path, exist_ok=True)

        print(f"Saving model to: {save_path}")
        model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)

        # Save configuration
        settings = {
            "model_name": model_name,
            "source_lang": source_lang,
            "target_lang": target_lang,
            "max_length": 128
        }

        # Use a dedicated file name: model.save_pretrained() has already
        # written the model's own config.json into this directory, and
        # overwriting it would make the saved model impossible to reload.
        settings_file = os.path.join(save_path, "translation_config.json")
        with open(settings_file, "w", encoding="utf-8") as f:
            json.dump(settings, f, indent=2)

        print("✅ Model saved successfully!")
        return save_path

    except Exception as e:
        print(f"❌ Error saving model: {e}")
        return None

# Save the model
saved_path = save_model_locally()
if saved_path:
    print(f"Model saved to: {saved_path}")
    print("You can now load this model later without downloading again.")


Saving model to: ../models/en_kin_translation_simple
✅ Model saved successfully!
Model saved to: ../models/en_kin_translation_simple
You can now load this model later without downloading again.


In [8]:
# Enhanced translation function for longer texts
def translate_long_text(text, max_chunk_length=400, overlap=50, *,
                        max_chunk_words=40, short_text_words=50,
                        translate_fn=None):
    """
    Translate longer texts by breaking them into chunks

    Short texts are translated in one call. Longer texts are split into
    sentences (keeping their closing punctuation), the sentences are grouped
    into chunks of at most `max_chunk_words` words, each chunk is translated
    separately, and the translations are joined with single spaces.

    Args:
        text: English text to translate.
        max_chunk_length: Accepted for backward compatibility; not used.
        overlap: Accepted for backward compatibility; not used.
        max_chunk_words: Maximum number of whitespace-separated words per
            chunk. A single sentence longer than this becomes its own chunk.
        short_text_words: Texts with at most this many words are translated
            directly without chunking.
        translate_fn: Callable `(text, max_length=...) -> str` used to
            translate each piece. Defaults to
            `translate_english_to_kinyarwanda`.

    Returns:
        The translated text, or "" when `text` is empty or only whitespace.
    """
    import re

    if translate_fn is None:
        translate_fn = translate_english_to_kinyarwanda

    # Nothing to translate
    if not text or not text.strip():
        return ""

    # If text is short enough, translate directly
    if len(text.split()) <= short_text_words:
        return translate_fn(text, max_length=512)

    # Split after sentence-ending punctuation that is followed by whitespace.
    # The lookbehind keeps the punctuation attached to its sentence, and
    # requiring whitespace avoids splitting inside tokens such as "3.14".
    pieces = re.split(r'(?<=[.!?])\s+', text)
    sentences = [piece.strip() for piece in pieces if piece.strip()]

    translated_chunks = []
    chunk_sentences = []
    chunk_word_count = 0

    for sentence in sentences:
        sentence_word_count = len(sentence.split())
        # Flush the current chunk if this sentence would push it over the limit
        if chunk_sentences and chunk_word_count + sentence_word_count > max_chunk_words:
            translated_chunks.append(
                translate_fn(" ".join(chunk_sentences), max_length=512)
            )
            chunk_sentences = []
            chunk_word_count = 0
        chunk_sentences.append(sentence)
        chunk_word_count += sentence_word_count

    # Translate remaining chunk
    if chunk_sentences:
        translated_chunks.append(
            translate_fn(" ".join(chunk_sentences), max_length=512)
        )

    # Join translated chunks
    return " ".join(translated_chunks)

print("✅ Enhanced translation function for long texts created!")


✅ Enhanced translation function for long texts created!


In [9]:
# Test with a paragraph
# Sample English paragraph used to exercise translate_long_text.
test_paragraph = """
Education is one of the most important aspects of human development. It provides people with knowledge, 
skills, and critical thinking abilities that are essential for personal growth and societal progress. 
In Rwanda, education has been prioritized as a key driver of national development since the 1994 genocide. 
The government has invested heavily in building schools, training teachers, and ensuring that all children 
have access to quality education. This commitment to education has resulted in significant improvements 
in literacy rates and has contributed to Rwanda's economic growth and social transformation.
"""

# Strip once and build the separator once; both are reused below.
paragraph_text = test_paragraph.strip()
paragraph_rule = "=" * 80

# Show the source text before translating.
print(
    "Testing paragraph translation:",
    paragraph_rule,
    "ORIGINAL PARAGRAPH (English):",
    paragraph_text,
    f"\n{paragraph_rule}",
    "TRANSLATED PARAGRAPH (Kinyarwanda):",
    sep="\n",
)

paragraph_translation = translate_long_text(paragraph_text)
print(paragraph_translation)

# Closing summary.
print(f"\n{paragraph_rule}")
print(f"Original word count: {len(paragraph_text.split())} words")
print("Translation completed successfully!")


Testing paragraph translation:
ORIGINAL PARAGRAPH (English):
Education is one of the most important aspects of human development. It provides people with knowledge, 
skills, and critical thinking abilities that are essential for personal growth and societal progress. 
In Rwanda, education has been prioritized as a key driver of national development since the 1994 genocide. 
The government has invested heavily in building schools, training teachers, and ensuring that all children 
have access to quality education. This commitment to education has resulted in significant improvements 
in literacy rates and has contributed to Rwanda's economic growth and social transformation.

TRANSLATED PARAGRAPH (Kinyarwanda):
Uburezi ni kimwe mu bintu byingenzi mu iterambere ryumuntu. Buhesha abantu ubumenyi, ubuhanga, nubushobozi bwo gutekereza neza ari ngombwa kugira ngo umuntu akure kandi atera imbere. Mu Rwanda, uburezi bwashyizwe imbere nkurwego rukomeye rwiterambere ryigihugu kuva jenoside yo mu

In [10]:
# Test with a short essay
# Sample multi-paragraph English essay used to exercise translate_long_text.
test_essay = """
The Importance of Technology in Modern Education

Technology has revolutionized the way we approach education in the 21st century. From traditional 
blackboards to interactive smart boards, from printed textbooks to digital resources, the educational 
landscape has undergone tremendous transformation. This evolution has brought both opportunities and 
challenges that educators and students must navigate carefully.

One of the most significant benefits of technology in education is accessibility. Students can now 
access vast amounts of information instantly through the internet. Online courses and virtual 
classrooms have made education available to people who might not have had access to traditional 
educational institutions due to geographical, physical, or economic constraints. This democratization 
of education has opened doors for millions of learners worldwide.

However, technology also presents challenges. The digital divide means that not all students have 
equal access to technological resources. Some students may lack reliable internet connections or 
modern devices, which can create disparities in learning opportunities. Additionally, the abundance 
of information available online can sometimes overwhelm students and make it difficult for them to 
distinguish between reliable and unreliable sources.

In conclusion, while technology has undoubtedly enhanced educational possibilities, it must be 
implemented thoughtfully and equitably. The goal should be to use technology as a tool to enhance 
human learning and creativity, not to replace the fundamental human connections that make education 
meaningful. As we move forward, we must ensure that technological advances in education benefit all 
students, regardless of their background or circumstances.
"""

# Strip once, count words once, and build the separator once.
essay_text = test_essay.strip()
essay_word_count = len(essay_text.split())
essay_rule = "=" * 100

# Show the source text and its length before translating.
print(
    "Testing essay translation:",
    essay_rule,
    "ORIGINAL ESSAY (English):",
    essay_text,
    f"\nEssay length: {essay_word_count} words",
    f"\n{essay_rule}",
    "TRANSLATING ESSAY TO KINYARWANDA...",
    "This may take a moment for longer texts...",
    sep="\n",
)

essay_translation = translate_long_text(essay_text)

# Show the result.
print(
    "\nTRANSLATED ESSAY (Kinyarwanda):",
    essay_rule,
    essay_translation,
    sep="\n",
)

# Closing summary.
print(
    f"\n{essay_rule}",
    "✅ Essay translation completed successfully!",
    f"Original: {essay_word_count} words",
    "The model can handle essays and longer texts by breaking them into manageable chunks.",
    sep="\n",
)


Testing essay translation:
ORIGINAL ESSAY (English):
The Importance of Technology in Modern Education

Technology has revolutionized the way we approach education in the 21st century. From traditional 
blackboards to interactive smart boards, from printed textbooks to digital resources, the educational 
landscape has undergone tremendous transformation. This evolution has brought both opportunities and 
challenges that educators and students must navigate carefully.

One of the most significant benefits of technology in education is accessibility. Students can now 
access vast amounts of information instantly through the internet. Online courses and virtual 
classrooms have made education available to people who might not have had access to traditional 
educational institutions due to geographical, physical, or economic constraints. This democratization 
of education has opened doors for millions of learners worldwide.

However, technology also presents challenges. The digital divide m

In [11]:
# Function to translate custom text input
def translate_custom_text():
    """
    Allow users to input their own paragraphs or essays for translation

    Reads lines from standard input until a blank line follows some text (or
    input ends), joins them with single spaces, translates the result with
    `translate_long_text`, and prints the original and the translation.
    Typing 'quit' on its own line, or pressing Ctrl-C, leaves without
    translating.

    Returns:
        None
    """
    print("\n" + "=" * 80)
    print("CUSTOM TEXT TRANSLATION")
    print("=" * 80)
    print("Enter your English text (paragraph or essay):")
    print("Press Enter twice when finished, or type 'quit' to exit")
    print("-" * 80)

    lines = []
    while True:
        try:
            line = input()
        except KeyboardInterrupt:
            print("\nExiting...")
            return
        except EOFError:
            # Input ended (e.g. Ctrl-D or piped input): treat it like the
            # closing blank line and work with whatever was collected.
            break

        if line.lower().strip() == 'quit':
            return
        if line == "":
            if lines:  # Blank line after some text ends the input
                break
            continue  # Leading blank lines: keep waiting for actual text
        lines.append(line)

    # Join all lines
    custom_text = " ".join(lines).strip()

    if not custom_text:
        print("No text entered.")
        return

    print(f"\nTranslating {len(custom_text.split())} words...")
    print("Please wait...")

    try:
        translation = translate_long_text(custom_text)

        print("\n" + "=" * 80)
        print("ORIGINAL TEXT:")
        print("-" * 80)
        print(custom_text)
        print("\n" + "=" * 80)
        print("KINYARWANDA TRANSLATION:")
        print("-" * 80)
        print(translation)
        print("\n" + "=" * 80)
        print("✅ Translation completed!")

    except Exception as e:
        print(f"❌ Translation failed: {e}")

print("✅ Custom text translation function ready!")
print("\nTo translate your own text, run:")
print("translate_custom_text()")
print("\nOr uncomment the line below:")
print("# translate_custom_text()")


✅ Custom text translation function ready!

To translate your own text, run:
translate_custom_text()

Or uncomment the line below:
# translate_custom_text()
