<a href="https://colab.research.google.com/github/hemanth/notebooks/blob/main/gemma3-test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
#!/usr/bin/env python3
"""
Test script for Gemma 3 - Google's latest open model
Based on the Jupyter notebook functionality
"""

import warnings
warnings.filterwarnings('ignore')

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import time
import json

def print_header(title):
    """Print a section banner: blank line, 60 '=' rule, the title, 60 '=' rule.

    Args:
        title: Text shown between the two rules (prefixed with one space).
    """
    rule = "=" * 60
    # One print call with a newline separator emits the same four lines.
    print(f"\n{rule}", f" {title}", rule, sep="\n")

def test_basic_functionality():
    """Load the Gemma 3 tokenizer and model and print a short model summary.

    Returns:
        A ``(model, tokenizer)`` tuple on success, or ``(None, None)`` if any
        step raised; the error is printed rather than re-raised so the caller
        can report a failed suite.
    """
    print_header("TESTING GEMMA 3 BASIC FUNCTIONALITY")

    # Model configuration
    model_id = "google/gemma-3-1b-it"  # Using 1B instruct model

    print(f"📦 Loading model: {model_id}")
    print("⏳ This may take a few minutes for first-time download...")

    try:
        # NOTE: trust_remote_code is intentionally not passed. It opts in to
        # executing Python code shipped in the Hub repository; the official
        # Gemma checkpoints should load with the architecture built into
        # transformers, so enabling it only widens the attack surface.
        print("🔧 Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        print("✅ Tokenizer loaded successfully!")

        # Load model
        print("🔧 Loading model...")
        model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        print("✅ Model loaded successfully!")

        # Print model info; device and dtype are read from the first parameter.
        total_params = sum(p.numel() for p in model.parameters())
        first_param = next(model.parameters())
        print(f"📊 Model parameters: {total_params:,}")
        print(f"🖥️  Device: {first_param.device}")
        print(f"🔢 Data type: {first_param.dtype}")

        return model, tokenizer

    except Exception as e:
        # Best-effort boundary: report the failure and signal it via None values.
        print(f"❌ Error loading model: {e}")
        return None, None

def test_inference(model, tokenizer):
    """Generate and print a sampled completion for each of a fixed set of prompts.

    Args:
        model: Loaded causal LM providing ``generate`` and ``device``, or None.
        tokenizer: Tokenizer matching ``model``, or None.

    Returns:
        None. If either argument is None a message is printed and nothing is
        run. A failure on one prompt is printed and the loop continues.
    """
    print_header("TESTING INFERENCE CAPABILITIES")

    if model is None or tokenizer is None:
        print("❌ Cannot test inference - model not loaded")
        return

    # Test prompts
    test_prompts = [
        "What is artificial intelligence?",
        "Write a Python function to calculate fibonacci numbers:",
        "Explain quantum computing in simple terms:",
        "What are the benefits of renewable energy?"
    ]
    total = len(test_prompts)

    for i, prompt in enumerate(test_prompts, 1):
        print(f"\n🧪 Test {i}/{total}: {prompt}")
        print("-" * 50)

        try:
            # Put the inputs on whatever device the model was placed on
            # (CPU, CUDA or MPS) instead of assuming MPS when it is available.
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            prompt_len = inputs["input_ids"].shape[-1]

            # Generate response
            start_time = time.time()
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=150,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )

            end_time = time.time()

            # Decode only the newly generated token ids. Slicing the decoded
            # text by len(prompt) breaks when decoding does not reproduce the
            # prompt character-for-character.
            new_tokens = outputs[0][prompt_len:]
            generated_text = tokenizer.decode(
                new_tokens, skip_special_tokens=True
            ).strip()

            print(f"💬 Response: {generated_text}")
            print(f"⏱️  Generation time: {end_time - start_time:.2f} seconds")

        except Exception as e:
            print(f"❌ Error during inference: {e}")

def test_multilingual_capabilities(model, tokenizer):
    """Generate and print a sampled completion for a greeting in several languages.

    Args:
        model: Loaded causal LM providing ``generate`` and ``device``, or None.
        tokenizer: Tokenizer matching ``model``, or None.

    Returns:
        None. If either argument is None a message is printed and nothing is
        run. A failure on one language is printed and the loop continues.
    """
    print_header("TESTING MULTILINGUAL CAPABILITIES")

    if model is None or tokenizer is None:
        print("❌ Cannot test multilingual - model not loaded")
        return

    multilingual_prompts = [
        ("English", "Hello, how are you?"),
        ("Spanish", "Hola, ¿cómo estás?"),
        ("French", "Bonjour, comment allez-vous?"),
        ("German", "Hallo, wie geht es dir?"),
        ("Japanese", "こんにちは、元気ですか？")
    ]

    for language, prompt in multilingual_prompts:
        print(f"\n🌍 Testing {language}: {prompt}")
        print("-" * 50)

        try:
            # Follow the model's actual device rather than assuming MPS.
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            prompt_len = inputs["input_ids"].shape[-1]

            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=100,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )

            # Strip the prompt at token level; character-offset slicing of the
            # decoded text is unreliable when the round-trip through the
            # tokenizer does not preserve the prompt exactly.
            new_tokens = outputs[0][prompt_len:]
            generated_text = tokenizer.decode(
                new_tokens, skip_special_tokens=True
            ).strip()

            print(f"💬 Response: {generated_text}")

        except Exception as e:
            print(f"❌ Error with {language}: {e}")

def test_code_generation(model, tokenizer):
    """Generate and print a sampled completion for each of a fixed set of coding prompts.

    Args:
        model: Loaded causal LM providing ``generate`` and ``device``, or None.
        tokenizer: Tokenizer matching ``model``, or None.

    Returns:
        None. If either argument is None a message is printed and nothing is
        run. A failure on one prompt is printed and the loop continues.
    """
    print_header("TESTING CODE GENERATION")

    if model is None or tokenizer is None:
        print("❌ Cannot test code generation - model not loaded")
        return

    code_prompts = [
        "Write a Python function to sort a list:",
        "Create a JavaScript function to validate email:",
        "Write a SQL query to find top 10 customers:",
        "Create a React component for a button:"
    ]

    for prompt in code_prompts:
        print(f"\n💻 Code prompt: {prompt}")
        print("-" * 50)

        try:
            # Follow the model's actual device rather than assuming MPS.
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            prompt_len = inputs["input_ids"].shape[-1]

            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=200,
                    temperature=0.3,  # Lower temperature for more precise code
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )

            # Keep only the generated token ids so the echoed prompt is removed
            # exactly, independent of how the prompt text decodes.
            new_tokens = outputs[0][prompt_len:]
            generated_code = tokenizer.decode(
                new_tokens, skip_special_tokens=True
            ).strip()

            print(f"💻 Generated code:\n{generated_code}")

        except Exception as e:
            print(f"❌ Error generating code: {e}")

def run_performance_benchmark(model, tokenizer):
    """Time sampled generation of one prompt at several ``max_new_tokens`` limits.

    Args:
        model: Loaded causal LM providing ``generate`` and ``device``, or None.
        tokenizer: Tokenizer matching ``model``, or None.

    Returns:
        None. For each limit the elapsed time and tokens/second are printed.
        If either argument is None a message is printed and nothing is run.
        A failure on one limit is printed and the loop continues.
    """
    print_header("PERFORMANCE BENCHMARK")

    if model is None or tokenizer is None:
        print("❌ Cannot run benchmark - model not loaded")
        return

    benchmark_prompt = "Write a detailed explanation of machine learning:"

    # Different token lengths
    token_lengths = [50, 100, 200]

    for max_tokens in token_lengths:
        print(f"\n⚡ Benchmark - {max_tokens} tokens:")
        print("-" * 30)

        try:
            # Follow the model's actual device rather than assuming MPS.
            inputs = tokenizer(benchmark_prompt, return_tensors="pt").to(model.device)
            prompt_len = inputs["input_ids"].shape[-1]

            # perf_counter is monotonic and intended for measuring intervals.
            start_time = time.perf_counter()
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=0.7,
                    do_sample=True,
                    pad_token_id=tokenizer.eos_token_id
                )
            end_time = time.perf_counter()

            generation_time = end_time - start_time
            # Generation can stop at EOS before reaching max_tokens, so the
            # throughput is based on the number of tokens actually produced.
            generated_tokens = outputs.shape[-1] - prompt_len
            if generation_time > 0:
                tokens_per_second = generated_tokens / generation_time
            else:
                tokens_per_second = 0.0

            print(f"🔢 Generated: {generated_tokens} tokens")
            print(f"⏱️  Time: {generation_time:.2f} seconds")
            print(f"🚀 Speed: {tokens_per_second:.2f} tokens/second")

        except Exception as e:
            print(f"❌ Benchmark error: {e}")

def main():
    """Run the whole suite: load the model, then execute each test stage in order.

    Prints a failure banner and returns early when the model or tokenizer
    could not be loaded; otherwise runs every stage and prints a completion
    banner.
    """
    print_header("GEMMA 3 COMPREHENSIVE TEST SUITE")
    print("🔬 Testing Google's Gemma 3 model capabilities")
    print("📚 Based on: https://blog.google/technology/developers/gemma-3/")

    # Load model
    model, tokenizer = test_basic_functionality()

    # Guard clause: nothing can run without both objects.
    if model is None or tokenizer is None:
        print_header("TEST SUITE FAILED ❌")
        print("❌ Could not load Gemma 3 model")
        print("💡 Check your internet connection and try again")
        return

    # Each stage takes (model, tokenizer) and reports its own results.
    stages = (
        test_inference,
        test_multilingual_capabilities,
        test_code_generation,
        run_performance_benchmark,
    )
    for stage in stages:
        stage(model, tokenizer)

    print_header("TEST SUITE COMPLETED SUCCESSFULLY! 🎉")
    print("✅ All tests completed")
    print("📊 Gemma 3 is working correctly on your system")
    print("🚀 You can now use the model for your projects")

# Entry point: run the suite only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


 GEMMA 3 COMPREHENSIVE TEST SUITE
🔬 Testing Google's Gemma 3 model capabilities
📚 Based on: https://blog.google/technology/developers/gemma-3/

 TESTING GEMMA 3 BASIC FUNCTIONALITY
📦 Loading model: google/gemma-3-1b-it
⏳ This may take a few minutes for first-time download...
🔧 Loading tokenizer...
✅ Tokenizer loaded successfully!
🔧 Loading model...
✅ Model loaded successfully!
📊 Model parameters: 999,885,952
🖥️  Device: cpu
🔢 Data type: torch.bfloat16

 TESTING INFERENCE CAPABILITIES

🧪 Test 1/4: What is artificial intelligence?
--------------------------------------------------
💬 Response: Artificial intelligence (AI) refers to the simulation of human intelligence processes by computer systems. It’s not about creating robots that think exactly like us, but rather about developing computer systems that can perform tasks that typically require human intelligence, such as learning, problem-solving, and decision-making.

Here's a breakdown of key aspects:

* **Machine Learning (ML):** A 