In [1]:
# Environment Setup

import subprocess
import sys
import os
import shutil
from pathlib import Path
import warnings

# Suppress warnings for cleaner output
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

print("🔧 Setting up Kaggle environment...")

# Install required packages
packages = [
    "accelerate>=0.20.0",
    "transformers>=4.30.0", 
    "torch>=2.0.0",
    "datasets>=2.10.0",
    "peft>=0.4.0",
    "wandb",
    "numpy<2.0",  # Important for compatibility
]

for package in packages:
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", package, "--quiet"])
        print(f"✅ Installed {package}")
    except:
        print(f"⚠️ Failed to install {package}")

print("✅ Environment setup complete!")

🔧 Setting up Kaggle environment...
✅ Installed accelerate>=0.20.0
✅ Installed transformers>=4.30.0
✅ Installed torch>=2.0.0
✅ Installed datasets>=2.10.0
✅ Installed peft>=0.4.0
✅ Installed wandb
✅ Installed numpy<2.0
✅ Environment setup complete!


In [2]:
import zipfile

# Option A: Extract from uploaded dataset
dataset_path = "/kaggle/working/"
project_dirs = list(Path(dataset_path).glob("**/math_pe_research.zip"))

if project_dirs:
    zip_path = project_dirs[0]
    print(f"📁 Found project zip: {zip_path}")
    
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall("/kaggle/working")
    print("✅ Project extracted successfully!")
else:
    # Option B: Create project structure manually (if no dataset upload)
    print("📁 Creating project structure manually...")
    
    # This would require you to upload individual files
    # For now, we'll assume you uploaded as a dataset
    print("⚠️ No zip found. Please upload your project as a dataset.")

# Verify project structure
project_path = Path("/kaggle/working/Transformer/math_pe_research")
if project_path.exists():
    print(f"✅ Project found at: {project_path}")
    print("📂 Project structure:")
    for item in project_path.rglob("*"):
        if item.is_file() and item.suffix in ['.py', '.md', '.txt']:
            print(f"   {item.relative_to(project_path)}")
else:
    print("❌ Project not found. Check your upload.")

📁 Creating project structure manually...
⚠️ No zip found. Please upload your project as a dataset.
✅ Project found at: /kaggle/working/Transformer/math_pe_research
📂 Project structure:
   README.md
   EXPERIMENT_ANALYSIS.md
   requirements.txt
   comprehensive_pe_test.py
   test_architecture_compatibility.py
   src/positional_encoding/diet.py
   src/positional_encoding/__init__.py
   src/positional_encoding/math_adaptive.py
   src/positional_encoding/sinusoidal.py
   src/positional_encoding/t5_relative.py
   src/positional_encoding/alibi.py
   src/positional_encoding/rope.py
   src/data/math_dataset_loader.py
   src/models/mathematical_reasoning_model.py
   scripts/simulate_experiment.py
   scripts/comprehensive_test.py
   scripts/simple_simulation.py
   scripts/train_and_eval.py


In [3]:
# # project setup thorugh dataset

# !rm -rf /kaggle/working/Transformer/
# # Unzip to /kaggle/working/
# # %cd /kaggle/input/transformer/
# !apt install tree
# !tree ..
# !zip -r transformer.zip /kaggle/input/transformer/Transformer/

# import zipfile
# z=zipfile.ZipFile('transformer.zip')
# z.extractall()

# %cd /kaggle/working/kaggle/input/transformer/

# !zip -r transformer.zip Transformer/

# %mv transformer.zip /kaggle/working/
# %cd /kaggle/working/
# %rm -rf kaggle

# z=zipfile.ZipFile('transformer.zip')
# z.extractall()

# %cd /kaggle/working/Transformer/
# !tree ..

In [4]:
# import torch
# from pathlib import Path

# # Create required directories
# directories = [
#     '/kaggle/working/checkpoints',
#     '/kaggle/working/evaluation_results',
#     '/kaggle/working/data_cache',
#     '/kaggle/working/logs'
# ]

# for dir_path in directories:
#     Path(dir_path).mkdir(parents=True, exist_ok=True)
#     print(f"✅ Created: {dir_path}")

# # Check GPU
# if torch.cuda.is_available():
#     gpu_name = torch.cuda.get_device_name(0)
#     gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
#     print(f"\n🚀 GPU Available: {gpu_name} ({gpu_memory:.1f} GB)")
#     print(f"   CUDA Version: {torch.version.cuda}")
# else:
#     print("\n❌ NO GPU AVAILABLE!")
#     print("   Enable GPU: Settings → Accelerator → GPU T4 x2")

# # Find project directory
# project_dirs = list(Path('/kaggle/working').glob('**/math_pe_research'))
# if project_dirs:
#     project_dir = project_dirs[0]
#     print(f"\n✅ Project found: {project_dir}")
# else:
#     print("\n❌ Project not found. Check extraction step.")
#     # Try manual path
#     possible_paths = [
#         '/kaggle/working/Transformer/math_pe_research',
#         '/kaggle/working/math_pe_research'
#     ]
#     for path in possible_paths:
#         if Path(path).exists():
#             project_dir = Path(path)
#             print(f"✅ Found at: {project_dir}")
#             break

In [5]:

# 🎯 TRAINING CONFIGURATION
# Modify these settings as needed

CONFIG = {
    # Model settings
    'model_size': 'EleutherAI/pythia-2.8b',  # Options: pythia-70m, pythia-410m, pythia-1.4b, pythia-2.8b
    'pe_method': 'rope',  # Options: 'rope', 'sinusoidal', 't5_relative', 'diet', 'alibi'
    
    # Training settings
    'batch_size': 4,
    'max_steps': 500,
    'learning_rate': 2e-5,
    'max_length': 1024,
    'use_lora': True,  # Recommended for Kaggle
    
    # Data settings
    'datasets': 'gsm8k,math',  # Available: gsm8k, math, mathqa
    'data_fraction': 0.1,  # Use 10% of data for faster training
    
    # Experiment settings
    'experiment_name': 'kaggle_math_pe_experiment',
    'wandb_project': 'kaggle_math_reasoning',
    
    # Kaggle-specific settings
    'save_steps': 100,
    'eval_steps': 100,
    'logging_steps': 50,
}

# Quick configurations for different use cases
QUICK_CONFIGS = {
    'fast_test': {
        'model_size': 'EleutherAI/pythia-70m',
        'max_steps': 50,
        'batch_size': 8,
        'max_length': 512,
    },
    'production': {
        'model_size': 'EleutherAI/pythia-2.8b',
        'max_steps': 1000,
        'batch_size': 4,
        'max_length': 1024,
    },
    'math_specialized': {
        'model_size': 'wellecks/llmstep-mathlib4-pythia2.8b',
        'pe_method': 'sinusoidal',
        'max_steps': 500,
        'batch_size': 2,
    }
}

# Uncomment to use a quick configuration:
# CONFIG.update(QUICK_CONFIGS['fast_test'])  # For quick testing
# CONFIG.update(QUICK_CONFIGS['production'])  # For full training
# CONFIG.update(QUICK_CONFIGS['math_specialized'])  # For math-specialized model

print("🎯 Configuration loaded:")
for key, value in CONFIG.items():
    print(f"   {key}: {value}")


🎯 Configuration loaded:
   model_size: EleutherAI/pythia-2.8b
   pe_method: rope
   batch_size: 4
   max_steps: 500
   learning_rate: 2e-05
   max_length: 1024
   use_lora: True
   datasets: gsm8k,math
   data_fraction: 0.1
   experiment_name: kaggle_math_pe_experiment
   wandb_project: kaggle_math_reasoning
   save_steps: 100
   eval_steps: 100
   logging_steps: 50


In [6]:

import subprocess
import shutil
from pathlib import Path
import os

# 🧹 Setup directories
print("🗂️ Setting up directories...")

directories = {
    'cache_dir': '/tmp/model_cache',
    'checkpoint_dir': '/kaggle/working/checkpoints',
    'result_dir': '/kaggle/working/results'
}

for name, dir_path in directories.items():
    Path(dir_path).mkdir(parents=True, exist_ok=True)
    print(f"✅ Created {name}: {dir_path}")

# 🔧 Environment variables
os.environ['HF_HOME'] = directories['cache_dir']
os.environ['TOKENIZERS_PARALLELISM'] = 'false'
# os.environ['WANDB_API_KEY'] = 'your_wandb_key_here'  # Uncomment and add your W&B key

# 📁 Find project directory
project_dir = Path("/kaggle/working/Transformer/math_pe_research")
if not project_dir.exists():
    print("❌ Project directory not found!")
    print("Please ensure you've uploaded the project correctly in Cell 2.")
    exit()

print(f"📁 Using project: {project_dir}")

# 🚀 Build training command
cmd_parts = [
    f"cd {project_dir}",
    "python scripts/train_and_eval.py",
    f"--pe {CONFIG['pe_method']}",
    f"--batch_size {CONFIG['batch_size']}",
    f"--max_steps {CONFIG['max_steps']}",
    f"--learning_rate {CONFIG['learning_rate']}",
    f"--experiment_name {CONFIG['experiment_name']}",
    f"--checkpoint_dir {directories['checkpoint_dir']}",
    f"--result_dir {directories['result_dir']}",
    f"--cache_dir {directories['cache_dir']}",
    f"--max_length {CONFIG['max_length']}",
    f"--model_size {CONFIG['model_size']}",
    f"--datasets {CONFIG['datasets']}",
    f"--wandb_project {CONFIG['wandb_project']}",
    f"--save_steps {CONFIG['save_steps']}",
    f"--eval_steps {CONFIG['eval_steps']}",
    f"--logging_steps {CONFIG['logging_steps']}"
]

if CONFIG.get('use_lora', True):
    cmd_parts.append("--use_lora")

cmd = " \\\n    ".join(cmd_parts)

print(f"""
🚀 STARTING TRAINING WITH {CONFIG['pe_method'].upper()} PE
{'='*60}

📊 Configuration:
   🎯 Model: {CONFIG['model_size']}
   🔧 PE Method: {CONFIG['pe_method']}
   📈 Batch Size: {CONFIG['batch_size']}
   🎓 Max Steps: {CONFIG['max_steps']}
   📏 Max Length: {CONFIG['max_length']}
   💡 Learning Rate: {CONFIG['learning_rate']}
   🔗 LoRA: {CONFIG.get('use_lora', True)}

📝 Command:
{cmd}

{'='*60}
""")

# Execute training
try:
    # Check available space
    statvfs = os.statvfs('/kaggle/working/Transformer')
    free_space_gb = (statvfs.f_frsize * statvfs.f_bavail) / (1024**3)
    print(f"💾 Available space: {free_space_gb:.1f} GB")
    
    result = subprocess.run(cmd, shell=True, capture_output=False, text=True)
    if result.returncode == 0:
        print("\n🎉 Training completed successfully!")
    else:
        print(f"\n❌ Training failed with return code: {result.returncode}")
except KeyboardInterrupt:
    print("\n⚠️ Training interrupted by user")
except Exception as e:
    print(f"\n❌ Training failed: {e}")

print(f"\n📁 Results saved to: {directories['result_dir']}")
print(f"💾 Checkpoints saved to: {directories['checkpoint_dir']}")

🗂️ Setting up directories...
✅ Created cache_dir: /tmp/model_cache
✅ Created checkpoint_dir: /kaggle/working/checkpoints
✅ Created result_dir: /kaggle/working/results
📁 Using project: /kaggle/working/Transformer/math_pe_research

🚀 STARTING TRAINING WITH ROPE PE

📊 Configuration:
   🎯 Model: EleutherAI/pythia-2.8b
   🔧 PE Method: rope
   📈 Batch Size: 4
   🎓 Max Steps: 500
   📏 Max Length: 1024
   💡 Learning Rate: 2e-05
   🔗 LoRA: True

📝 Command:
cd /kaggle/working/Transformer/math_pe_research \
    python scripts/train_and_eval.py \
    --pe rope \
    --batch_size 4 \
    --max_steps 500 \
    --learning_rate 2e-05 \
    --experiment_name kaggle_math_pe_experiment \
    --checkpoint_dir /kaggle/working/checkpoints \
    --result_dir /kaggle/working/results \
    --cache_dir /tmp/model_cache \
    --max_length 1024 \
    --model_size EleutherAI/pythia-2.8b \
    --datasets gsm8k,math \
    --wandb_project kaggle_math_reasoning \
    --save_steps 100 \
    --eval_steps 100 \
    --log

In [7]:

import json
import pandas as pd
from pathlib import Path

# 📊 Load and display results
result_dir = Path("/kaggle/working/results")
checkpoint_dir = Path("/kaggle/working/checkpoints")

print("📊 TRAINING RESULTS ANALYSIS")
print("="*50)

# Check for results files
result_files = list(result_dir.glob("*.json"))
if result_files:
    print(f"✅ Found {len(result_files)} result files:")
    for file in result_files:
        print(f"   📄 {file.name}")
        
        # Load and display results
        try:
            with open(file, 'r') as f:
                results = json.load(f)
            
            print(f"\n📈 Results from {file.name}:")
            for key, value in results.items():
                if isinstance(value, (int, float)):
                    print(f"   {key}: {value:.4f}")
                else:
                    print(f"   {key}: {value}")
        except Exception as e:
            print(f"   ⚠️ Error reading {file.name}: {e}")
else:
    print("⚠️ No result files found")

# Check for checkpoints
checkpoint_files = list(checkpoint_dir.glob("**/*.bin"))
if checkpoint_files:
    print(f"\n💾 Found {len(checkpoint_files)} checkpoint files:")
    for file in checkpoint_files[-5:]:  # Show last 5
        size_mb = file.stat().st_size / 1024 / 1024
        print(f"   📄 {file.name} ({size_mb:.1f} MB)")
else:
    print("\n⚠️ No checkpoint files found")

# Display model summary
print(f"\n🎯 MODEL SUMMARY:")
print(f"   PE Method: {CONFIG['pe_method']}")
print(f"   Base Model: {CONFIG['model_size']}")
print(f"   Training Steps: {CONFIG['max_steps']}")
print(f"   LoRA Enabled: {CONFIG.get('use_lora', True)}")

print("\n✅ Analysis complete!")


📊 TRAINING RESULTS ANALYSIS
⚠️ No result files found

⚠️ No checkpoint files found

🎯 MODEL SUMMARY:
   PE Method: rope
   Base Model: EleutherAI/pythia-2.8b
   Training Steps: 500
   LoRA Enabled: True

✅ Analysis complete!


In [12]:
import torch
import sys
from pathlib import Path

# Add project to path
sys.path.insert(0, str(Path("/kaggle/working/Transformer/math_pe_research/src")))

try:
    from models.mathematical_reasoning_model import create_mathematical_reasoning_model
    
    print("🧪 TESTING TRAINED MODEL")
    print("="*40)
    
    # Load the trained model
    print("📥 Loading trained model...")
    
    model = create_mathematical_reasoning_model(
        pe_method=CONFIG['pe_method'],
        base_model=CONFIG['model_size'],
        load_in_4bit=False,
        use_lora=CONFIG.get('use_lora', True),
        device_map=None,
        torch_dtype=torch.float16
    )
    
    print(f"✅ Model loaded with {CONFIG['pe_method']} PE")
    
    # Test problems
    test_problems = [
        "What is 15 + 27?",
        "If a rectangle has length 8 and width 5, what is its area?", 
        "Solve for x: 2x + 5 = 13",
        "What is the square root of 144?",
        "A train travels 120 miles in 2 hours. What is its speed?"
    ]
    
    print("\n🧮 Testing mathematical reasoning:")
    print("-" * 40)
    
    for i, problem in enumerate(test_problems, 1):
        print(f"\n{i}. Problem: {problem}")
        try:
            solution = model.solve_math_problem(
                problem, 
                max_length=200, 
                temperature=0.1
            )
            print(f"   Solution: {solution}")
        except Exception as e:
            print(f"   Error: {e}")
    
    print(f"\n🎉 Model testing completed!")
    
except Exception as e:
    print(f"❌ Model testing failed: {e}")
    print("This might happen if training didn't complete successfully.")
    import traceback
    traceback.print_exc()

2025-07-24 21:10:06.215643: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1753391406.596532     393 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1753391406.701553     393 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


🧪 TESTING TRAINED MODEL
📥 Loading trained model...


tokenizer_config.json:   0%|          | 0.00/396 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/5.68G [00:00<?, ?B/s]

The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
The new lm_head weights will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


trainable params: 52,428,800 || all params: 2,827,801,641 || trainable%: 1.8540
✅ Model loaded with rope PE

🧮 Testing mathematical reasoning:
----------------------------------------

1. Problem: What is 15 + 27?
   Error: shape '[1, 80, 3, 8, 80]' is invalid for input of size 614400

2. Problem: If a rectangle has length 8 and width 5, what is its area?
   Error: shape '[1, 89, 3, 8, 80]' is invalid for input of size 683520

3. Problem: Solve for x: 2x + 5 = 13
   Error: shape '[1, 84, 3, 8, 80]' is invalid for input of size 645120

4. Problem: What is the square root of 144?
   Error: shape '[1, 82, 3, 8, 80]' is invalid for input of size 629760

5. Problem: A train travels 120 miles in 2 hours. What is its speed?
   Error: shape '[1, 88, 3, 8, 80]' is invalid for input of size 675840

🎉 Model testing completed!


In [9]:
# Uncomment this cell to test different PE methods
"""
print("🔄 TESTING DIFFERENT PE METHODS")
print("="*50)

pe_methods = ['rope', 'sinusoidal', 't5_relative', 'diet', 'alibi']
test_problem = "What is 12 * 8?"

for pe_method in pe_methods:
    print(f"\n🔧 Testing {pe_method.upper()} PE:")
    try:
        model = create_mathematical_reasoning_model(
            pe_method=pe_method,
            base_model='EleutherAI/pythia-70m',  # Use small model for quick testing
            load_in_4bit=False,
            use_lora=False,
            device_map=None,
            torch_dtype=torch.float32
        )
        
        solution = model.solve_math_problem(test_problem, max_length=100, temperature=0.1)
        print(f"   ✅ {pe_method}: {solution}")
        
    except Exception as e:
        print(f"   ❌ {pe_method}: {e}")

print("\n✅ PE method comparison complete!")
"""

'\nprint("🔄 TESTING DIFFERENT PE METHODS")\nprint("="*50)\n\npe_methods = [\'rope\', \'sinusoidal\', \'t5_relative\', \'diet\', \'alibi\']\ntest_problem = "What is 12 * 8?"\n\nfor pe_method in pe_methods:\n    print(f"\n🔧 Testing {pe_method.upper()} PE:")\n    try:\n        model = create_mathematical_reasoning_model(\n            pe_method=pe_method,\n            base_model=\'EleutherAI/pythia-70m\',  # Use small model for quick testing\n            load_in_4bit=False,\n            use_lora=False,\n            device_map=None,\n            torch_dtype=torch.float32\n        )\n        \n        solution = model.solve_math_problem(test_problem, max_length=100, temperature=0.1)\n        print(f"   ✅ {pe_method}: {solution}")\n        \n    except Exception as e:\n        print(f"   ❌ {pe_method}: {e}")\n\nprint("\n✅ PE method comparison complete!")\n'

In [10]:
print("""
🎉 KAGGLE DEPLOYMENT COMPLETE!

📊 What you accomplished:
   ✅ Set up environment with all dependencies
   ✅ Extracted and verified project structure  
   ✅ Configured training parameters
   ✅ Executed training with chosen PE method
   ✅ Analyzed results and model performance
   ✅ Tested trained model on mathematical problems

📝 Next steps:
   1. Experiment with different PE methods
   2. Try different model sizes  
   3. Adjust hyperparameters for better performance
   4. Compare results across configurations
   5. Share your findings!

🔗 Resources:
   - Project documentation: /kaggle/working/math_pe_research/README.md
   - Results: /kaggle/working/results/
   - Checkpoints: /kaggle/working/checkpoints/
   - Full deployment guide: /kaggle/working/math_pe_research/KAGGLE_DEPLOYMENT_GUIDE.md

Happy experimenting! 🚀🔥
""") 


🎉 KAGGLE DEPLOYMENT COMPLETE!

📊 What you accomplished:
   ✅ Set up environment with all dependencies
   ✅ Extracted and verified project structure  
   ✅ Configured training parameters
   ✅ Executed training with chosen PE method
   ✅ Analyzed results and model performance
   ✅ Tested trained model on mathematical problems

📝 Next steps:
   1. Experiment with different PE methods
   2. Try different model sizes  
   3. Adjust hyperparameters for better performance
   4. Compare results across configurations
   5. Share your findings!

🔗 Resources:
   - Project documentation: /kaggle/working/math_pe_research/README.md
   - Results: /kaggle/working/results/
   - Checkpoints: /kaggle/working/checkpoints/
   - Full deployment guide: /kaggle/working/math_pe_research/KAGGLE_DEPLOYMENT_GUIDE.md

Happy experimenting! 🚀🔥



In [11]:
# # Monitor training progress
# from pathlib import Path
# import json

# checkpoint_dir = Path('/kaggle/working/checkpoints')
# results_dir = Path('/kaggle/working/evaluation_results')

# print("📊 Training Progress:")

# # Check checkpoints
# if checkpoint_dir.exists():
#     checkpoints = list(checkpoint_dir.glob('**/*'))
#     print(f"   📁 Checkpoints: {len(checkpoints)} files")
#     for ckpt in checkpoints[:5]:  # Show first 5
#         if ckpt.is_file():
#             size_mb = ckpt.stat().st_size / (1024 * 1024)
#             print(f"      📄 {ckpt.name} ({size_mb:.1f} MB)")

# # Check results
# if results_dir.exists():
#     results = list(results_dir.glob('*.json'))
#     print(f"   📈 Results: {len(results)} files")
#     for result_file in results:
#         try:
#             with open(result_file, 'r') as f:
#                 data = json.load(f)
#             print(f"      📊 {result_file.name}: {list(data.keys())}")
#         except:
#             print(f"      📄 {result_file.name}")

# print("\n🔗 W&B Dashboard: Check your W&B project for live metrics")