# 🦙 FORCE LLaMA Download - Real LLaMA Only!

This notebook **forces** the download and use of actual LLaMA models, ignoring system limitations.

**You want LLaMA only - this will get you LLaMA only!**

In [None]:
# Install llama-cpp-python for Windows (force install)
import subprocess
import sys
import os

print("ü¶ô FORCING LLaMA Installation...")

# Reinstall llama-cpp-python through the pip that belongs to the running
# interpreter, so the package lands in this kernel's environment.
pip_force_install = [
    sys.executable, '-m', 'pip',
    'install', '--force-reinstall', 'llama-cpp-python',
]
subprocess.check_call(pip_force_install)  # raises CalledProcessError on failure

print("‚úÖ llama-cpp-python installed!")

In [None]:
# Install required dependencies (best effort: a failed package is reported,
# and the loop carries on with the next one)
dependencies = [
    'huggingface_hub',
    'transformers',
    'torch',
    # transformers needs Accelerate whenever from_pretrained() is given a
    # device_map, which the model test in this notebook does.
    'accelerate',
    'numpy',
    'requests',
]

for dep in dependencies:
    try:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', dep])
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: pip exited non-zero; OSError: interpreter could not be launched
        print(f"‚ùå Failed to install {dep}: {e}")
    else:
        print(f"‚úÖ {dep} installed")

In [None]:
# Force download LLaMA model
from huggingface_hub import hf_hub_download
import requests
import os
from pathlib import Path

def force_download_llama(models=None, models_dir="../models", downloader=None):
    """
    Download the first reachable LLaMA model and report where it landed.

    Candidates are tried in order and the first successful download wins.
    The default candidate list contains LLaMA repositories only, so a
    non-LLaMA model is never returned as "LLaMA".

    Args:
        models: Optional iterable of Hugging Face repo ids to try, in order.
            Defaults to the built-in LLaMA candidates.
        models_dir: Directory handed to the downloader as ``cache_dir``;
            created (including parents) if it does not exist.
        downloader: Optional callable accepting the keyword arguments
            ``repo_id``, ``filename``, ``cache_dir`` and ``force_download``.
            Defaults to ``hf_hub_download``.

    Returns:
        Tuple ``(model_name, filename)``: the repo id that succeeded and the
        local path returned by the downloader. For GGML repos this is the
        quantized weights file; for every other repo only ``config.json`` is
        fetched here, so the path points at that config file.

    Raises:
        RuntimeError: If every candidate fails to download.
    """
    print("ü¶ô FORCING LLaMA Model Download...")

    if models is None:
        # LLaMA-only candidates, in the order they should be attempted
        models = [
            "meta-llama/Llama-2-7b-chat-hf",  # Official repo; requires auth
            "NousResearch/Llama-2-7b-chat-hf",  # Community version
            "TheBloke/Llama-2-7B-Chat-GGML",  # Quantized version
        ]
    if downloader is None:
        # Resolved at call time so an injected downloader needs no hub import
        downloader = hf_hub_download

    models_dir = Path(models_dir)
    models_dir.mkdir(parents=True, exist_ok=True)

    last_error = None
    for model_name in models:
        # GGML repos ship a single quantized weights file; other repos are
        # probed through their config.json.
        if "ggml" in model_name.lower():
            target_file = "llama-2-7b-chat.ggmlv3.q4_0.bin"
        else:
            target_file = "config.json"

        try:
            print(f"\nüî• Attempting to download: {model_name}")
            filename = downloader(
                repo_id=model_name,
                filename=target_file,
                cache_dir=str(models_dir),
                force_download=True,
            )
        except Exception as e:
            # Best effort per candidate: report the failure and try the next one
            print(f"‚ùå Failed to download {model_name}: {e}")
            last_error = e
            continue

        print(f"‚úÖ Successfully downloaded {model_name}")
        print(f"üìÅ Location: {filename}")
        return model_name, filename

    raise RuntimeError("üö® FAILED TO DOWNLOAD ANY LLAMA MODEL!") from last_error

# Run the download once; the two names bound here are reused by later cells
(model_name, model_path) = force_download_llama()
print(f"\nüéâ LLaMA Model Ready: {model_name}")

In [None]:
# Test LLaMA model loading
def test_llama_model(model_name, model_path):
    """
    Load the model and run one short generation as a smoke test.

    Repos whose name contains "ggml" (any case) are loaded from
    ``model_path`` with llama-cpp; everything else is loaded by
    ``model_name`` through Transformers.

    Args:
        model_name: Hugging Face repo id of the model.
        model_path: Local file path; only used for GGML models.

    Returns:
        True if loading and generation completed, False if anything raised
        (the error is printed rather than propagated).
    """
    print("üß™ Testing LLaMA Model...")

    prompt = "Hello, I am LLaMA and"

    try:
        if "ggml" in model_name.lower():
            # Test GGML model
            from llama_cpp import Llama

            llm = Llama(
                model_path=model_path,
                n_ctx=2048,  # Context length
                n_threads=4,  # Number of threads
                verbose=False
            )

            # Test generation: stop at the first newline, do not echo the prompt
            response = llm(
                prompt,
                max_tokens=50,
                stop=["\n"],
                echo=False
            )

            print(f"ü¶ô LLaMA Response: {response['choices'][0]['text']}")

        else:
            # Test Transformers model
            from transformers import AutoTokenizer, AutoModelForCausalLM
            import torch

            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelForCausalLM.from_pretrained(
                model_name,
                torch_dtype=torch.float16,
                device_map="auto" if torch.cuda.is_available() else "cpu"
            )

            # Put the inputs on the model's device: with device_map="auto" the
            # model may sit on a GPU while the tokenizer output is on the CPU.
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            with torch.no_grad():
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=20,  # same budget as prompt length + 20
                    do_sample=True,
                    temperature=0.7
                )

            response = tokenizer.decode(outputs[0], skip_special_tokens=True)
            print(f"ü¶ô LLaMA Response: {response}")

        print("‚úÖ LLaMA Model Test SUCCESSFUL!")
        return True

    except Exception as e:
        # Smoke-test boundary: report any failure instead of raising
        print(f"‚ùå LLaMA Model Test FAILED: {e}")
        return False

# Smoke-test the downloaded model and report the outcome in one line
test_success = test_llama_model(model_name, model_path)
print(
    "\nüéØ LLaMA is READY FOR ACTION!"
    if test_success
    else "\nüö® LLaMA test failed - check installation"
)

In [None]:
# Save LLaMA configuration for other notebooks
import json
from pathlib import Path

# Settings written to disk as JSON and echoed below
config = {
    "model_name": model_name,
    "model_path": str(model_path),
    "force_llama": True,
    "no_fallbacks": True,
    # Accept either case, like the GGML checks in the download and test cells
    "model_type": "ggml" if "ggml" in model_name.lower() else "transformers",
    "setup_complete": True
}

config_path = Path("../outputs/llama_setup_config.json")
# parents=True so a missing intermediate directory does not abort the save
config_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit encoding keeps the file identical across platforms
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)

print(f"‚úÖ LLaMA config saved to: {config_path}")
print("\nüìã Configuration:")
for key, value in config.items():
    print(f"  {key}: {value}")

## 🎉 SUCCESS!

Your LLaMA model is now downloaded and configured!

### Next Steps:
1. Run the other notebooks in order
2. All notebooks will now use LLaMA (no fallbacks)
3. Check `../outputs/llama_setup_config.json` for your configuration

### Important Notes:
- This notebook forces LLaMA usage across the entire project
- No fallback models will be used
- If you have issues, re-run this notebook
- Make sure you have enough RAM for the model size

In [None]:
# Final banner printed when this last cell runs
print("\nü¶ô FORCE LLaMA Setup Complete!")