In [1]:
import os
import sys
import subprocess
from pathlib import Path
from huggingface_hub import snapshot_download
import shutil

In [2]:
# Configuration
MODEL_NAME = "coderop12/gemma2b-nirf-lookup-2025"
LOCAL_MODEL_DIR = "./gemma2b-nirf-lookup"
GGUF_OUTPUT_DIR = "./gguf_output"

# Create directories
os.makedirs(LOCAL_MODEL_DIR, exist_ok=True)
os.makedirs(GGUF_OUTPUT_DIR, exist_ok=True)

print(f"Model will be downloaded to: {LOCAL_MODEL_DIR}")
print(f"GGUF files will be saved to: {GGUF_OUTPUT_DIR}")

Model will be downloaded to: ./gemma2b-nirf-lookup
GGUF files will be saved to: ./gguf_output


In [3]:
# Download the model from Hugging Face
print("Downloading model from Hugging Face...")
try:
    snapshot_download(
        repo_id=MODEL_NAME,
        local_dir=LOCAL_MODEL_DIR,
        local_dir_use_symlinks=False
    )
    print("✅ Model downloaded successfully!")
    
    # List downloaded files
    print("\nDownloaded files:")
    for file in os.listdir(LOCAL_MODEL_DIR):
        print(f"  - {file}")
        
except Exception as e:
    print(f"❌ Error downloading model: {e}")

Downloading model from Hugging Face...


For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder.


Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

✅ Model downloaded successfully!

Downloaded files:
  - .gitattributes
  - special_tokens_map.json
  - generation_config.json
  - model-00001-of-00002.safetensors
  - tokenizer_config.json
  - model-00002-of-00002.safetensors
  - chat_template.jinja
  - config.json
  - README.md
  - tokenizer.json
  - NOTICE
  - .cache
  - model.safetensors.index.json
  - tokenizer.model


In [21]:
# Check if llama.cpp is available
def check_llama_cpp():
    try:
        result = subprocess.run(['python', '-c', 'import llama_cpp'], 
                              capture_output=True, text=True)
        return result.returncode == 0
    except:
        return False

def check_conversion_script():
    # Common paths where convert script might be
    possible_paths = [
        "convert-hf-to-gguf.py",
        "./llama.cpp/convert-hf-to-gguf.py",
        "convert_hf_to_gguf.py"
    ]
    
    for path in possible_paths:
        if os.path.exists(path):
            return path
    return None

print("Checking llama.cpp installation...")
if check_llama_cpp():
    print("✅ llama-cpp-python found")
else:
    print("❌ llama-cpp-python not found - you may need to install it")

convert_script = check_conversion_script()
if convert_script:
    print(f"✅ Conversion script found: {convert_script}")
    CONVERT_SCRIPT = convert_script
else:
    print("❌ Conversion script not found - you may need to clone llama.cpp repo")
    CONVERT_SCRIPT = None

Checking llama.cpp installation...
❌ llama-cpp-python not found - you may need to install it
❌ Conversion script not found - you may need to clone llama.cpp repo


In [22]:
# Install llama-cpp-python and clone llama.cpp repo
print("Installing llama-cpp-python...")
try:
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'llama-cpp-python'], 
                   check=True)
    print("✅ llama-cpp-python installed successfully!")
except subprocess.CalledProcessError as e:
    print(f"❌ Failed to install llama-cpp-python: {e}")

print("\nCloning llama.cpp repository...")
try:
    subprocess.run(['git', 'clone', 'https://github.com/ggerganov/llama.cpp.git'], 
                   check=True)
    print("✅ llama.cpp repository cloned successfully!")
    
    # Set the conversion script path
    CONVERT_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"
    print(f"Conversion script available at: {CONVERT_SCRIPT}")
    
except subprocess.CalledProcessError as e:
    print(f"❌ Failed to clone llama.cpp: {e}")

Installing llama-cpp-python...
Collecting llama-cpp-python
  Using cached llama_cpp_python-0.3.16.tar.gz (50.7 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Collecting diskcache>=5.6.1 (from llama-cpp-python)
  Using cached diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)
Using cached diskcache-5.6.3-py3-none-any.whl (45 kB)
Building wheels for collected packages: llama-cpp-python
  Building wheel for llama-cpp-python (pyproject.toml): started
  Building wheel for llama-cpp-python (pyproject.toml): still running...
  Building wheel for llama-cpp-python (pyproject.toml): finished with status 'error'

  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mBuilding wheel for llama-cpp-python [0m[1;32m([0m[32mpyproject.toml[0m[1;32m)[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m [31m[171 lines of output][0m
  [31m   [0m [32m*** [1mscikit-build-core 0.11.6[0m using [34mCMake 3.28.3[39m[0m [31m(wheel)[0m
  [31m   [0m [32m***[0m [1mConfiguring CMake...[0m
  [31m   [0m loading initial cache file /tmp/tmpuvi2lzk4/build/CMakeInit.txt
  [31m   [0m -- The C compiler identification is GNU 13.3.0
  [31m   [0m -- The CXX compiler identification is GNU 13.3.0
  [31m   [0m -- Detecting C compiler ABI info
  [31m   [0m -- Detecting C compiler ABI info - done
  [31m   [0m -- Check for working C compiler: /usr/bin/gcc - skipped
  [31m   [0m -- Detecting C compile features
  [31m   [0m -- Detecting C compile features - done
  [31m   [0m -- Detecting CXX compiler ABI info
  [31m   [0m -- Detecting CXX


✅ llama.cpp repository cloned successfully!
Conversion script available at: ./llama.cpp/convert_hf_to_gguf.py


In [4]:
# Try installing a pre-built version or use alternative method
print("Trying alternative installation methods...")

# First, try installing without building from source
try:
    subprocess.run([sys.executable, '-m', 'pip', 'install', 
                   'llama-cpp-python', '--only-binary=all'], 
                   check=True)
    print("✅ Pre-built llama-cpp-python installed successfully!")
except subprocess.CalledProcessError:
    print("❌ Pre-built version failed, trying with environment variables...")
    
    # Try with specific environment variables to fix OpenMP issues
    env = os.environ.copy()
    env['CMAKE_ARGS'] = '-DGGML_OPENMP=OFF'
    
    try:
        subprocess.run([sys.executable, '-m', 'pip', 'install', 'llama-cpp-python'], 
                       env=env, check=True)
        print("✅ llama-cpp-python installed without OpenMP!")
    except subprocess.CalledProcessError:
        print("❌ Still failing, let's proceed with just the conversion script")

# Verify we have the conversion script
CONVERT_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"
if os.path.exists(CONVERT_SCRIPT):
    print(f"✅ Conversion script ready: {CONVERT_SCRIPT}")
else:
    print("❌ Conversion script not found")

Trying alternative installation methods...


✅ Pre-built llama-cpp-python installed successfully!
✅ Conversion script ready: ./llama.cpp/convert_hf_to_gguf.py


In [5]:
# Try to download the original Gemma 2B tokenizer.model file
print("Attempting to get compatible tokenizer...")

try:
    # Download the base Gemma 2B model's tokenizer
    from huggingface_hub import hf_hub_download
    
    print("Downloading original Gemma 2B tokenizer.model...")
    tokenizer_model_path = hf_hub_download(
        repo_id="google/gemma-2b", 
        filename="tokenizer.model",
        local_dir="./gemma_base_tokenizer"
    )
    
    # Copy it to our model directory
    import shutil
    dest_path = os.path.join(LOCAL_MODEL_DIR, "tokenizer.model")
    shutil.copy2(tokenizer_model_path, dest_path)
    
    print(f"✅ Copied tokenizer.model to {dest_path}")
    
    # Now try the conversion again
    print("Retrying conversion with SentencePiece tokenizer...")
    
    output_file = os.path.join(GGUF_OUTPUT_DIR, "gemma2b-nirf-lookup-f16.gguf")
    cmd = [
        sys.executable, 
        CONVERT_SCRIPT,
        LOCAL_MODEL_DIR,
        "--outfile", output_file,
        "--outtype", "f16"
    ]
    
    result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    print("✅ Conversion completed successfully!")
    print("STDOUT:", result.stdout[-500:])  # Show last 500 chars
    
except Exception as e:
    print(f"❌ Error: {e}")
    print("Let's try an alternative approach...")

Attempting to get compatible tokenizer...
Downloading original Gemma 2B tokenizer.model...
✅ Copied tokenizer.model to ./gemma2b-nirf-lookup/tokenizer.model
Retrying conversion with SentencePiece tokenizer...


✅ Conversion completed successfully!
STDOUT: 


In [6]:
# Check the conversion results
print("Checking conversion results...")

# List all files in output directory
print(f"\nFiles in {GGUF_OUTPUT_DIR}:")
for file in os.listdir(GGUF_OUTPUT_DIR):
    file_path = os.path.join(GGUF_OUTPUT_DIR, file)
    if os.path.isfile(file_path):
        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        print(f"  - {file} ({size_mb:.1f} MB)")

# Now let's check if we want to create quantized versions
print("\nF16 conversion complete! Would you like to create quantized versions?")
print("Available quantization options:")
print("  - Q4_0: ~2.3GB, good balance of size/quality")
print("  - Q4_1: ~2.6GB, slightly better quality") 
print("  - Q5_0: ~2.9GB, better quality")
print("  - Q5_1: ~3.2GB, even better quality")
print("  - Q8_0: ~4.3GB, minimal quality loss")

# Let's create a popular Q4_0 version as an example
print("\nCreating Q4_0 quantized version...")

Checking conversion results...

Files in ./gguf_output:
  - gemma2b-nirf-lookup-f16.gguf (4992.7 MB)

F16 conversion complete! Would you like to create quantized versions?
Available quantization options:
  - Q4_0: ~2.3GB, good balance of size/quality
  - Q4_1: ~2.6GB, slightly better quality
  - Q5_0: ~2.9GB, better quality
  - Q5_1: ~3.2GB, even better quality
  - Q8_0: ~4.3GB, minimal quality loss

Creating Q4_0 quantized version...


In [7]:
# Try building with CURL disabled
print("Retrying build with CURL disabled...")

try:
    # Clean build directory first
    import shutil
    if os.path.exists("./llama.cpp/build"):
        shutil.rmtree("./llama.cpp/build")
    
    os.makedirs("./llama.cpp/build", exist_ok=True)
    
    # Build with CURL disabled
    cmake_cmd = [
        "cmake", "-B", "./llama.cpp/build", "-S", "./llama.cpp",
        "-DLLAMA_CURL=OFF",
        "-DGGML_OPENMP=OFF"  # Also disable OpenMP to avoid previous issues
    ]
    
    print("Running cmake...")
    cmake_result = subprocess.run(cmake_cmd, capture_output=True, text=True)
    
    if cmake_result.returncode != 0:
        print("CMake failed:", cmake_result.stderr)
        raise Exception("CMake configuration failed")
    
    # Build just the quantize tool
    make_cmd = ["make", "-C", "./llama.cpp/build", "-j4", "llama-quantize"]
    print("Building quantize tool...")
    
    make_result = subprocess.run(make_cmd, capture_output=True, text=True)
    
    if make_result.returncode != 0:
        print("Build failed:", make_result.stderr)
        # Try alternative: use Python quantization
        print("Build failed, using Python-based quantization instead...")
        
        # Use llama-cpp-python for quantization
        from llama_cpp import Llama
        
        input_file = os.path.join(GGUF_OUTPUT_DIR, "gemma2b-nirf-lookup-f16.gguf")
        output_file = os.path.join(GGUF_OUTPUT_DIR, "gemma2b-nirf-lookup-q4_0.gguf")
        
        print("Loading model for quantization...")
        # Note: This approach loads the model to quantize it
        # For very large models, this might use significant RAM
        llm = Llama(model_path=input_file, verbose=False)
        
        print("Quantizing to Q4_0...")
        # This is a conceptual approach - actual quantization with llama-cpp-python 
        # requires different methods
        print("❌ Direct quantization via Python not easily available")
        print("The F16 GGUF file is ready for use though!")
        
    else:
        print("✅ Build successful!")
        
        # Now try quantization
        quantize_tool = "./llama.cpp/build/llama-quantize"
        input_file = os.path.join(GGUF_OUTPUT_DIR, "gemma2b-nirf-lookup-f16.gguf")
        output_file = os.path.join(GGUF_OUTPUT_DIR, "gemma2b-nirf-lookup-q4_0.gguf")
        
        quant_cmd = [quantize_tool, input_file, output_file, "Q4_0"]
        print(f"Running quantization: {' '.join(quant_cmd)}")
        
        result = subprocess.run(quant_cmd, capture_output=True, text=True, check=True)
        print("✅ Q4_0 quantization completed!")
    
except Exception as e:
    print(f"Build/quantization process encountered issues: {e}")
    print("\n✅ However, your F16 GGUF conversion was successful!")
    print("You can use the F16 version or manually quantize it later.")

# Show final status
print(f"\n📁 Final files in {GGUF_OUTPUT_DIR}:")
for file in os.listdir(GGUF_OUTPUT_DIR):
    file_path = os.path.join(GGUF_OUTPUT_DIR, file)
    if os.path.isfile(file_path):
        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        print(f"  - {file} ({size_mb:.1f} MB)")

print(f"\n✅ GGUF conversion completed successfully!")
print(f"Your model is ready: {os.path.join(GGUF_OUTPUT_DIR, 'gemma2b-nirf-lookup-f16.gguf')}")

Retrying build with CURL disabled...


Running cmake...
Building quantize tool...
✅ Build successful!
Running quantization: ./llama.cpp/build/llama-quantize ./gguf_output/gemma2b-nirf-lookup-f16.gguf ./gguf_output/gemma2b-nirf-lookup-q4_0.gguf Q4_0
Build/quantization process encountered issues: [Errno 2] No such file or directory: './llama.cpp/build/llama-quantize'

✅ However, your F16 GGUF conversion was successful!
You can use the F16 version or manually quantize it later.

📁 Final files in ./gguf_output:
  - gemma2b-nirf-lookup-f16.gguf (4992.7 MB)

✅ GGUF conversion completed successfully!
Your model is ready: ./gguf_output/gemma2b-nirf-lookup-f16.gguf


In [8]:
# Final verification and summary
print("=== GGUF CONVERSION SUMMARY ===\n")

# Verify the final file
gguf_file = os.path.join(GGUF_OUTPUT_DIR, "gemma2b-nirf-lookup-f16.gguf")
if os.path.exists(gguf_file):
    file_size_gb = os.path.getsize(gguf_file) / (1024**3)
    print(f"✅ Successfully converted: {MODEL_NAME}")
    print(f"📁 Output file: {gguf_file}")
    print(f"📊 File size: {file_size_gb:.2f} GB")
    print(f"🔧 Format: GGUF F16")
    
    # Basic file validation
    try:
        # Check if file is readable and has reasonable size
        if file_size_gb > 1.0 and file_size_gb < 10.0:  # Reasonable range for 2B model
            print("✅ File size appears reasonable for a 2B parameter model")
        else:
            print("⚠️  File size seems unusual - please verify")
            
        print(f"\n📋 Model ready for use with:")
        print(f"   - llama.cpp")
        print(f"   - Ollama")
        print(f"   - text-generation-webui")
        print(f"   - LM Studio")
        print(f"   - Any GGUF-compatible inference engine")
        
    except Exception as e:
        print(f"⚠️  Could not fully validate file: {e}")
else:
    print("❌ Final GGUF file not found")

print(f"\n🎯 CONVERSION COMPLETE!")
print(f"The specialized Gemma 2B NIRF lookup model is now ready in GGUF format.")

# Optional: Show how to use the model
print(f"\n💡 Usage example:")
print(f"   llama.cpp: ./llama-cli -m {gguf_file} -p 'Your prompt here'")
print(f"   Python: llama = Llama(model_path='{gguf_file}')")

=== GGUF CONVERSION SUMMARY ===

✅ Successfully converted: coderop12/gemma2b-nirf-lookup-2025
📁 Output file: ./gguf_output/gemma2b-nirf-lookup-f16.gguf
📊 File size: 4.88 GB
🔧 Format: GGUF F16
✅ File size appears reasonable for a 2B parameter model

📋 Model ready for use with:
   - llama.cpp
   - Ollama
   - text-generation-webui
   - LM Studio
   - Any GGUF-compatible inference engine

🎯 CONVERSION COMPLETE!
The specialized Gemma 2B NIRF lookup model is now ready in GGUF format.

💡 Usage example:
   llama.cpp: ./llama-cli -m ./gguf_output/gemma2b-nirf-lookup-f16.gguf -p 'Your prompt here'
   Python: llama = Llama(model_path='./gguf_output/gemma2b-nirf-lookup-f16.gguf')


In [9]:
#!/usr/bin/env python3
"""
NIRF Ranking Model Inference Script
Uses the converted GGUF model to answer NIRF ranking questions
"""

import os
from llama_cpp import Llama

class NIRFRankingModel:
    def __init__(self, model_path):
        """Initialize the NIRF ranking model"""
        self.model_path = model_path
        self.llm = None
        self.load_model()
    
    def load_model(self):
        """Load the GGUF model"""
        try:
            print(f"Loading NIRF model from: {self.model_path}")
            self.llm = Llama(
                model_path=self.model_path,
                n_ctx=2048,  # Context window
                n_threads=4,  # Number of CPU threads
                verbose=False
            )
            print("Model loaded successfully!")
        except Exception as e:
            print(f"Error loading model: {e}")
            raise
    
    def generate_response(self, prompt, max_tokens=512, temperature=0.3):
        """Generate response for NIRF ranking questions"""
        if not self.llm:
            raise Exception("Model not loaded")
        
        try:
            response = self.llm(
                prompt,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                repeat_penalty=1.1,
                stop=["</s>", "\n\n"]
            )
            return response['choices'][0]['text'].strip()
        except Exception as e:
            print(f"Error generating response: {e}")
            return None
    
    def ask_nirf_question(self, question):
        """Ask a NIRF ranking related question"""
        # Format the prompt for NIRF queries
        prompt = f"""Question: {question}

Answer:"""
        
        print(f"\nQuestion: {question}")
        print("Generating answer...")
        
        response = self.generate_response(prompt)
        if response:
            print(f"Answer: {response}")
        else:
            print("Failed to generate response")
        
        return response

def main():
    # Path to your converted GGUF model
    model_path = "./gguf_output/gemma2b-nirf-lookup-f16.gguf"
    
    # Check if model file exists
    if not os.path.exists(model_path):
        print(f"Model file not found: {model_path}")
        print("Please ensure the GGUF conversion was completed successfully.")
        return
    
    # Initialize the model
    try:
        nirf_model = NIRFRankingModel(model_path)
    except Exception as e:
        print(f"Failed to initialize model: {e}")
        return
    
    # Sample NIRF ranking questions
    sample_questions = [
        "What is the NIRF ranking of IIT Delhi in 2024?",
        "Which university ranked first in the NIRF Engineering category in 2023?",
        "How are NIRF rankings calculated?",
        "What are the key parameters used in NIRF ranking methodology?",
        "Which are the top 5 universities in NIRF Overall ranking 2024?",
        "What is the difference between NIRF and other international rankings?",
        "How often are NIRF rankings updated?",
        "Which institution has consistently performed well in NIRF rankings?"
    ]
    
    print("=== NIRF Ranking Model Inference Test ===\n")
    
    # Test with sample questions
    for i, question in enumerate(sample_questions[:1], 1):  # Test first 3 questions
        print(f"\n--- Test {i} ---")
        nirf_model.ask_nirf_question(question)
        print("-" * 50)
    
    # Interactive mode
    print("\n=== Interactive Mode ===")
    print("Ask your NIRF ranking questions (type 'quit' to exit):")
    
    while True:
        try:
            user_question = input("\nYour question: ").strip()
            if user_question.lower() in ['quit', 'exit', 'q']:
                break
            
            if user_question:
                nirf_model.ask_nirf_question(user_question)
            
        except KeyboardInterrupt:
            print("\nExiting...")
            break
        except Exception as e:
            print(f"Error: {e}")

if __name__ == "__main__":
    main()

Loading NIRF model from: ./gguf_output/gemma2b-nirf-lookup-f16.gguf


: 

In [1]:
#!/usr/bin/env python3
"""
Script to upload GGUF model to Hugging Face Hub
Handles large file uploads and creates proper model repository
"""

import os
import json
from pathlib import Path
from huggingface_hub import HfApi, login, create_repo, upload_file
from huggingface_hub.utils import RepositoryNotFoundError

class HuggingFaceUploader:
    def __init__(self):
        self.api = HfApi()
        self.token = None
        
    def authenticate(self, token=None):
        """Authenticate with Hugging Face"""
        if token:
            login(token=token)
            self.token = token
            print("✅ Authenticated with provided token")
        else:
            try:
                # Try to use existing token
                login()
                print("✅ Using existing HF token")
            except Exception as e:
                print("❌ Authentication failed")
                print("Please provide your HF token or run 'huggingface-cli login' first")
                raise e
    
    def create_model_card(self, repo_id, original_model, model_size, description=None):
        """Create a model card for the GGUF model"""
        
        # YAML frontmatter with proper metadata
        yaml_header = f"""---
license: apache-2.0
base_model: {original_model}
tags:
- gguf
- quantized
- gemma
- nirf
- education
- ranking
- indian-universities
- text-generation
library_name: gguf
model_name: {repo_id.split('/')[-1]}
inference: false
model_creator: {repo_id.split('/')[0]}
model_type: gemma
quantization: f16
language:
- en
pipeline_tag: text-generation
widget:
- text: "What is NIRF ranking methodology?"
  example_title: "NIRF Methodology"
- text: "Which are the top engineering colleges in NIRF 2024?"
  example_title: "Top Engineering Colleges"
- text: "How are universities ranked in India?"
  example_title: "University Rankings"
---

"""
        
        default_description = f"""# {repo_id.split('/')[-1]}

This is a GGUF conversion of [{original_model}](https://huggingface.co/{original_model}).

## Model Details
- **Original Model**: {original_model}
- **Format**: GGUF (F16 precision)
- **File Size**: ~{model_size:.1f} GB
- **Architecture**: Gemma 2B
- **Specialization**: NIRF (National Institutional Ranking Framework) lookup and ranking queries

## Usage

### With llama.cpp
```bash
./llama-cli -m {repo_id.split('/')[-1]}.gguf -p "What is the NIRF ranking methodology?"
```

### With Python (llama-cpp-python)
```python
from llama_cpp import Llama

llm = Llama(model_path="{repo_id.split('/')[-1]}.gguf")
response = llm("What are the top NIRF ranked engineering colleges?")
print(response['choices'][0]['text'])
```

### With Ollama
```bash
# First, create a Modelfile
echo 'FROM ./{repo_id.split('/')[-1]}.gguf' > Modelfile
ollama create {repo_id.split('/')[-1]} -f Modelfile
ollama run {repo_id.split('/')[-1]} "Explain NIRF ranking parameters"
```

## Model Capabilities
This model is specifically fine-tuned for:
- NIRF ranking information and queries
- Indian higher education institutional data
- University and college ranking explanations
- Educational policy and framework questions

## Technical Details
- **Quantization**: F16 (16-bit floating point)
- **Context Length**: 2048 tokens
- **License**: Follow original model license terms
- **Converted using**: llama.cpp conversion tools

## Original Model License
Please refer to the original model repository for license information.
"""
        
        # Combine YAML header with description
        if description:
            model_card_content = yaml_header + description
        else:
            model_card_content = yaml_header + default_description
        
        return model_card_content
    
    def create_repository(self, repo_id, private=False):
        """Create HF repository"""
        try:
            create_repo(
                repo_id=repo_id,
                repo_type="model",
                private=private,
                exist_ok=True
            )
            print(f"✅ Repository created/verified: {repo_id}")
            return True
        except Exception as e:
            print(f"❌ Failed to create repository: {e}")
            return False
    
    def upload_gguf_model(self, 
                         local_file_path, 
                         repo_id, 
                         original_model="coderop12/gemma2b-nirf-lookup-2025",
                         private=False,
                         description=None):
        """
        Upload GGUF model to Hugging Face
        
        Args:
            local_file_path: Path to your GGUF file
            repo_id: HF repo ID (username/model-name)
            original_model: Original model this was converted from
            private: Whether to make repo private
            description: Custom model card description
        """
        
        if not os.path.exists(local_file_path):
            print(f"❌ File not found: {local_file_path}")
            return False
        
        # Get file info
        file_size_gb = os.path.getsize(local_file_path) / (1024**3)
        file_name = os.path.basename(local_file_path)
        
        print(f"📁 File: {local_file_path}")
        print(f"📊 Size: {file_size_gb:.2f} GB")
        print(f"🎯 Target repo: {repo_id}")
        
        # Create repository
        if not self.create_repository(repo_id, private):
            return False
        
        try:
            # Upload the GGUF file
            print(f"🔄 Uploading {file_name}...")
            print("This may take a while for large files...")
            
            upload_file(
                path_or_fileobj=local_file_path,
                path_in_repo=file_name,
                repo_id=repo_id,
                repo_type="model",
                commit_message=f"Upload {file_name} GGUF model"
            )
            
            print(f"✅ Successfully uploaded {file_name}")
            
            # Create and upload model card
            print("📝 Creating model card...")
            model_card = self.create_model_card(repo_id, original_model, file_size_gb, description)
            
            # Save model card to temporary file and upload
            readme_path = "README.md"
            with open(readme_path, "w", encoding="utf-8") as f:
                f.write(model_card)
            
            upload_file(
                path_or_fileobj=readme_path,
                path_in_repo="README.md",
                repo_id=repo_id,
                repo_type="model",
                commit_message="Add model card"
            )
            
            # Clean up temporary file
            os.remove(readme_path)
            
            print(f"✅ Model card uploaded")
            print(f"🎉 Upload complete! View at: https://huggingface.co/{repo_id}")
            
            return True
            
        except Exception as e:
            print(f"❌ Upload failed: {e}")
            return False

def main():
    """Main upload script"""
    
    # Configuration
    LOCAL_GGUF_PATH = "./gguf_output/gemma2b-nirf-lookup-f16.gguf"
    
    # You need to set these values
    HF_USERNAME = "coderop12"  # Replace with your HF username
    MODEL_NAME = "gemma2b-nirf-lookup-gguf"  # Choose your model name
    REPO_ID = f"{HF_USERNAME}/{MODEL_NAME}"
    
    # Optional: Use your HF token directly
    HF_TOKEN = None  # Replace with your token or leave None to use saved token
    
    print("=== Hugging Face GGUF Upload Script ===\n")
    
    # Validate inputs
    if HF_USERNAME == "YOUR_USERNAME":
        print("❌ Please set your HF_USERNAME in the script")
        print("Edit the script and replace 'YOUR_USERNAME' with your Hugging Face username")
        return
    
    if not os.path.exists(LOCAL_GGUF_PATH):
        print(f"❌ GGUF file not found: {LOCAL_GGUF_PATH}")
        print("Make sure your GGUF conversion completed successfully")
        return
    
    # Initialize uploader
    uploader = HuggingFaceUploader()
    
    try:
        # Authenticate
        print("🔑 Authenticating with Hugging Face...")
        uploader.authenticate(HF_TOKEN)
        
        # Upload model
        success = uploader.upload_gguf_model(
            local_file_path=LOCAL_GGUF_PATH,
            repo_id=REPO_ID,
            original_model="coderop12/gemma2b-nirf-lookup-2025",
            private=False,  # Set to True if you want private repo
        )
        
        if success:
            print(f"\n🎉 SUCCESS!")
            print(f"Your GGUF model is now available at:")
            print(f"https://huggingface.co/{REPO_ID}")
            print(f"\nUsers can download it with:")
            print(f"huggingface-cli download {REPO_ID} {os.path.basename(LOCAL_GGUF_PATH)}")
        else:
            print(f"\n❌ Upload failed. Check the errors above.")
            
    except Exception as e:
        print(f"❌ Script failed: {e}")
        print("\nTroubleshooting:")
        print("1. Make sure you have a Hugging Face account")
        print("2. Get your access token from https://huggingface.co/settings/tokens")
        print("3. Run: huggingface-cli login")
        print("4. Or set HF_TOKEN in the script")

if __name__ == "__main__":
    main()

=== Hugging Face GGUF Upload Script ===

🔑 Authenticating with Hugging Face...


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

✅ Using existing HF token
📁 File: ./gguf_output/gemma2b-nirf-lookup-f16.gguf
📊 Size: 4.88 GB
🎯 Target repo: coderop12/gemma2b-nirf-lookup-gguf
✅ Repository created/verified: coderop12/gemma2b-nirf-lookup-gguf
🔄 Uploading gemma2b-nirf-lookup-f16.gguf...
This may take a while for large files...


Processing Files (0 / 0)                : |          |  0.00B /  0.00B            

New Data Upload                         : |          |  0.00B /  0.00B            

  ...output/gemma2b-nirf-lookup-f16.gguf:   0%|          | 25.1MB / 5.24GB            

Cancellation requested; stopping current tasks.


KeyboardInterrupt: 

In [1]:
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

path = hf_hub_download("coderop12/gemma2b-nirf-lookup-gguf", "gemma2b-nirf-lookup-f16.gguf")
llm = Llama(
    model_path=path,
    n_ctx=1024,
    n_threads=4,
    n_batch=128,
    n_gpu_layers=0,
    use_mmap=True,
    use_mlock=False,
    verbose=False,
)
print("Loaded!")

out = llm("Question: Which are the top 5 universities in NIRF Overall ranking 2024?\n\nAnswer:",
          max_tokens=256, temperature=0.3, top_p=0.9, repeat_penalty=1.1, stop=["</s>", "\n\n"])
print(out["choices"][0]["text"].strip())


: 