In [None]:
# Step 0: Setup
!apt-get update -y
!apt-get install -y git git-lfs build-essential cmake python3-pip
!git lfs install

# Step 1: Clone & build llama.cpp
!git clone https://github.com/ggerganov/llama.cpp
%cd llama.cpp
!cmake -B build
!cmake --build build -j 1

# Step 2: Download Qwen2.5-Coder-3B-Instruct
%cd /content
!git clone https://huggingface.co/Qwen/Qwen2.5-Coder-3B-Instruct

# Step 3: Convert HF ‚Üí GGUF (F16)
%cd /content/llama.cpp
!python3 convert_hf_to_gguf.py /content/Qwen2.5-Coder-3B-Instruct \
  --outfile /content/qwen2.5-coder-3b-instruct-f16.gguf

# Step 4: Quantize GGUF ‚Üí Q4_K_M
!./build/bin/llama-quantize \
  /content/qwen2.5-coder-3b-instruct-f16.gguf \
  /content/qwen2.5-coder-3b-instruct-q4_k_m.gguf \
  Q4_K_M

# Step 5: Check size
!ls -lh /content/*.gguf

# Step 6: Download
# from google.colab import files
# files.download("/content/qwen2.5-coder-3b-instruct-q4_k_m.gguf")

# Donwload from the /content

print("This is the end")

In [None]:
# Reset: delete only the quantized Q4_K_M model so it can be regenerated.
# `-f` makes rm succeed silently when the file is already gone. Nothing else
# under /content (F16 GGUF, llama.cpp checkout, HF clone) is touched.
!rm -f /content/qwen2.5-coder-3b-instruct-q4_k_m.gguf
# Disabled: uncommenting the next line would recursively delete /content itself.
# !rm -rf /content

In [None]:
# Verify the F16 file: ls fails if the file is missing, otherwise prints its size.
# It should be around 6.2G
!ls -lh /content/qwen2.5-coder-3b-instruct-f16.gguf

# Verify the quantized (Q4_K_M) model the same way.
# It should be roughly 2 GB
!ls -lh /content/qwen2.5-coder-3b-instruct-q4_k_m.gguf


In [None]:
# Testing: smoke-test the quantized model with the llama-cli binary built above.
#   -m  path of the GGUF model to load
#   -p  prompt text
#   -n  maximum number of tokens to generate (40 here)
# NOTE(review): the llama.cpp checkout is not pinned; some builds start
# llama-cli in an interactive chat mode for instruct models, which would leave
# this cell waiting for input. Confirm against the built version.
! /content/llama.cpp/build/bin/llama-cli \
  -m /content/qwen2.5-coder-3b-instruct-q4_k_m.gguf \
  -p "Write python code to print hello world" \
  -n 40


In [None]:
# Copy the quantized model to Google Drive so it can be downloaded from there
# when a direct download from Colab is not possible.
from google.colab import drive
import shutil
import os

# 1. Mount Google Drive
print("📂 Mounting Google Drive...")
drive.mount('/content/drive')

# 2. Define source and destination
source_path = "/content/qwen2.5-coder-3b-instruct-q4_k_m.gguf"
destination_folder = "/content/drive/MyDrive/Models"  # You can change 'Models' to any folder you like
# Keep the same file name on Drive as on the local disk.
destination_path = os.path.join(destination_folder, os.path.basename(source_path))

# 3. Create the folder if it doesn't exist
if not os.path.isdir(destination_folder):
    os.makedirs(destination_folder, exist_ok=True)
    print(f"✅ Created folder: {destination_folder}")

# 4. Copy the file (copy rather than move, so the local file stays available)
if os.path.isfile(source_path):
    print(f"🚀 Copying file to {destination_path}...")
    shutil.copy(source_path, destination_path)
    print("✅ Success! File saved to Google Drive.")
else:
    # The source file is produced by the quantization step, not by a download.
    print("❌ Error: Source file not found. Did the quantization step finish?")

# Finally, download the model from Drive

In [None]:
# Mount Google Drive at /content/drive. The same mount call also appears in the
# copy-to-Drive cell; this standalone cell makes the Drive path available
# without re-running that copy (the upload cell reads the model from
# /content/drive/MyDrive/Models).
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Upload the quantized model to the Hugging Face Hub.
import os

from google.colab import userdata
from huggingface_hub import HfApi, login

# 0. Locate the model file before doing anything else, so a missing file is
#    reported immediately instead of after the login step.
#    The Drive copy is preferred; the local copy it was made from is a fallback.
candidate_paths = [
    "/content/drive/MyDrive/Models/qwen2.5-coder-3b-instruct-q4_k_m.gguf",
    "/content/qwen2.5-coder-3b-instruct-q4_k_m.gguf",
]
model_path = next((path for path in candidate_paths if os.path.isfile(path)), None)
if model_path is None:
    raise FileNotFoundError(
        "Quantized model not found. Looked in: " + ", ".join(candidate_paths)
    )

# 1. Login
# Create a token with write access at https://huggingface.co/settings/tokens
# and store it in Colab Secrets under the name 'HF_TOKEN'.
try:
    hf_token = userdata.get('HF_TOKEN')
    login(token=hf_token)
    print("✅ Logged in to Hugging Face using Colab Secrets.")
except Exception as e:
    # Deliberate best-effort: any failure reading the secret or logging in
    # falls back to the interactive login prompt.
    print(f"❌ Could not retrieve HF_TOKEN from Colab Secrets or login failed: {e}")
    print("Please ensure you have added your Hugging Face token to Colab Secrets with the name 'HF_TOKEN'.")
    login()

print("Uploading")
# 2. Upload
api = HfApi()
api.upload_file(
    path_or_fileobj=model_path,  # Where the file is now
    path_in_repo="qwen2.5-coder-3b.gguf",  # File name on Hugging Face
    repo_id="Adhik6495/Qwen2.5-Coder-3B-Instruct",  # Target Hugging Face repo ID
    repo_type="model"
)
print("✅ Upload Complete!")