# Ollama 101 - A set of basic ollama functions from the ollama pypi docs
All functions are referenced from the PyPi ollama package built by Jeffrey Morgan: https://pypi.org/project/ollama/

The following notebook is a set of ollama tools and part of an ollama guide made by Leo Borcherding. 
This notebook is part of the following ollama tutorial https://github.com/Leoleojames1/ollamaStarterKit

In [None]:
# Install Ollama
!pip install ollama

# Ollama pull/run

The following commands will allow you to pull any ollama model, feel free to explore ollama's vast set of models to choose from, here are some of my custom system prompts: https://ollama.com/borch

ollama run phi3

&

ollama pull phi3

# Ollama show & list

*ollama show, shows the currently loaded model's modelfile metadata*
ollama show --modelfile llama3.2:3b

*ollama list shows the available models*
ollama list

In [None]:
# Import the Ollama library
import ollama
import json
from datetime import datetime

# Show information about a specific model
model_info = ollama.show('llama3.2:3b')

# Convert to a dictionary for easier formatting
model_dict = {
    "model": "llama3.2:3b",
    "modified_at": str(model_info.modified_at),
    "family": model_info.details.family,
    "parameter_size": model_info.details.parameter_size,
    "quantization": model_info.details.quantization_level,
    "parameters": model_info.parameters.split('\n')
}

# Pretty print the model information
print(json.dumps(model_dict, indent=2))

# List all available models
models = ollama.list()

# Print a simplified table of models
print("\n{:<40} {:<10} {:<10} {:<10}".format("MODEL", "FAMILY", "SIZE", "QUANT"))
print("-" * 75)

for model in models.models:
    size_gb = f"{model.size / 1_000_000_000:.2f} GB"
    print("{:<40} {:<10} {:<10} {:<10}".format(
        model.model[:38], 
        model.details.family[:8], 
        size_gb, 
        model.details.quantization_level[:8]
    ))


# Chat Response

The ChatResponse method can be used for direct back and forth messaging with your ollama model, but will not provide access to prompt streaming. For Streaming follow along to the next section.

In [None]:
from ollama import chat
from ollama import ChatResponse

response: ChatResponse = chat(model='llama3.2:3b', messages=[
  {
    'role': 'user',
    'content': 'Who are you? And what were you trained on?',
  },
])
print(response['message']['content'])
# or access fields directly from the response object
print(response.message.content)

# Streaming Response



Here's a nice interactive chat interface for your Ollama models that works directly in the notebook. This interface includes:

1. A dropdown to select the ollama model
2. A text input for messages
3. A scrollable chat history display
4. A reset button to start fresh conversations
5. Streaming responses that appear character by character

To use it, simply run this cell and you'll get a fully functional chat interface:

In [None]:
import ollama
import time
from IPython.display import clear_output

class SimpleOllamaChat:
    def __init__(self, model="llama3.2:3b"):
        """Initialize the chat interface with a specified model."""
        self.model = model
        self.messages = []
        self.running = True
        print(f"🤖 Chat initialized with model: {self.model}")
        print("Type 'exit' to end the chat, 'clear' to reset the conversation, or 'change model:[name]' to switch models.")
        
    def display_models(self):
        """Display available models in a clean format."""
        try:
            models_list = ollama.list()
            print("\nAvailable models:")
            print("-" * 60)
            for model in models_list.models[:10]:  # Show only first 10 models to avoid clutter
                print(f"• {model.model}")
            if len(models_list.models) > 10:
                print(f"... and {len(models_list.models) - 10} more models")
            print("-" * 60)
        except Exception as e:
            print(f"⚠️ Error fetching models: {e}")
    
    def start_chat(self):
        """Start the interactive chat loop."""
        while self.running:
            # Get user input
            user_message = input("\n👤 You: ")
            
            # Process commands
            if user_message.lower() == 'exit':
                print("👋 Ending chat session.")
                self.running = False
                break
                
            elif user_message.lower() == 'clear':
                self.messages = []
                clear_output(wait=True)
                print(f"🤖 Chat reset with model: {self.model}")
                print("Type 'exit' to end the chat, 'clear' to reset the conversation, or 'change model:[name]' to switch models.")
                continue
                
            elif user_message.lower().startswith('change model:'):
                new_model = user_message[len('change model:'):].strip()
                if new_model:
                    self.model = new_model
                    print(f"🔄 Model changed to {self.model}")
                    continue
                else:
                    self.display_models()
                    continue
                    
            elif user_message.lower() == 'models':
                self.display_models()
                continue
                
            # Skip empty messages
            if not user_message.strip():
                continue
                
            # Add user message to history
            self.messages.append({'role': 'user', 'content': user_message})
            
            # Get AI response with streaming
            print("\n🤖 AI: ", end='')
            
            try:
                full_response = ""
                stream = ollama.chat(
                    model=self.model,
                    messages=self.messages,
                    stream=True,
                )
                
                for chunk in stream:
                    if 'message' in chunk and 'content' in chunk['message']:
                        text_chunk = chunk['message']['content']
                        full_response += text_chunk
                        print(text_chunk, end='', flush=True)
                
                # Add AI response to history
                self.messages.append({'role': 'assistant', 'content': full_response})
                print()  # Add newline after response
                
            except Exception as e:
                print(f"\n⚠️ Error: {str(e)}")
                
                # If model not found, suggest listing models
                if "model not found" in str(e).lower():
                    print(f"Model '{self.model}' not found. Type 'models' to see available models.")
                    
                    # Reset to a likely available model
                    default_models = ["llama3.2:3b", "llama3.2", "llama3", "phi3"]
                    for model in default_models:
                        try:
                            # Try to pull model info to check if it exists
                            ollama.show(model)
                            self.model = model
                            print(f"Switched to model: {self.model}")
                            break
                        except:
                            continue

# Create and start the chat interface
chat = SimpleOllamaChat()
chat.start_chat()

# Custom Modelfile create

In [5]:
import os
import json
import subprocess
from typing import Optional, Dict, Any, List, Union, Tuple
import time
import re

def generate_modelfile(
    model_name: str,
    base_model: str = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    system_prompt: str = "",
    parameters: Dict[str, Any] = None,
    output_dir: str = "./modelfiles",
    filename: str = None,
    tags: List[str] = None,
    license: str = "Apache-2.0",
    export_to_ollama: bool = False,
    ollama_name: str = None
) -> str:
    """
    Generate a Modelfile for use with Ollama from Unsloth fine-tuned models.
    
    Parameters:
    -----------
    model_name : str
        A name for your custom model
    base_model : str
        Base model name, either local path or Hugging Face ID
    system_prompt : str
        Custom system prompt for the model
    parameters : dict
        Model parameters like temperature, top_p, etc.
    output_dir : str
        Directory to save the Modelfile
    filename : str
        Custom filename for the Modelfile (defaults to model_name.Modelfile)
    tags : list
        Tags to categorize the model
    license : str
        License type for the model
    export_to_ollama : bool
        Whether to create an Ollama model from this Modelfile
    ollama_name : str
        Custom name for the Ollama model (defaults to model_name)
        
    Returns:
    --------
    str
        Path to the generated Modelfile
    """
    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    
    # Set default parameters if none provided
    if parameters is None:
        parameters = {
            "temperature": 0.7,
            "top_p": 0.9,
            "top_k": 40,
            "num_ctx": 4096,
            "stop": ["<|im_end|>", "<|endoftext|>"]
        }
    
    # Set default filename if none provided
    if filename is None:
        # Replace any slashes with hyphens for the filename
        safe_model_name = model_name.replace("/", "-")
        filename = f"{safe_model_name}.Modelfile"
    
    # Set default ollama_name if none provided
    if ollama_name is None:
        # Remove any slashes and spaces for Ollama model name
        ollama_name = model_name.split("/")[-1].lower().replace(" ", "-")
    
    # Format the system prompt for the Modelfile
    formatted_system_prompt = system_prompt.replace('\n', '\\n').replace('"', '\\"')
    
    # Build the Modelfile content
    modelfile_content = f'FROM {base_model}\n\n'
    
    if system_prompt:
        modelfile_content += f'SYSTEM """{system_prompt}"""\n\n'
    
    # Add parameters
    for param_name, param_value in parameters.items():
        if isinstance(param_value, list):
            # Handle lists like stop words
            param_str = json.dumps(param_value)
            modelfile_content += f'PARAMETER {param_name} {param_str}\n'
        else:
            modelfile_content += f'PARAMETER {param_name} {param_value}\n'
    
    # Add template if needed for specific models
    if "llama" in base_model.lower() or "llama3" in base_model.lower():
        modelfile_content += '\n# Using Llama 3 template\n'
        modelfile_content += 'TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>\n\n{{- end }}{{ range $i, $message := .Messages }}{{- if eq $message.Role "user" }}<|start_header_id|>user<|end_header_id|>\n\n{{ $message.Content }}<|eot_id|>\n\n{{- else if eq $message.Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ $message.Content }}<|eot_id|>\n\n{{- end }}{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n"""\n'
    elif "phi-3" in base_model.lower():
        modelfile_content += '\n# Using Phi-3 template\n'
        modelfile_content += 'TEMPLATE """{{- if .System }}<|system|>\n{{ .System }}\n<|user|>\n{{- else }}<|user|>\n{{- end }}{{ range $i, $message := .Messages }}{{- if eq $message.Role "user" }}{{ $message.Content }}\n<|assistant|>\n{{- else if eq $message.Role "assistant" }}{{ $message.Content }}\n<|user|>\n{{- end }}{{ end }}"""\n'
    
    # Add tags if provided
    if tags:
        tags_str = ", ".join(tags)
        modelfile_content += f'\nTAGS {tags_str}\n'
    
    # Add license if provided
    if license:
        modelfile_content += f'LICENSE {license}\n'
    
    # Save the Modelfile
    output_path = os.path.join(output_dir, filename)
    with open(output_path, "w") as f:
        f.write(modelfile_content)
    
    print(f"✅ Modelfile generated at: {output_path}")
    
    # Create Ollama model if requested
    if export_to_ollama:
        try:
            print(f"🔄 Creating Ollama model '{ollama_name}'...")
            # Check if Ollama is installed
            import subprocess
            result = subprocess.run(["ollama", "create", ollama_name, "-f", output_path], 
                                 capture_output=True, text=True)
            
            if result.returncode == 0:
                print(f"✅ Ollama model '{ollama_name}' created successfully!")
                print(f"   You can now use it with: ollama run {ollama_name}")
            else:
                print(f"❌ Failed to create Ollama model: {result.stderr}")
        except Exception as e:
            print(f"❌ Error creating Ollama model: {str(e)}")
            print("   Make sure Ollama is installed and running.")
    
    return output_path

def extract_modelfile(model_name: str) -> Tuple[bool, str]:
    """
    Extracts the Modelfile from an existing Ollama model using 'ollama show'.
    
    Parameters:
    -----------
    model_name : str
        Name of the existing Ollama model
        
    Returns:
    --------
    Tuple[bool, str]
        Success status and either the Modelfile content or error message
    """
    try:
        # Run 'ollama show --modelfile model_name' command
        result = subprocess.run(
            ["ollama", "show", "--modelfile", model_name],
            capture_output=True,
            text=True,
            check=True
        )
        
        if result.returncode == 0:
            return True, result.stdout
        else:
            return False, f"Error: {result.stderr}"
    except subprocess.CalledProcessError as e:
        return False, f"Failed to extract Modelfile: {e.stderr}"
    except Exception as e:
        return False, f"Error: {str(e)}"

def list_ollama_models() -> List[str]:
    """
    Get a list of all available Ollama models.
    
    Returns:
    --------
    List[str]
        List of model names
    """
    try:
        # Run 'ollama list' command
        result = subprocess.run(
            ["ollama", "list"],
            capture_output=True,
            text=True,
            check=True
        )
        
        if result.returncode == 0:
            # Parse the output to extract model names
            lines = result.stdout.strip().split('\n')
            # Skip the header line and extract the first column (model name)
            models = []
            for line in lines[1:]:  # Skip header
                if line.strip():  # Skip empty lines
                    models.append(line.split()[0])  # First column is model name
            return models
        else:
            print(f"Error listing models: {result.stderr}")
            return []
    except Exception as e:
        print(f"Error: {str(e)}")
        return []

# Function to create a custom Unsloth ModelFile and deploy it to Ollama
def create_and_deploy_custom_model(
    model_name: str,
    base_model: str = "llama3.2:3b",
    system_prompt: str = None,
    temperature: float = 0.7,
    context_length: int = 4096,
    stop_tokens: List[str] = None,
    test_prompt: str = "Tell me what you can do",
    ollama_export: bool = True,
    from_existing_model: bool = False
) -> None:
    """
    Creates a custom ModelFile for use with Ollama and optionally deploys it.
    
    Parameters:
    -----------
    model_name : str
        Name for your custom model
    base_model : str
        Base model to use (e.g., "llama3.2:3b", "phi3:latest")
    system_prompt : str
        Custom system prompt
    temperature : float
        Model temperature (0.0-2.0)
    context_length : int
        Context window size
    stop_tokens : list
        Custom stop tokens
    test_prompt : str
        Test prompt to use when demonstrating the model
    ollama_export : bool
        Whether to export to Ollama
    from_existing_model : bool
        If True, extract the Modelfile from the base_model rather than creating a new one
    """
    # Generate timestamp for unique file naming
    timestamp = time.strftime("%Y%m%d-%H%M%S")
    output_dir = f"./modelfiles_{timestamp}"
    os.makedirs(output_dir, exist_ok=True)
    
    if from_existing_model:
        # Extract the existing Modelfile
        success, modelfile_content = extract_modelfile(base_model)
        
        if not success:
            print(f"❌ {modelfile_content}")
            return None
            
        # Modify the Modelfile as needed
        # 1. Change the FROM line if needed
        if base_model != model_name:
            modelfile_content = re.sub(r'^FROM .*$', f'FROM {base_model}', modelfile_content, flags=re.MULTILINE)
        
        # 2. Update or add SYSTEM prompt if provided
        if system_prompt is not None:
            # Check if a SYSTEM prompt already exists
            if 'SYSTEM' in modelfile_content:
                # Replace existing SYSTEM prompt
                modelfile_content = re.sub(
                    r'SYSTEM\s+["""].*?["""]', 
                    f'SYSTEM """{system_prompt}"""', 
                    modelfile_content, 
                    flags=re.DOTALL
                )
            else:
                # Add SYSTEM prompt after FROM line
                modelfile_content = re.sub(
                    r'(FROM .*?)(\n\n|\n)', 
                    f'\\1\\2SYSTEM """{system_prompt}"""\\2', 
                    modelfile_content
                )
        
        # 3. Update temperature parameter if specified
        if 'PARAMETER temperature' in modelfile_content:
            modelfile_content = re.sub(
                r'PARAMETER temperature .*', 
                f'PARAMETER temperature {temperature}', 
                modelfile_content
            )
        else:
            # Add temperature parameter if it doesn't exist
            modelfile_content += f"\nPARAMETER temperature {temperature}\n"
        
        # 4. Update context length if specified
        if 'PARAMETER num_ctx' in modelfile_content:
            modelfile_content = re.sub(
                r'PARAMETER num_ctx .*', 
                f'PARAMETER num_ctx {context_length}', 
                modelfile_content
            )
        else:
            # Add context length parameter if it doesn't exist
            modelfile_content += f"PARAMETER num_ctx {context_length}\n"
        
        # 5. Update stop tokens if specified
        if stop_tokens is not None:
            # Remove existing stop tokens
            modelfile_content = re.sub(r'PARAMETER stop .*\n', '', modelfile_content)
            
            # Add new stop tokens
            for token in stop_tokens:
                modelfile_content += f'PARAMETER stop {token}\n'
        
        # Save the modified Modelfile
        output_path = os.path.join(output_dir, f"{model_name}.Modelfile")
        with open(output_path, "w") as f:
            f.write(modelfile_content)
            
        print(f"✅ Modified Modelfile generated at: {output_path}")
        
        # Create Ollama model if requested
        if ollama_export:
            try:
                # Format the name for Ollama (lowercase, replace spaces with hyphens)
                ollama_name = model_name.split("/")[-1].lower().replace(" ", "-")
                
                print(f"🔄 Creating Ollama model '{ollama_name}'...")
                result = subprocess.run(
                    ["ollama", "create", ollama_name, "-f", output_path], 
                    capture_output=True, 
                    text=True
                )
                
                if result.returncode == 0:
                    print(f"✅ Ollama model '{ollama_name}' created successfully!")
                else:
                    print(f"❌ Failed to create Ollama model: {result.stderr}")
            except Exception as e:
                print(f"❌ Error creating Ollama model: {str(e)}")
                
    else:
        # Default system prompt if none provided
        if system_prompt is None:
            system_prompt = f"""You are {model_name}, a helpful AI assistant.
You provide clear, accurate, and concise responses to queries.
You always strive to be respectful, ethical, and supportive.
"""
        
        # Default stop tokens if none provided
        if stop_tokens is None:
            stop_tokens = ["<|im_end|>", "<|endoftext|>"]
        
        # Create parameters dictionary
        parameters = {
            "temperature": temperature,
            "top_p": 0.9,
            "num_ctx": context_length,
            "stop": stop_tokens
        }
        
        # Generate the ModelFile from scratch
        output_path = generate_modelfile(
            model_name=model_name,
            base_model=base_model,
            system_prompt=system_prompt,
            parameters=parameters,
            output_dir=output_dir,
            tags=["custom", "unsloth"],
            export_to_ollama=ollama_export
        )
    
    print(f"\n📄 ModelFile created at: {output_path}")
    
    # If exporting to Ollama, show how to test the model
    if ollama_export:
        # Format the name for Ollama (lowercase, replace spaces with hyphens)
        ollama_name = model_name.split("/")[-1].lower().replace(" ", "-")
        
        print("\n🚀 To test your model with the Ollama CLI:")
        print(f"   ollama run {ollama_name}")
        
        print("\n🧪 To test your model with Python:")
        print(f"""
import ollama

# Simple test
response = ollama.chat(model='{ollama_name}', messages=[
    {{'role': 'user', 'content': '{test_prompt}'}}
])

print(response['message']['content'])

# Interactive chat
from IPython.display import clear_output

chat = SimpleOllamaChat(model='{ollama_name}')
chat.start_chat()
""")
    
    return output_path

# Example usage - Uncomment and modify as needed
"""
# Create a coding assistant from an existing model
coding_system_prompt = \"\"\"You are CodingLlama, an expert programming assistant.
Focus on providing clean, efficient, and well-documented code examples.
Always include explanations of how the code works.
If you're unsure about any part of your solution, acknowledge it and suggest alternatives.\"\"\"

create_and_deploy_custom_model(
    model_name="CodingLlama",
    base_model="llama3.2:3b",
    system_prompt=coding_system_prompt,
    temperature=0.2,  # Lower temperature for more precise coding answers
    context_length=4096,
    test_prompt="Write a Python function to count word frequencies in a text file"
)
"""

'\n# Create a coding assistant from an existing model\ncoding_system_prompt = """You are CodingLlama, an expert programming assistant.\nFocus on providing clean, efficient, and well-documented code examples.\nAlways include explanations of how the code works.\nIf you\'re unsure about any part of your solution, acknowledge it and suggest alternatives."""\n\ncreate_and_deploy_custom_model(\n    model_name="CodingLlama",\n    base_model="llama3.2:3b",\n    system_prompt=coding_system_prompt,\n    temperature=0.2,  # Lower temperature for more precise coding answers\n    context_length=4096,\n    test_prompt="Write a Python function to count word frequencies in a text file"\n)\n'

In [2]:
# Create a programming assistant model
coding_system_prompt = """You are CodingLlama, an expert programming assistant.
Focus on providing clean, efficient, and well-documented code examples.
Always include explanations of how the code works.
If you're unsure about any part of your solution, acknowledge it and suggest alternatives."""

create_and_deploy_custom_model(
    model_name="CodingLlama",
    base_model="llama3.2:3b",
    system_prompt=coding_system_prompt,
    temperature=0.2,  # Lower temperature for more precise coding answers
    context_length=4096,
    test_prompt="Write a Python function to count word frequencies in a text file"
)

# Or create a model by modifying an existing one
# create_and_deploy_custom_model(
#     model_name="WritingGPT",
#     base_model="llama3.2:3b",
#     system_prompt="You are a professional writing assistant, specialized in helping users craft engaging content.",
#     temperature=0.7,
#     context_length=8192,
#     from_existing_model=True  # Extract and modify the existing Modelfile
# )

✅ Modelfile generated at: ./modelfiles_20250317-205648\CodingLlama.Modelfile
🔄 Creating Ollama model 'codingllama'...
❌ Failed to create Ollama model: [?2026h[?25l[1G[?25h[?2026l
Error: (line 59): command must be one of "from", "license", "template", "system", "adapter", "parameter", or "message"


📄 ModelFile created at: ./modelfiles_20250317-205648\CodingLlama.Modelfile

🚀 To test your model with the Ollama CLI:
   ollama run codingllama

🧪 To test your model with Python:

import ollama

# Simple test
response = ollama.chat(model='codingllama', messages=[
    {'role': 'user', 'content': 'Write a Python function to count word frequencies in a text file'}
])

print(response['message']['content'])

# Interactive chat
from IPython.display import clear_output

chat = SimpleOllamaChat(model='codingllama')
chat.start_chat()



'./modelfiles_20250317-205648\\CodingLlama.Modelfile'

# Custom Unsloth Modelfile for GGUF

In [2]:
# New function to create a Modelfile for an existing GGUF file
def create_modelfile_for_gguf(
    model_name: str,
    gguf_path: str,
    system_prompt: str = None,
    temperature: float = 0.7,
    context_length: int = 4096,
    stop_tokens: list[str] = None,
    output_dir: str = None,
    export_to_ollama: bool = True
):
    """
    Creates a Modelfile for an existing GGUF model file and optionally imports it to Ollama.
    
    Parameters:
    -----------
    model_name : str
        Name for your Ollama model
    gguf_path : str
        Path to the GGUF model file
    system_prompt : str
        Custom system prompt
    temperature : float
        Model temperature
    context_length : int
        Context window size
    stop_tokens : list
        Custom stop tokens
    output_dir : str
        Directory to save the Modelfile (defaults to a timestamped directory)
    export_to_ollama : bool
        Whether to import the model into Ollama
    """
    # Generate timestamp for unique file naming if no output_dir provided
    if output_dir is None:
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        output_dir = f"./modelfiles_{timestamp}"
    
    os.makedirs(output_dir, exist_ok=True)
    
    # Default system prompt if none provided
    if system_prompt is None:
        system_prompt = f"""You are {model_name}, a helpful AI assistant.
You provide clear, accurate, and concise responses to queries.
You always strive to be respectful, ethical, and supportive.
"""
    
    # Default stop tokens if none provided
    if stop_tokens is None:
        stop_tokens = ["<|im_end|>", "<|endoftext|>"]
    
    # Create the Modelfile content
    modelfile_content = f"FROM {gguf_path}\n\n"
    
    # Add system prompt
    modelfile_content += f'SYSTEM """{system_prompt}"""\n\n'
    
    # Add parameters
    modelfile_content += f"PARAMETER temperature {temperature}\n"
    modelfile_content += f"PARAMETER num_ctx {context_length}\n"
    
    # Add stop tokens
    for token in stop_tokens:
        modelfile_content += f'PARAMETER stop {token}\n'
    
    # Add appropriate template based on model name
    if "llama" in model_name.lower() or "llama3" in gguf_path.lower():
        modelfile_content += '\n# Using Llama 3 template\n'
        modelfile_content += 'TEMPLATE """{{- if .System }}<|start_header_id|>system<|end_header_id|>\n\n{{ .System }}<|eot_id|>\n\n{{- end }}{{ range $i, $message := .Messages }}{{- if eq $message.Role "user" }}<|start_header_id|>user<|end_header_id|>\n\n{{ $message.Content }}<|eot_id|>\n\n{{- else if eq $message.Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>\n\n{{ $message.Content }}<|eot_id|>\n\n{{- end }}{{ end }}<|start_header_id|>assistant<|end_header_id|>\n\n"""\n'
    elif "phi" in model_name.lower() or "phi" in gguf_path.lower():
        modelfile_content += '\n# Using Phi-3 template\n'
        modelfile_content += 'TEMPLATE """{{- if .System }}<|system|>\n{{ .System }}\n<|user|>\n{{- else }}<|user|>\n{{- end }}{{ range $i, $message := .Messages }}{{- if eq $message.Role "user" }}{{ $message.Content }}\n<|assistant|>\n{{- else if eq $message.Role "assistant" }}{{ $message.Content }}\n<|user|>\n{{- end }}{{ end }}"""\n'
    
    # Save the Modelfile
    safe_model_name = model_name.replace("/", "-")
    output_path = os.path.join(output_dir, f"{safe_model_name}.Modelfile")
    with open(output_path, "w") as f:
        f.write(modelfile_content)
    
    print(f"✅ Modelfile for GGUF model generated at: {output_path}")
    
    # Create Ollama model if requested
    if export_to_ollama:
        try:
            # Format the name for Ollama (lowercase, replace spaces with hyphens)
            ollama_name = model_name.split("/")[-1].lower().replace(" ", "-")
            
            print(f"🔄 Creating Ollama model '{ollama_name}' from GGUF file...")
            result = subprocess.run(
                ["ollama", "create", ollama_name, "-f", output_path], 
                capture_output=True, 
                text=True
            )
            
            if result.returncode == 0:
                print(f"✅ Ollama model '{ollama_name}' created successfully!")
                print(f"   You can now use it with: ollama run {ollama_name}")
            else:
                print(f"❌ Failed to create Ollama model: {result.stderr}")
                
                # If error mentions MIME type, try adding absolute path
                if "Content type" in result.stderr or "MIME" in result.stderr:
                    print("Trying again with absolute path...")
                    # Convert to absolute path if it's not already
                    abs_gguf_path = os.path.abspath(gguf_path)
                    
                    # Update Modelfile with absolute path
                    modelfile_content = modelfile_content.replace(f"FROM {gguf_path}", f"FROM {abs_gguf_path}")
                    with open(output_path, "w") as f:
                        f.write(modelfile_content)
                    
                    # Try again with updated Modelfile
                    result = subprocess.run(
                        ["ollama", "create", ollama_name, "-f", output_path], 
                        capture_output=True, 
                        text=True
                    )
                    
                    if result.returncode == 0:
                        print(f"✅ Ollama model '{ollama_name}' created successfully!")
                        print(f"   You can now use it with: ollama run {ollama_name}")
                    else:
                        print(f"❌ Failed again: {result.stderr}")
        except Exception as e:
            print(f"❌ Error creating Ollama model: {str(e)}")
    
    return output_path

def find_unsloth_gguf_models():
    """Find GGUF models generated by Unsloth in common directories"""
    gguf_files = []
    
    # Get current working directory
    cwd = os.getcwd()
    
    # Common directories where Unsloth might save GGUF files
    search_dirs = [
        cwd,  # Current directory
        os.path.join(cwd, "finetuned_model_gguf"),  # Default Unsloth GGUF export dir
        os.path.join(cwd, "gguf_exports"),  # Unsloth UI export dir
        os.path.expanduser("~/finetuned_model_gguf"),  # Home directory
        os.path.expanduser("~/gguf_exports"),  # Home directory exports
    ]
    
    # Look for any directory with _gguf suffix up to 2 levels deep
    for root, dirs, _ in os.walk(cwd):
        # Limit depth to avoid excessive searching
        if root.count(os.sep) - cwd.count(os.sep) <= 2:
            for dir_name in dirs:
                if dir_name.endswith("_gguf") or "_gguf_" in dir_name:
                    search_dirs.append(os.path.join(root, dir_name))
    
    # Search for GGUF files in all the directories
    for directory in search_dirs:
        if os.path.exists(directory):
            for file in os.listdir(directory):
                if file.endswith(".gguf"):
                    full_path = os.path.join(directory, file)
                    # Get file size for information
                    size_mb = os.path.getsize(full_path) / (1024 * 1024)
                    gguf_files.append((full_path, file, f"{size_mb:.1f} MB"))
    
    return gguf_files

# Use gguf find & create gguf modelfile

In [4]:
# List available GGUF models generated by Unsloth
gguf_models = find_unsloth_gguf_models()

if gguf_models:
    print("Found the following GGUF models:")
    for i, (path, name) in enumerate(gguf_models):
        print(f"{i+1}. {name} at {path}")
    
    # Ask user to select a model (you can also specify the index directly)
    try:
        selected_index = int(input(f"Enter the number of the model you want to use (1-{len(gguf_models)}): ")) - 1
        if 0 <= selected_index < len(gguf_models):
            selected_path, selected_name = gguf_models[selected_index]
            
            # Create a model name based on the file name
            model_name = os.path.splitext(selected_name)[0]
            
            # Create system prompt for this model
            system_prompt = f"""You are an AI assistant based on a fine-tuned {model_name} model.
You are helpful, accurate, and engaging.
You provide concise and well-structured answers to questions.
"""
            
            # Create the Modelfile and import to Ollama
            create_modelfile_for_gguf(
                model_name=model_name,
                gguf_path=selected_path,
                system_prompt=system_prompt,
                temperature=0.7,
                context_length=4096,
                export_to_ollama=True
            )
        else:
            print("Invalid selection!")
    except ValueError:
        print("Please enter a valid number!")
else:
    print("No GGUF models found. You need to export your model to GGUF format first.")
    print("You can do this using the 'Export to GGUF' tab in the Unsloth UI.")

No GGUF models found. You need to export your model to GGUF format first.
You can do this using the 'Export to GGUF' tab in the Unsloth UI.
