<a href="https://colab.research.google.com/github/Luxadevi/Ollama-Colab-Integration/blob/main/Ollama_ColabV3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ***OLLAMA COLAB INTEGRATION V3***
---
Seamless integration of Ollama, offering fully autonomous public endpoints. No need for additional software.

## Now with the awaited Ollama Companion

Built with Gradio, Ollama Companion is a web interface that makes it quick and easy to manage Ollama.


### ***Ollama Companion provides the following features***

* Modelfile builder
* Dropdowns and information about all current models
* Modelfile viewer
* Model listing and overview
* Public endpoint with Cloudflare
* LiteLLM proxy with polling of the model list
* Adds models to the LiteLLM config.yaml when creating models and restarts LiteLLM if it is running
* Proper logging for all services
* Up-to-date model list from Ollama (28-11-2023)

In [None]:
#@title Installing Dependencies and Updates
# @markdown Check the box below to install CUDA 12-3 before proceeding.
# @markdown
# @markdown This CUDA installation typically takes around 5 minutes to complete and does not require a system reboot.
# @markdown The latest version of the code only needs this cell to run, but you can refer to the code below for reference.
# @markdown It initiates the Ollama serve process and deploys the Ollama Companion from the 'tools' directory.

# Colab form toggle: when True, the CUDA 12.3 installation commands further down are run.
install_cuda = False #@param {type:"boolean"}
# Log file that run_command() appends each command's description and captured output to.
log_file_path = "/content/install.log"

# Function to run a command and log its output
def run_command(command):
    """Run a shell command and append a progress line plus its output to the install log.

    NOTE: this relies on IPython's ``!`` shell-capture syntax, so it only works
    inside a notebook cell (IPython/Colab), not as plain Python.

    command: the shell command line, as a single string.
    """
    description = " ".join(command.split()[1:])  # Everything after the first word (the executable name)
    print(f"Installing {description}...")
    with open(log_file_path, "a") as log_file:
        log_file.write(f"Installing {description}...\n")
        log_file.flush()  # Flush the buffer to ensure immediate writing to the log file

    # Run the command and capture its output (stdout and stderr)
    output = !{command}  # IPython shell capture; the captured lines are joined with newlines below
    with open(log_file_path, "a") as log_file:
        log_file.write("\n".join(output) + "\n")
        log_file.flush()  # Flush the buffer to ensure immediate writing to the log file

    # NOTE(review): printed unconditionally; the command's exit status is never inspected here.
    print(f"Successfully installed {description}.")

# Optional CUDA 12.3 installation, gated by the `install_cuda` form toggle above.
if install_cuda:
    # CUDA installation commands, run strictly in this order via run_command():
    # each step relies on files or repository state produced by the previous ones.
    cuda_commands = [
        # Download the apt pin file and move it into apt's preferences directory
        "wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-ubuntu2204.pin",
        "mv cuda-ubuntu2204.pin /etc/apt/preferences.d/cuda-repository-pin-600",
        # Download and register the local CUDA 12.3 repository package
        "wget https://developer.download.nvidia.com/compute/cuda/12.3.0/local_installers/cuda-repo-ubuntu2204-12-3-local_12.3.0-545.23.06-1_amd64.deb",
        "dpkg -i cuda-repo-ubuntu2204-12-3-local_12.3.0-545.23.06-1_amd64.deb",
        # Copy the repository's keyring into the system keyring directory
        "cp /var/cuda-repo-ubuntu2204-12-3-local/cuda-*-keyring.gpg /usr/share/keyrings/",
        # Refresh package lists, then install the toolkit and the `cuda` package
        "apt-get update",
        "apt-get -y install cuda-toolkit",
        "apt-get -y install cuda",
    ]

    for cmd in cuda_commands:
        run_command(cmd)

    # NOTE(review): run_command() does not check exit codes, so this message is
    # printed even if one of the steps above failed.
    print("CUDA 12-3 installed successfully.")

else:
    print("CUDA installation skipped.")

# Base setup, run in order through run_command(): system tools, the Ollama
# installer, the Python dependencies, and finally the companion sources, which
# are cloned and then moved directly into /content/.
other_commands = [
    # -y: the notebook cannot answer apt's interactive confirmation prompt,
    # so without it the install aborts whenever apt asks to continue.
    "sudo apt install -y pciutils",
    "lspci",
    "wget https://ollama.ai/install.sh -O install.sh",
    "chmod +x install.sh",
    "./install.sh",
    "pip install asyncio flask_cloudflared gradio requests PyYAML > pip.log 2>&1",  # Redirect both stdout and stderr to pip.log
    "pip install -U litellm > litellm.log 2>&1",  # Redirect both stdout and stderr to litellm.log
    "git clone https://github.com/Luxadevi/Ollama-Colab-Integration.git /content/Ollama-Colab-Integration",
    "mv /content/Ollama-Colab-Integration/* /content/",
    "rm -rf /content/Ollama-Colab-Integration",
]

for cmd in other_commands:
    run_command(cmd)

# Start ollama.py in the background
# (`!` is IPython shell syntax; the trailing `&` backgrounds the process so the cell continues.)
print("Starting ollama.py in the background...")
!nohup python3 /content/tools/ollama.py &

# Run ollama_companion.py
# This one is started without `&`, so the cell stays busy for as long as the companion runs.
print("Running ollama_companion.py...")
print("Public Endpoint Available in Companion Webui")
!python3 /content/tools/ollama_companion.py

## Run Ollama in Subprocess

In [None]:
import subprocess
import threading
import time
import logging.handlers
import httpx
import sys
import os

def create_logger(name, filename, level, formatter):
    """Return the named logger, writing to a rotating file (5 MiB x 5 backups).

    Safe to call repeatedly with the same arguments: ``logging.getLogger``
    returns the same object for a given name, so a second call (for example
    when the notebook cell is re-run) reuses the existing file handler instead
    of stacking another one, which would write every record twice.

    name: logger name.
    filename: path of the log file.
    level: logging level applied to the logger.
    formatter: ``logging.Formatter`` applied to the file handler.
    """
    logger = logging.getLogger(name)
    # RotatingFileHandler stores its target as an absolute path in baseFilename.
    target = os.path.abspath(filename)
    for existing in logger.handlers:
        if (isinstance(existing, logging.handlers.RotatingFileHandler)
                and existing.baseFilename == target):
            # Already attached: just make sure the latest formatter is in effect.
            existing.setFormatter(formatter)
            break
    else:
        handler = logging.handlers.RotatingFileHandler(filename, maxBytes=5*1024*1024, backupCount=5)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    logger.setLevel(level)
    return logger

# Status and error channels currently share one record layout.
_LOG_LAYOUT = '[%(asctime)s] [%(levelname)s] [%(name)s] - %(message)s'
status_formatter = logging.Formatter(_LOG_LAYOUT)
error_formatter = logging.Formatter(_LOG_LAYOUT)

# (logger name, log file, level, formatter) for every channel this script uses.
_logger_specs = (
    ("Status", "status.log", logging.INFO, status_formatter),
    ("OllamaStatus", "ollama.log", logging.INFO, status_formatter),
    ("Error", "error.log", logging.ERROR, error_formatter),
    ("OllamaError", "ollama_error.log", logging.ERROR, error_formatter),
)

# Lookup table of ready-to-use loggers, keyed by logger name.
loggers = {spec[0]: create_logger(*spec) for spec in _logger_specs}

class ProcessMonitor:
    """Start ``ollama serve`` as a child process and restart it whenever it exits.

    Attributes:
        processes: maps a process name (currently only 'ollama') to its Popen handle.
        is_monitoring: while True, monitor_process() keeps polling; stop() clears it.
    """

    def __init__(self):
        self.processes = {}
        self.is_monitoring = True

    def handle_output(self, process_name):
        """Forward each stdout line of the named child to its '<Name>Status' logger.

        Returns immediately when the child was started without a stdout pipe
        (which is how run_ollama() starts it), because there is nothing to read.
        """
        process = self.processes[process_name]
        if process.stdout is None:
            return
        logger_status = loggers[f"{process_name.capitalize()}Status"]
        for line in iter(process.stdout.readline, b''):
            # errors="replace": a stray non-UTF-8 byte must not abort log forwarding.
            logger_status.info(line.decode(errors="replace").strip())

    def run_ollama(self):
        """Launch ``ollama serve`` listening on all interfaces, discarding its output."""
        os.environ["OLLAMA_HOST"] = "0.0.0.0:11434"
        os.environ["OLLAMA_ORIGINS"] = "http://0.0.0.0:*"

        cmd = "ollama serve"
        # Discard the child's stdout and stderr.
        self.processes['ollama'] = subprocess.Popen(
            cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        loggers["OllamaStatus"].info(f"Started ollama with command: {cmd}")

    def monitor_process(self, process_name):
        """Poll the named process every 5 seconds and restart Ollama if it has exited."""
        while self.is_monitoring:
            exited = self.processes[process_name].poll() is not None
            # Re-check the flag: stop() may have terminated the child on purpose
            # since the loop condition was evaluated, and must not be undone.
            if exited and self.is_monitoring:
                loggers["Status"].warning(f"{process_name} process has stopped. Restarting...")
                self.run_ollama()
            time.sleep(5)

    def start(self):
        """Start Ollama and a background thread that keeps it running."""
        self.run_ollama()
        threading.Thread(target=self.monitor_process, args=('ollama',)).start()

    def stop(self):
        """Stop monitoring and terminate every tracked process."""
        # Clear the flag first so the monitor thread does not restart the children.
        self.is_monitoring = False
        for p in self.processes.values():
            p.terminate()

# Entry point: start Ollama together with its monitor thread. monitor.start()
# returns right away; the monitor thread it starts keeps polling afterwards.
if __name__ == '__main__':
    monitor = ProcessMonitor()
    monitor.start()


## Ollama Companion

In [None]:
import gradio as gr
import json
import subprocess
import requests
import re  # Import the re module
from flask import Flask, request, Response
from flask_cloudflared import run_with_cloudflared
from threading import Thread
import time
import yaml
import os
import yaml

# Shell command that starts the LiteLLM proxy in the background with ./config.yaml,
# appending its stdout and stderr to ./litellmlog (both relative to the working directory).
litellm_proxycmd = "PYTHONUNBUFFERED=1 litellm --config ./config.yaml >> litellmlog 2>&1 &"

# Loop flag read by poll_api(); set by start_polling() and cleared by stop_polling().
polling_active = False
# Shell commands that start / kill the background /content/endpoint.py process.
# NOTE: kill_endpoint() defines its own local `kill_endpointcmd` with the same value.
endpointcmd = "PYTHONUNBUFFERED=1 python3 /content/endpoint.py >> endpoint.log 2>&1 &"
kill_endpointcmd = "pkill -f '/content/endpoint.py'"


# Global variables for dropdown options
# (written by the dropdown callback in main(), read by build_curl_command()).
option_1_global = None
option_2_global = None
# NOTE(review): not referenced anywhere else in the visible code.
cloudflare_url = None
# Fetching models data from the URL
url = "https://raw.githubusercontent.com/Luxadevi/Ollama-Colab-Integration/main/models.json"
try:
    # The timeout keeps a stalled download from hanging start-up indefinitely.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    json_data = response.json()
except (requests.RequestException, ValueError) as e:
    # The catalogue only feeds the two model dropdowns, so degrade to empty
    # dropdowns instead of preventing the whole companion from starting.
    print(f"Could not load model list from {url}: {e}")
    response = None
    json_data = {}

# Structuring the data
options_1 = list(json_data.keys())  # ['mistral', 'llama2', 'codellama', ...]
options_2 = json_data  # The entire JSON data

# Parameters with their default values and ranges
# Each entry is `name: [default, choices_or_range]`. main() renders a list as a
# Dropdown and a (min, max) tuple as a Slider; build_curl_command() only emits
# a PARAMETER line for values that differ from the default stored here.
# The key order matters: build_curl_command() zips these keys with the UI
# inputs, which main() creates in this same order.
parameters = {
    'mirostat': [0, [0, 1, 2]],  # Dropdown
    'mirostat_eta': [0.1, (0.0, 1.0)],
    'mirostat_tau': [0.1, (0.0, 1.0)],
    'num_ctx': [4096, (1024, 8192)],
    'num_gqa': [256, (128, 512)],
    'num_gpu': [1, (1, 250)],
    'num_thread': [1, (1, 30)],
    'repeat_last_n': [0, (0, 32000)],
    'repeat_penalty': [1.0, (0.5, 2.0)],
    'temperature': [0.8, (0.1, 1.0)],
    'seed': [None, (0, 10000)],  # None indicates no default value
    'tfs_z': [1, (1, 20)],  # Slider from 1 to 20
    'num_predict': [256, (128, 512)],
    'top_k': [0, (0, 100)],
    'top_p': [1.0, (0.1, 1.0)]
}

def initialize_log_files(content_dir='/content/',
                         log_files=("litellmlog", "endpoint.log", "endpoint_openai.log")):
    """Make sure every expected log file exists, creating empty ones as needed.

    Existing files are left untouched. One status line is printed per file.

    content_dir: directory holding the log files; created if it is missing.
    log_files: file names to ensure inside ``content_dir``.
    """
    os.makedirs(content_dir, exist_ok=True)

    for log_file in log_files:
        log_file_path = os.path.join(content_dir, log_file)
        try:
            # Mode 'x' creates the file atomically and fails if it already
            # exists, so a log written by a running service is never truncated.
            with open(log_file_path, 'x'):
                pass
        except FileExistsError:
            print(f"Log file already exists: {log_file_path}")
        else:
            print(f"Created log file: {log_file_path}")

def is_litellm_running():
    """Return True when `pgrep -f "litellm --config"` reports at least one process.

    Any failure to run the check is printed and treated as "not running".
    """
    try:
        probe = subprocess.run(["pgrep", "-f", "litellm --config"], capture_output=True, text=True)
    except Exception as exc:
        print(f"Error checking if LiteLLM is running: {exc}")
        return False
    # pgrep prints one PID per match, so any output means a match was found.
    return bool(probe.stdout)

def restart_litellm():
    """Kill the LiteLLM proxy, pause briefly, then start it again.

    Progress and failures are reported with print(); nothing is returned.
    """
    settle_seconds = 5  # pause between the kill and the fresh start
    try:
        kill_litellm_proxy()
        time.sleep(settle_seconds)
        start_litellm_proxy_and_read_log()
        print("LiteLLM proxy restarted successfully.")
    except Exception as exc:
        print(f"Error restarting LiteLLM: {exc}")

def kill_litellm_proxy():
    """Run `pkill` against the LiteLLM proxy and return a status message for the UI.

    The exit status of pkill is not inspected; only an exception raised while
    launching it produces an "Error: ..." message.
    """
    try:
        # Matches on the full command line of the proxy process.
        os.system("pkill -f 'litellm --config'")
    except Exception as exc:
        return f"Error: {str(exc)}"
    return "LiteLLM proxy process terminated."
def start_litellm_proxy_and_read_log():
    """Start the LiteLLM proxy and return the model summary found in its log.

    Launches `litellm_proxycmd`, waits 15 seconds, then scans
    /content/litellmlog for the first "Set models" line. Returns that line
    followed by every non-blank line after it, or a fallback message when the
    marker is absent. Any exception is returned as an "Error: ..." string.
    """
    marker = "LiteLLM: Proxy initialized with Config, Set models:"
    try:
        subprocess.Popen(litellm_proxycmd, shell=True)

        # Give the proxy time to start up and write to its log.
        time.sleep(15)

        with open("/content/litellmlog", "r") as log_file:
            log_lines = log_file.readlines()

        for index, text in enumerate(log_lines):
            if marker not in text:
                continue
            # The model names are assumed to be listed on the lines that follow.
            following = [later.strip() for later in log_lines[index + 1:] if later.strip()]
            return "\n".join([text.strip()] + following)

        return "Relevant log information not found."
    except Exception as exc:
        return f"Error: {str(exc)}"

def poll_api():
    """Every 15 seconds, sync the local Ollama model list into the LiteLLM config.

    Runs until `polling_active` is cleared (see stop_polling()). A failed
    request or config update is printed and retried on the next cycle, so a
    temporarily unavailable Ollama server does not end polling for good.
    """
    global polling_active
    while polling_active:
        try:
            # The timeout keeps a hung server from blocking the loop, which
            # would also make stop_polling() ineffective.
            response = requests.get("http://127.0.0.1:11434/api/tags", timeout=10)
            if response.status_code == 200:
                json_data = response.json()
                model_names = [model['name'] for model in json_data.get('models') or []]
                update_config_file(model_names)
        except Exception as e:
            # Top-level boundary of the polling thread: report and keep going.
            print(f"Error polling Ollama for models: {e}")
        time.sleep(15)

def start_polling():
    """Enable polling and start poll_api() on a background thread.

    Returns a status message for the UI.
    """
    global polling_active
    polling_active = True
    # This file imports `Thread` directly (`from threading import Thread`);
    # the `threading` module name itself is not bound in this file.
    Thread(target=poll_api).start()
    return "Polling started"

def stop_polling():
    """Clear the `polling_active` flag and return a status message for the UI.

    poll_api() only checks the flag at the top of its loop, so it notices the
    change after finishing its current cycle.
    """
    global polling_active
    polling_active = False
    return "Polling stopped"
def update_config_file(model_names, config_file_path="/content/config.yaml"):
    """Add any missing Ollama models to the LiteLLM config and restart the proxy.

    Each new model is appended to `model_list` as "ollama/<name>". The file is
    only rewritten, and LiteLLM only restarted (if it is running), when at
    least one model was added.

    model_names: model names as reported by Ollama (without the "ollama/" prefix).
    config_file_path: path of the LiteLLM YAML config; a missing file is
        treated as an empty config and created on the first write.
    """
    try:
        with open(config_file_path, "r") as file:
            config = yaml.safe_load(file) or {}
    except FileNotFoundError:
        config = {}
    except yaml.YAMLError as e:
        print(f"Error reading config file: {e}")
        return

    if not isinstance(config, dict):
        print(f"Error reading config file: top level of {config_file_path} is not a mapping")
        return

    # Ensure 'model_list' is a list; a bare `model_list:` key loads as None.
    model_list = config.get('model_list')
    if model_list is None:
        model_list = config['model_list'] = []
    elif not isinstance(model_list, list):
        print(f"Error reading config file: 'model_list' in {config_file_path} is not a list")
        return

    # Skip malformed entries instead of failing on a missing 'model_name'.
    existing_models = {
        entry.get('model_name') for entry in model_list if isinstance(entry, dict)
    }
    needs_update = False

    for model_name in model_names:
        full_model_name = f"ollama/{model_name}"
        if full_model_name in existing_models:
            continue
        model_list.append({
            'model_name': full_model_name,
            'litellm_params': {
                'model': full_model_name,
                'api_base': "http://127.0.0.1:11434",
                'json': True
            }
        })
        existing_models.add(full_model_name)
        needs_update = True

    # Write the updated content back to the YAML file and restart LiteLLM if necessary
    if needs_update:
        with open(config_file_path, "w") as file:
            yaml.dump(config, file, default_flow_style=False, sort_keys=False)
        if is_litellm_running():
            restart_litellm()


def start_openai_proxy():
    """Start the public OpenAI-compatible endpoint and return the tail of its log.

    Launches /content/endpointopenai.py in the background, waits 15 seconds,
    and returns the last two lines of /content/endpoint_openai.log. Any
    exception is returned as an "Error: ..." string.
    """
    launch_cmd = "PYTHONUNBUFFERED=1 python3 /content/endpointopenai.py >> endpoint_openai.log 2>&1 &"
    try:
        subprocess.Popen(launch_cmd, shell=True)

        # Give the endpoint time to come up and write to its log.
        time.sleep(15)

        with open("/content/endpoint_openai.log", "r") as log_file:
            tail = log_file.readlines()[-2:]
        return "".join(tail)
    except Exception as exc:
        return f"Error: {str(exc)}"
def start_endpoint_and_get_last_2_lines():
    """Start the public Flask endpoint and return the tail of its log.

    Launches `endpointcmd` in the background, waits 15 seconds, and returns
    the last two lines of /content/endpoint.log. Any exception is returned as
    an "Error: ..." string.
    """
    try:
        subprocess.Popen(endpointcmd, shell=True)

        # Give the endpoint time to come up and write to its log.
        time.sleep(15)

        with open("/content/endpoint.log", "r") as log_file:
            tail = log_file.readlines()[-2:]
        return "".join(tail)
    except Exception as exc:
        return f"Error: {str(exc)}"
def kill_endpoint():
    """Run `pkill` against both public endpoint scripts and return a status message.

    The exit status of pkill is not inspected; only an exception raised while
    launching it produces an "Error: ..." message.
    """
    endpoint_scripts = ("/content/endpoint.py", "/content/endpointopenai.py")
    try:
        for script in endpoint_scripts:
            os.system(f"pkill -f '{script}'")
    except Exception as exc:
        return f"Error: {str(exc)}"
    return "Endpoints killed successfully."



def build_curl_command(model_name, modelfile_content, stop_sequence, *args):
    """Assemble a Modelfile from the UI inputs and create the model through Ollama.

    model_name: name for the new model.
    modelfile_content: user-supplied Modelfile text; when it contains no FROM,
        a FROM line is built from the two model dropdown selections.
    stop_sequence: optional stop sequence, added as a PARAMETER line.
    *args: parameter values, in the same order as the keys of `parameters`.

    Returns (curl_command, output): a shell-quoted rendering of the request
    for display, and curl's stdout (or stderr when stdout is empty). On
    failure returns ("", "Error: ...").
    """
    import shlex  # only needed to render the command for display

    try:
        modelfile_content = modelfile_content or ""

        # Check if 'FROM' is present in the modelfile_content
        if 'FROM' not in modelfile_content:
            if not option_1_global or not option_2_global:
                return "", "Error: the Modelfile has no FROM line and no base model is selected."
            # The newline keeps the user's first instruction off the FROM line.
            modelfile_content = f"FROM {option_1_global}:{option_2_global}\n" + modelfile_content

        for param, value in zip(parameters.keys(), args):
            default = parameters[param][0]
            if value is None or value == default:
                continue
            # A UI widget may report a whole number as a float (e.g. 4096.0);
            # emit it as an integer so integer-valued parameters stay valid.
            if isinstance(value, float) and value.is_integer():
                value = int(value)
            # mirostat is written like every other parameter: its numeric mode.
            modelfile_content += f"\nPARAMETER {param} {value}"

        if stop_sequence:  # Add stop sequence if provided
            modelfile_content += f"\nPARAMETER stop {stop_sequence}"

        payload = json.dumps({
            "name": model_name,
            "modelfile": modelfile_content
        })
        # Pass arguments as a list (no shell): the payload contains free-form
        # user text, and a single quote in it would otherwise break out of the
        # quoting of a string-built shell command.
        argv = ["curl", "http://localhost:11434/api/create", "-d", payload]
        curl_command = " ".join(shlex.quote(part) for part in argv)
        process = subprocess.run(argv, capture_output=True, text=True)
        return curl_command, process.stdout or process.stderr
    except Exception as e:
        return "", f"Error: {str(e)}"
def create_model_manually(model_name, modelfile_content, stream_response):
    """POST a model-creation request to the local Ollama API and return its JSON reply.

    On any failure, returns a dict with an empty "curl_command" and the error
    text under "execution_output".

    NOTE: an identical function with this name is defined again later in this
    file; that later definition is the one bound at runtime.
    """
    payload = {
        "name": model_name,
        "modelfile": modelfile_content,
        "stream": stream_response
    }
    try:
        reply = requests.post("http://localhost:11434/api/create", json=payload)
        return reply.json()
    except Exception as exc:
        return {"curl_command": "", "execution_output": f"Error: {str(exc)}"}
def show_model_details(model_name):
    """Fetch a model's details from the local Ollama API via curl.

    model_name: name of the model to look up.

    Returns a 4-tuple (license, modelfile, parameters, template). When the
    lookup fails, the first element carries the error or raw output and the
    other three are empty strings.
    """
    payload = json.dumps({"name": model_name})
    try:
        # Pass arguments as a list (no shell): the model name is free-form
        # user text, and a single quote in it would otherwise break the
        # quoting of a string-built shell command.
        process = subprocess.run(
            ["curl", "http://localhost:11434/api/show", "-d", payload],
            capture_output=True, text=True)
    except OSError as e:
        # e.g. curl is not installed
        return (f"Error: {str(e)}", "", "", "")
    output = process.stdout or process.stderr

    try:
        json_data = json.loads(output)
    except json.JSONDecodeError:
        # Not valid JSON (for instance curl's own error text): show it as-is.
        return (output, "", "", "")

    if not isinstance(json_data, dict):
        return (output, "", "", "")
    if 'error' in json_data:
        # Surface the API's error message instead of four "Not available" fields.
        return (f"Error: {json_data['error']}", "", "", "")

    license_info = json_data.get('license', 'Not available')
    modelfile_info = json_data.get('modelfile', 'Not available')
    parameters_info = json.dumps(json_data.get('parameters', {}), indent=4)
    template_info = json_data.get('template', 'Not available')

    return license_info, modelfile_info, parameters_info, template_info
def list_models():
    """Return the names of all local Ollama models, one per line.

    Like the other UI handlers in this file, a failure (server unreachable,
    unexpected reply) is returned as an "Error: ..." string instead of being
    raised.
    """
    try:
        # The timeout keeps the UI from hanging when the server does not answer.
        response = requests.get("http://127.0.0.1:11434/api/tags", timeout=10)
        response.raise_for_status()
        # `or []` also covers a reply of {"models": null}.
        models = response.json().get('models') or []
        return "\n".join(model['name'] for model in models)
    except Exception as e:
        return f"Error: {str(e)}"
def create_model_manually(model_name, modelfile_content, stream_response):
    """Create a model through the local Ollama API from a raw Modelfile.

    model_name: name for the new model.
    modelfile_content: Modelfile text sent as-is.
    stream_response: value forwarded as the request's "stream" field.

    Returns the decoded JSON reply. On any failure, returns a dict with an
    empty "curl_command" and the error text under "execution_output".
    """
    request_body = {
        "name": model_name,
        "modelfile": modelfile_content,
        "stream": stream_response
    }
    try:
        return requests.post("http://localhost:11434/api/create", json=request_body).json()
    except Exception as err:
        return {"curl_command": "", "execution_output": f"Error: {str(err)}"}

def main():
    """Build the Ollama Companion Gradio UI and launch it with a public share link.

    Creates the log files first, then lays out four tabs: "ModelFile
    Templater", "Model Info", "Public Endpoint" and "LiteLLM-Proxy", wiring
    each button to the matching module-level handler.
    """
    initialize_log_files()
    with gr.Blocks(theme='ParityError/LimeFace') as app:

        gr.Markdown("Ollama Companion")

        with gr.Tab("ModelFile Templater"):
            with gr.Row():
                model_name = gr.Textbox(label="Model Name", placeholder="Enter model name")
                modelfile_content_input = gr.Textbox(lines=10, label="Modelfile Content", placeholder="Enter manual modelfile content")
                stop_sequence = gr.Textbox(label="Stop Sequence", placeholder="Enter stop sequence")
                # d1 lists the top-level keys of the model catalogue; d2 starts empty and
                # is filled from the catalogue entry of whatever is picked in d1.
                d1 = gr.Dropdown(choices=options_1, label="Model-Provider")
                d2 = gr.Dropdown([])

                def update_second(first_val):
                    # Return a new dropdown built from the catalogue entry for the first selection.
                    d2 = gr.Dropdown(options_2[first_val])
                    return d2

                d1.input(update_second, d1, d2)

                outputs = gr.Textbox()

                def print_results(option_1, option_2):
                    # Remember the selection so build_curl_command() can build a FROM line from it.
                    global option_1_global, option_2_global  # Declare them as global
                    option_1_global = option_1  # Update global variable
                    option_2_global = option_2  # Update global variable
                    return f"You selected '{option_1}:{option_2}' in the second dropdown."

                d2.input(print_results, [d1, d2], outputs)

            # One input widget per entry of `parameters`, created in dict order;
            # build_curl_command() relies on this order to match values to names.
            parameter_inputs = []
            for param, (default, range_) in parameters.items():
                if isinstance(range_, list):  # Dropdown parameter
                    parameter_inputs.append(gr.Dropdown(label=param, choices=range_, value=default))
                elif range_ is None:  # No range given -> checkbox (no current entry in `parameters` takes this branch)
                    parameter_inputs.append(gr.Checkbox(label=param, value=default))
                elif isinstance(range_, tuple):  # Numeric parameter with a range
                    parameter_inputs.append(
                        gr.Slider(label=param, minimum=range_[0], maximum=range_[1], value=default))

            submit_button = gr.Button("Build and deploy Model")
            curl_command_output = gr.Textbox(label="API Call")
            execution_output = gr.Textbox(label="Execution Output", interactive=False)

            submit_button.click(
                build_curl_command,
                inputs=[model_name, modelfile_content_input, stop_sequence] + parameter_inputs,
                outputs=[curl_command_output, execution_output]
            )
        with gr.Tab("Model Info"):
            with gr.Row():
                model_name_input = gr.Textbox(label="Model Name", placeholder="Enter model name for details")
                model_info_button = gr.Button("Get Model Info")
                model_list_button = gr.Button("List All Models")

            license_output = gr.Textbox(label="License", interactive=False)
            modelfile_output = gr.Textbox(label="Modelfile", interactive=False)
            parameters_output = gr.Textbox(label="Parameters", interactive=False)
            template_output = gr.Textbox(label="Template", interactive=False)
            model_list_output = gr.Textbox(label="List of Models", interactive=False)

            # show_model_details() returns a 4-tuple that maps onto these four boxes in order.
            model_info_button.click(
                fn=show_model_details,
                inputs=[model_name_input],
                outputs=[license_output, modelfile_output, parameters_output, template_output]
            )

            model_list_button.click(fn=list_models, inputs=[], outputs=[model_list_output])
        with gr.Tab("Public Endpoint"):
            # Button to start the original endpoint
            start_endpoint_button = gr.Button("Start Public Endpoint")

            # Text box to display the last 2 lines
            last_2_lines_output = gr.Textbox(label="Last 2 Lines", interactive=False)

            # Set the action for the button click
            start_endpoint_button.click(start_endpoint_and_get_last_2_lines, inputs=[], outputs=[last_2_lines_output])

            # Button to start the OpenAI proxy endpoint
            start_openai_button = gr.Button("Start Public OpenAI Endpoint")

            # Text box to display the last 2 lines for the OpenAI proxy endpoint
            openai_last_2_lines_output = gr.Textbox(label="OpenAI Last 2 Lines", interactive=False)

            # Set the action for the button click
            start_openai_button.click(start_openai_proxy, inputs=[], outputs=[openai_last_2_lines_output])

            # Button to kill the endpoint
            kill_endpoint_button = gr.Button("Kill Both Endpoints")

            # Set the action for the button click
            # (the kill status message is shown in the first endpoint's "Last 2 Lines" box)
            kill_endpoint_button.click(kill_endpoint, inputs=[], outputs=[last_2_lines_output])
        with gr.Tab("LiteLLM-Proxy"):
            # Textboxes for displaying logs and status
            litellm_log_output = gr.Textbox(label="LiteLLM Log Output", interactive=False, lines=10)
            litellm_kill_status = gr.Textbox(label="LiteLLM Kill Status", interactive=False, lines=10)
            polling_status = gr.Textbox(label="Polling Status", interactive=False, lines=10)

            # Buttons for starting, stopping, and killing the proxy
            with gr.Row():
                start_litellm_button = gr.Button("Start LiteLLM Proxy")
                kill_litellm_button = gr.Button("Kill LiteLLM Proxy")
                start_polling_button = gr.Button("Start Polling")
                stop_polling_button = gr.Button("Stop Polling")

            # Link the buttons to their respective functions
            start_litellm_button.click(
                fn=start_litellm_proxy_and_read_log,
                inputs=[],
                outputs=[litellm_log_output]
            )

            kill_litellm_button.click(
                fn=kill_litellm_proxy,
                inputs=[],
                outputs=[litellm_kill_status]
            )

            start_polling_button.click(
                fn=start_polling,
                inputs=[],
                outputs=[polling_status]
            )

            stop_polling_button.click(
                fn=stop_polling,
                inputs=[],
                outputs=[polling_status]
            )


    # Launched after the Blocks context has closed, i.e. once the layout is complete.
    # NOTE(review): confirm `display_in_cell` is accepted by the installed Gradio version's launch().
    app.launch(share=True, display_in_cell=False)



# Entry point: build and launch the companion UI when run as a script or notebook cell.
if __name__ == "__main__":
    main()

