<a href="https://colab.research.google.com/gist/Gholamrezadar/6914bba2e246bbf0e82f3e932f6729e2/ghd-colab-ollama.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This notebook shows how to install and run Ollama on Google Colab and expose its API publicly through an ngrok tunnel.

Set `NGROK_TOKEN` (your ngrok auth token) in the Colab secrets panel before running the ngrok cells.

Gholamreza Dar 2024

In [None]:
# Installing Ollama
!curl -fsSL https://ollama.com/install.sh | sh

In [None]:
# Set OLLAMA_HOST in the kernel's environment so that the Ollama server started
# in a later cell inherits it. Binding to 0.0.0.0 instead of the loopback
# default is needed for tunneling via ngrok.
# Run this cell BEFORE starting `ollama serve`.
%env OLLAMA_HOST=0.0.0.0

In [None]:
# Running Ollama and serving on localhost:11434
!nohup ollama serve &

In [None]:
# Download the llama3.2:1b model; the curl and LangChain cells below refer to it by this tag.
# Other models can be found at https://ollama.com/search
!ollama pull llama3.2:1b

In [None]:
# Check the available models: prints name, ID, size and modification time of each
# locally downloaded model (llama3.2:1b should be listed after the pull).
!ollama list

NAME           ID              SIZE      MODIFIED       
llama3.2:1b    baf6a787fdff    1.3 GB    16 minutes ago    


## Curl

In [None]:
# Test the api using curl locally
!curl http://localhost:11434/api/generate -d '{"model": "llama3.2:1b","prompt":"Question: Who was the first president of the United States? \n Only answer using a few words. maybe just a name Answer: "}'

{"model":"llama3.2:1b","created_at":"2024-12-10T06:23:06.818957955Z","response":"George","done":false}
{"model":"llama3.2:1b","created_at":"2024-12-10T06:23:07.012560323Z","response":" Washington","done":false}
{"model":"llama3.2:1b","created_at":"2024-12-10T06:23:07.207398347Z","response":"","done":true,"done_reason":"stop","context":[128006,9125,128007,271,38766,1303,33025,2696,25,6790,220,2366,18,271,128009,128006,882,128007,271,14924,25,10699,574,279,1176,4872,315,279,3723,4273,30,720,8442,4320,1701,264,2478,4339,13,7344,1120,264,836,22559,25,220,128009,128006,78191,128007,271,40052,6652],"total_duration":7380048972,"load_duration":3349295750,"prompt_eval_count":52,"prompt_eval_duration":3634000000,"eval_count":3,"eval_duration":394000000}


## LangChain


In [None]:
!pip install -qU langchain-ollama

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from langchain_ollama.llms import OllamaLLM

# Wrap the locally served llama3.2:1b model in a LangChain LLM object.
model = OllamaLLM(model="llama3.2:1b")

# Send a single prompt string; the returned text is displayed as the cell's output.
model.invoke(
    "Who is the best soccer player? "
    "(Only answer using a few word. no explanations. do not put a period at the end.)"
)

'Lionel Messi'

In [None]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama.llms import OllamaLLM

# Prompt text with a {question} placeholder, worded to nudge the model toward a terse answer.
template = (
    "Question: {question}\n"
    "Only answer using a few words. maybe just a name\n"
    "Answer: "
)

# Build the model and the prompt, then pipe the formatted prompt into the model.
model = OllamaLLM(model="llama3.2:1b")
prompt = ChatPromptTemplate.from_template(template)
chain = prompt | model

# The dict key fills the {question} placeholder; the model's reply is the cell output.
chain.invoke({"question": "Who was the first president of the United States?"})

'George Washington.'

## Expose the API publicly using ngrok

In [None]:
!pip install -qU pyngrok

In [None]:
from google.colab import userdata
from pyngrok import ngrok, conf

# Local address of the Ollama server that the tunnel forwards to.
OLLAMA_LOCAL_URL = "http://localhost:11434"

# Read the ngrok auth token from the Colab secrets panel (secret name: NGROK_TOKEN).
ngrok_token = userdata.get('NGROK_TOKEN')
if not ngrok_token:
    raise ValueError("NGROK_TOKEN secret not found. Please add it to Colab secrets.")

# Hand the token to pyngrok: once on the default in-process config, once via set_auth_token.
conf.get_default().auth_token = ngrok_token
ngrok.set_auth_token(ngrok_token)

# Keep the cell idempotent: reuse a tunnel that already forwards to the Ollama port and
# only open a new one when none exists, so re-running the cell does not pile up tunnels.
ollama_tunnels = [
    tunnel for tunnel in ngrok.get_tunnels()
    if str((tunnel.config or {}).get("addr", "")).endswith(":11434")
]
# NOTE: despite its name, `public_url` holds the tunnel object; the URL string is its
# `.public_url` attribute.
public_url = ollama_tunnels[0] if ollama_tunnels else ngrok.connect(OLLAMA_LOCAL_URL)

# WARNING: this cell adds no authentication to the tunnel, so anyone who learns the
# printed URL can call the Ollama API. Do not share it more widely than intended.
print(f"Ollama server public URL: {public_url.public_url}")


Ollama server public URL: https://fb69-35-230-17-154.ngrok-free.app


In [None]:
# Alternative (left disabled): open the same tunnel with the ngrok command-line client
# instead of the pyngrok call in the previous cell.
# !ngrok http http://localhost:11434