In [None]:
# 1. Install Ollama in Colab
!curl -fsSL https://ollama.com/install.sh | sh

# 2. Start Ollama server
import subprocess
import time

subprocess.Popen(["ollama", "serve"])
time.sleep(10)  # wait for Ollama server to start

# 3. Pull the Llama 4  model (small and efficient)
!ollama pull llama4

# 4. Install Python dependencies
!pip install -q langchain sentence-transformers ollama langchain-community

# 5. Imports
from langchain_community.llms import Ollama
from langchain.agents import initialize_agent, Tool, AgentType
from sentence_transformers import SentenceTransformer
import numpy as np

# 6. Semantic Search (S-BERT)
# Sentence-embedding model used for both the corpus and incoming queries.
embed_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Tiny in-memory corpus that `search` ranks against.
documents = [
    "Artificial Intelligence is the future of technology.",
    "Machine learning algorithms help computers learn from data.",
    "Deep learning is a subset of machine learning."
]

# Encode the whole corpus in one batched call instead of one `encode` call per
# document; `np.asarray` keeps the result a 2-D array with one row per
# document, which is what `search` indexes along axis 1.
doc_embeddings = np.asarray(embed_model.encode(documents))

def search(query):
    """Return the document from `documents` most similar to `query`.

    The query is embedded with `embed_model` and compared against every row
    of `doc_embeddings` using cosine similarity; the text of the
    highest-scoring document is returned.
    """
    query_vec = embed_model.encode(query)

    # Cosine similarity = dot product divided by the product of the norms.
    dot_products = np.dot(doc_embeddings, query_vec)
    norm_products = np.linalg.norm(doc_embeddings, axis=1) * np.linalg.norm(query_vec)
    scores = dot_products / norm_products

    best_index = np.argmax(scores)
    return documents[best_index]

# 7. Ollama LangChain setup with Llama 4
# LLM wrapper that talks to the locally running Ollama server.
llm = Ollama(model="llama4")

# Single tool exposed to the agent: semantic lookup over `documents`.
tools = [
    Tool(
        name="Search",
        func=search,
        description="Searches semantically relevant documents."
    )
]

# `initialize_agent` selects the agent type through its `agent` keyword; it
# has no `agent_type` parameter. The previous `agent_type=` argument was not
# consumed by `initialize_agent`, so the agent only ended up as zero-shot
# ReAct because that happens to be the default.
agent = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    # Let the executor recover when the model's output cannot be parsed
    # instead of raising.
    handle_parsing_errors=True,
)

# 8. Function to invoke the LangChain agent
def langchain_agent(query):
    """Run `query` through the module-level `agent` and return its result."""
    result = agent.invoke(query)
    return result

# 9. Test the LangChain agent
query_agent = "What is the future of Artificial Intelligence?"

# Print the header first so it appears above the agent's verbose trace.
print("LangChain Agent Response:")
agent_response = langchain_agent(query_agent)
print(agent_response)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
pulling 9d507a36062c: 100% ▕▏  67 GB                         [K
pulling 399a8a5a36db: 100% ▕▏ 7.8 KB                         [K
pulling 24ca191a372b: 100% ▕▏ 6.0 KB                         [K
pulling c8e18cc26ec5: 100% ▕▏ 1.1 KB                         [K
pulling fc1ffc71ab8e: 100% ▕▏ 1.6 KB                         [K
pulling cf2c03fa5aed: 100% ▕▏  713 B                         [K
verifying sha256 digest ⠇ [K[?25h[?2026l[?2026h[?25l[A[A[A[A[A[A[A[1Gpulling manifest [K
pulling 9d507a36062c: 100% ▕▏  67 GB                         [K
pulling 399a8a5a36db: 100% ▕▏ 7.8 KB                         [K
pulling 24ca191a372b: 100% ▕▏ 6.0 KB                         [K
pulling c8e18cc26ec5: 100% ▕▏ 1.1 KB                         [K
pulling fc1ffc71ab8e: 100% ▕▏ 1.6 KB                         [K
pulling cf2c03fa5aed: 100% ▕▏  713 B                         [K
verifying sha256 digest ⠏ [K[?25h[?2026l[?2026h

Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/3.51k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

  llm = Ollama(model="llama4")
  agent = initialize_agent(


LangChain Agent Response:


[1m> Entering new AgentExecutor chain...[0m


ValueError: Ollama call failed with status code 500. Details: {"error":"model requires more system memory (56.0 GiB) than is available (10.6 GiB)"}