In [6]:
# AI Agent with Memory using Ollama and Local Storage


# Install Ollama in Colab
!curl -fsSL https://ollama.com/install.sh | sh

# Start Ollama server
import subprocess
import time

subprocess.Popen(["ollama", "serve"])
time.sleep(10)  # wait for Ollama server to start

# Pull the Gemma 3  model (small and efficient)
!ollama pull gemma3

time.sleep(10)

# AI Agent with Local Memory using Ollama + LangChain + LangGraph

# 1. Install Required Packages
!pip install langchain langgraph requests

# 2. Imports
import json
import requests
from typing import Optional, List, TypedDict
from langchain.llms.base import LLM
from langchain.schema import HumanMessage, AIMessage
from langgraph.graph import StateGraph, END

# 3. Ollama LLM Wrapper with Streaming Fix
class OllamaLLM(LLM):
    """LangChain LLM that sends prompts to an Ollama server's ``/api/generate`` endpoint.

    The reply is read as a stream of newline-delimited JSON objects and the
    ``response`` fragments are concatenated into one string.
    """

    model: str = "gemma3"  # Ollama model tag to run
    temperature: float = 0.0  # sampling temperature sent with every request
    endpoint: str = "http://localhost:11434"  # Ensure Ollama is running locally
    request_timeout: float = 300.0  # seconds to wait for a connection / the next streamed chunk

    def _call(self, prompt: str, stop: Optional[List[str]] = None, run_manager=None, **kwargs) -> str:
        """Generate a completion for ``prompt`` and return it as stripped text.

        Args:
            prompt: Text sent to the model.
            stop: Optional stop sequences, forwarded to Ollama when given.
            run_manager: Accepted for compatibility with the LangChain
                ``_call`` signature; not used.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            The concatenated ``response`` fragments with surrounding
            whitespace removed.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            RuntimeError: If a streamed chunk carries an ``error`` field.
        """
        # Ollama reads sampling parameters from the "options" object; a
        # top-level "temperature" key is not part of the request schema.
        options = {"temperature": self.temperature}
        if stop:
            options["stop"] = stop
        payload = {"model": self.model, "prompt": prompt, "options": options}

        fragments = []
        # The context manager releases the streamed connection even on error.
        with requests.post(
            f"{self.endpoint}/api/generate",
            json=payload,
            stream=True,
            timeout=self.request_timeout,
        ) as response:
            # Fail loudly (e.g. unknown model) instead of returning "".
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue
                try:
                    data = json.loads(line.decode("utf-8"))
                except json.JSONDecodeError:
                    # Best effort: skip anything that is not a JSON chunk.
                    continue
                if not isinstance(data, dict):
                    continue
                if "error" in data:
                    raise RuntimeError(f"Ollama error: {data['error']}")
                if "response" in data:
                    fragments.append(data["response"])
                if data.get("done"):
                    # Final chunk of the stream; nothing more to read.
                    break

        return "".join(fragments).strip()

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "ollama"

# Module-level LLM instance built with the class defaults; used by respond_with_memory.
llm = OllamaLLM()

# 4. Local Memory Implementation
class LocalMemory:
    """In-process conversation log of human and AI messages, oldest first."""

    def __init__(self):
        # Starts empty; save() appends to it.
        self.messages = []

    def load(self):
        """Return a shallow copy of the stored messages."""
        return list(self.messages)

    def save(self, input_text, output_text):
        """Record one exchange: the human input, then the AI output."""
        exchange = ((HumanMessage, input_text), (AIMessage, output_text))
        for message_type, text in exchange:
            self.messages.append(message_type(content=text))

# Module-level conversation store read and written by respond_with_memory.
memory = LocalMemory()

# 5. Define Agent State Schema
# Graph state schema: "input" is the user's prompt, "output" is the agent's reply.
AgentState = TypedDict("AgentState", {"input": str, "output": str})

# 6. Response Function
def respond_with_memory(state: AgentState) -> AgentState:
    """Answer the current input using the stored conversation as context.

    The prompt is the content of every remembered message followed by the new
    input, one per line. After the model replies, the exchange is appended to
    the module-level ``memory``.

    Args:
        state: Graph state; only ``state["input"]`` is read.

    Returns:
        A new state holding the original ``input`` and the model's ``output``.
    """
    input_text = state["input"]
    prompt_lines = [message.content for message in memory.load()]
    prompt_lines.append(input_text)
    # Use the Runnable ``invoke`` API: calling the LLM object directly
    # (``llm(prompt)``) is deprecated in LangChain.
    response = llm.invoke("\n".join(prompt_lines))
    memory.save(input_text, response)
    return {"input": input_text, "output": response}

# 7. Build LangGraph Workflow
# Single-node graph: the "respond" node is both where execution starts and
# where it ends.
RESPOND_NODE = "respond"

workflow = StateGraph(AgentState)
workflow.add_node(RESPOND_NODE, respond_with_memory)
workflow.set_entry_point(RESPOND_NODE)
# An edge to END marks the node as the finish point.
workflow.add_edge(RESPOND_NODE, END)

# Compile the graph definition into a runnable agent.
agent = workflow.compile()

# 8. Run the Agent
# Each turn pairs the heading to print with the prompt to send. The second
# turn relies on the memory filled in by the first.
turns = [
    ("Interaction 1:", "What's the capital of France?"),
    ("\nInteraction 2:", "Remind me what you just told me."),
]
for heading, question in turns:
    print(heading)
    state = {"input": question}
    result = agent.invoke(state)
    print("Agent:", result["output"])

# 9. View Memory History
# Dump every stored message, labelled by who produced it.
print("\n--- Memory Trace ---")
for entry in memory.load():
    if isinstance(entry, HumanMessage):
        speaker = "Human"
    else:
        speaker = "AI"
    print(f"{speaker}: {entry.content}")



>>> Cleaning up old version at /usr/local/lib/ollama
>>> Installing ollama to /usr/local
>>> Downloading Linux amd64 bundle
######################################################################## 100.0%
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.
[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling aeda25e63ebd: 100% ▕▏ 3.3 GB                         [K
pulling e0a42594d802: 1