## ALL Imports


In [6]:
!pip install agno
!pip install mistralai
!pip install mem0ai
!pip install duckduckgo-search

Collecting duckduckgo-search
  Downloading duckduckgo_search-8.0.4-py3-none-any.whl.metadata (16 kB)
Collecting primp>=0.15.0 (from duckduckgo-search)
  Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading duckduckgo_search-8.0.4-py3-none-any.whl (18 kB)
Downloading primp-0.15.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m53.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: primp, duckduckgo-search
Successfully installed duckduckgo-search-8.0.4 primp-0.15.0


In [13]:
import base64
import os
import time

from agno.agent import Agent, RunResponse
from agno.models.google import Gemini
from agno.tools.duckduckgo import DuckDuckGoTools
from agno.utils.pprint import pprint_run_response
from mem0 import MemoryClient
from mem0.client.main import APIError
from mistralai import Mistral

In [18]:
from google.colab import userdata
# API keys are read from Colab's user-data store instead of being hardcoded.
# Key for the Mistral client used in the OCR step.
MISTRAL_API_KEY = userdata.get("MISTRAL_KEY")

# Key passed to the Gemini model that backs the Agno agent.
GEMINI_API_KEY = userdata.get("GEMINI_KEY")

# Key for the Mem0 MemoryClient. NOTE: the constant is spelled MEMO (letter O)
# while the stored secret is MEM0 (digit zero); later cells read MEMO_API_KEY.
MEMO_API_KEY = userdata.get("MEM0_KEY")

## OCR Scanning and Markdown generation using ***Mistral Agent***

In [9]:
# Mistral client used for the OCR request below.
client = Mistral(api_key=MISTRAL_API_KEY)

# --- Alternative (disabled): upload a local PDF manually, then OCR it ---
# uploaded_pdf = client.files.upload(
#     file={
#         "file_name": "uploaded_file.pdf",
#         "content": open("/content/Omm_Devgoswami_Resume_2025.pdf", "rb"),
#     },
#     purpose="ocr"
# )

# print("File uploaded successfully:", uploaded_pdf.id)
# file_url = client.files.get_signed_url(file_id = uploaded_pdf.id)

# response = client.ocr.process(
#     model = "mistral-ocr-latest",
#     document = {
#         "type" :"document_url",
#         "document_url" : file_url.url
#         },
#     include_image_base64 = True
# )


# --- Active path: process the PDF straight from its URL ---
pdf_document = {
    "type": "document_url",
    "document_url": "https://ncert.nic.in/textbook/pdf/lebo104.pdf",
}

# include_image_base64=True is requested because the export step further down
# reads `image_base64` from every image of every page.
ocr_response = client.ocr.process(
    model="mistral-ocr-latest",
    document=pdf_document,
    include_image_base64=True,
)


def data_uri_to_bytes(data_uri):
  """Decode the base64 payload of a data URI into raw bytes.

  Args:
    data_uri: Either a full data URI (``<header>,<base64 payload>``) or a
      bare base64 payload without any header.

  Returns:
    The decoded bytes.

  Raises:
    binascii.Error: If the payload is not valid base64.
  """
  # Everything up to the first comma is the header. partition() never raises,
  # so an input without a comma is treated as a bare payload instead of
  # failing on tuple unpacking.
  _, separator, payload = data_uri.partition(",")
  if not separator:
    payload = data_uri
  return base64.b64decode(payload)

def export_image(images):
  """Decode one OCR image and write it to disk under its own id.

  Args:
    images: A single image entry (the plural name is kept for backward
      compatibility). It must expose ``id``, used as the output file path,
      and ``image_base64``, the data URI holding the image bytes.
  """
  image_bytes = data_uri_to_bytes(images.image_base64)
  # Take the file name from the argument itself. The previous code read a
  # global named `image`, which only existed while the export loop was running.
  with open(images.id, "wb") as image_file:
    image_file.write(image_bytes)

# Build one Markdown document from the OCR result and export every page image.
# The file is written as UTF-8 explicitly because the next stage reads it back
# with encoding="utf-8"; relying on the platform default could corrupt
# non-ASCII text.
with open("digital_notes.md", "w", encoding="utf-8") as file:
  for page in ocr_response.pages:
    file.write(page.markdown)
    # Blank line between pages so the last line of one page never fuses with
    # the first line (often a heading) of the next.
    file.write("\n\n")
    for image in page.images:
        export_image(image)

/usr/local/lib/python3.11/dist-packages/mistralai/models/documenturlchunk.py:38: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  for n, f in self.model_fields.items():
/usr/local/lib/python3.11/dist-packages/mistralai/models/ocrrequest.py:94: PydanticDeprecatedSince211: Accessing the 'model_fields' attribute on the instance is deprecated. Instead, you should access this attribute from the model class. Deprecated in Pydantic V2.11 to be removed in V3.0.
  for n, f in self.model_fields.items():


# Using ***Agno-AI to refine the markdown data*** and transforming it into Flash-cards, Quizzes and Summaries !!

In [10]:
# Gemini model that backs the Agno agent defined further down.
model = Gemini(
    # Identity and credentials.
    name="Buddy",
    id="gemini-2.0-flash-exp",
    api_key=GEMINI_API_KEY,
    # Generation settings.
    temperature=0.7,
    presence_penalty=0.0,
    frequency_penalty=0.0,
)

# Prompt handed to the agent. The Mem0 cell later looks up the sections named
# "Flashcards", "Quiz MCQs" and "Note Summary" under "## " headings and splits
# flashcards on blank lines, so the prompt spells out that exact layout.
AGNO_PROMPT = """
You are Agno AI, the core semantic brain behind NeoScribe — a smart assistant that transforms messy handwritten academic notes into clean, structured digital learning content.

Your task is to:
1. Analyze the input academic content and output the following:
   - Main Topic
   - Clear Subtopics
   - One-line Summary per Subtopic
   - Prerequisite Concepts
   - Learning Difficulty (Beginner / Intermediate / Advanced)
   - Insights or Warnings (if applicable)
2. If the content contains image references or links:
   - Attempt to preserve them in markdown using `![Alt Text](path_or_description)` syntax.
   - If unable to embed, just list them as:
     **Image X:** [brief description]
   - Append all image descriptions at the end under `## Referenced Images`.

3. Format everything in markdown for readability and digital publishing.
4. Use concise, clear academic language.

In addition, you should:
- Generate a few flashcards with Q&A format.
- Include 3 - 4 possible quiz MCQs from the content.
- Provide a short note summary for memorization purposes.

Put these three parts under exactly these level-2 headings, in this order, so they can be extracted automatically:
## Flashcards
## Quiz MCQs
## Note Summary

Flash card format (plain text, no bold or bullets; start each card with `Q:`, put `A:` on the next line, and leave one blank line between flashcards):
Q: What is ... ?
A: The Answer is ...

Quiz format:
Q: Which of the following is true about ...?
A) Option 1
B) Option 2
C) Option 3
D) Option 4
A: The correct answer is B) Option 2.

Use the DuckDuckGo tool to search for any additional information needed to clarify concepts or provide context.
Make sure to include all relevant information in your response.

Everything should be markdown-friendly and easy to parse by an external memory engine.
"""

# Agent wiring: the Gemini model, the NeoScribe prompt (passed as the agent's
# description) and DuckDuckGo search as the only registered tool.
agent = Agent(
    description=AGNO_PROMPT,
    model=model,
    tools=[DuckDuckGoTools()],
    markdown=True,
    show_tool_calls=True,
)

# Load the OCR-generated Markdown. Undecodable bytes are replaced rather than
# aborting the run.
with open("digital_notes.md", "r", encoding="utf-8", errors="replace") as f:
    markdown_content = f.read()

analysis_request = (
    "Analyze the following academic content and provide a structured summary:\n\n"
    + markdown_content
)

# Run the agent exactly once and reuse that single response for both display
# and persistence. Calling it twice (once to print, once to save) doubles the
# cost and means the saved notes are not the ones that were shown.
response: RunResponse = agent.run(analysis_request, stream=False)
pprint_run_response(response, markdown=True)

agno_output = response.content

# Fail with a clear message instead of a TypeError from write() when the agent
# produced no text.
if not isinstance(agno_output, str) or not agno_output.strip():
    raise RuntimeError("Agent returned no text content; quick_notes.md was not written.")

with open("quick_notes.md", "w", encoding="utf-8") as out:
    out.write(agno_output)


Output()

# Mem0 Integration : Storing the Information

In [19]:
# Markdown file that the pipeline below reads and pushes to Mem0.
AGNO_OUTPUT_PATH = "quick_notes.md"
# Mem0 user id the memories are stored under.
user_id = "neoscribe"

# Module-level Mem0 client for this cell.
client = MemoryClient(api_key=MEMO_API_KEY)

def parse_agno_output(text):
    """Split markdown text into a mapping of ``## `` heading -> section body.

    Every line is whitespace-stripped. A line starting with ``## `` opens a new
    section keyed by the heading text; later lines are appended to that
    section, each followed by a newline. Lines before the first heading are
    discarded, and a repeated heading restarts its section.
    """
    collected = {}
    heading = None
    for raw_line in text.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("## "):
            heading = stripped[3:].strip()
            collected[heading] = []
            continue
        if heading:
            collected[heading].append(stripped + "\n")
    return {title: "".join(parts) for title, parts in collected.items()}

def load_markdown_content(filepath):
    """Return the full text of the UTF-8 encoded file at ``filepath``."""
    with open(filepath, mode="r", encoding="utf-8") as source:
        text = source.read()
    return text

def format_for_mem0(parsed):
    """Turn parsed note sections into a list of chat-style messages.

    Args:
        parsed: Mapping of section title -> section text. The keys read are
            "Flashcards", "Quiz MCQs" and "Note Summary"; missing keys are
            treated as empty.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts, always appended in
        user/assistant pairs: one pair per flashcard, then one pair for the
        quiz section and one for the summary when those are non-empty.
    """
    messages = []

    # Flashcards are separated by blank lines. Each chunk is stripped first so
    # a leading blank line after the heading (or extra blank lines between
    # cards) does not hide the "Q:" prefix and silently drop the card.
    for chunk in parsed.get("Flashcards", "").split("\n\n"):
        card = chunk.strip()
        # Require the answer on its own line: that is what the split below
        # relies on, so a one-line "Q: ... A: ..." card is skipped instead of
        # raising on tuple unpacking.
        if not card.startswith("Q:") or "\nA:" not in card:
            continue
        question, answer = card.split("\nA:", 1)
        messages.append({"role": "user", "content": question.strip()})
        messages.append({"role": "assistant", "content": "A: " + answer.strip()})

    quiz_section = parsed.get("Quiz MCQs", "").strip()
    if quiz_section:
        messages.append({"role": "user", "content": "Add these MCQ-style quiz questions:"})
        messages.append({"role": "assistant", "content": quiz_section})

    summary = parsed.get("Note Summary", "").strip()
    if summary:
        messages.append({"role": "user", "content": "Summarize the key concepts in a short note."})
        messages.append({"role": "assistant", "content": summary})

    return messages

def chunked(iterable, size):
    """Lazily yield consecutive slices of ``iterable``, each at most ``size`` long.

    ``iterable`` must support ``len()`` and slicing.
    """
    yield from (
        iterable[start:start + size]
        for start in range(0, len(iterable), size)
    )

def push_to_mem0(messages, user_id, batch_size=5):
    """Upload chat-style messages to Mem0 in batches.

    Args:
        messages: List of message dicts to store.
        user_id: Mem0 user id the memories are stored under.
        batch_size: Maximum number of messages sent per ``client.add`` call.

    Each batch is uploaded exactly once. A batch that raises ``APIError`` is
    reported and skipped so the remaining batches are still attempted, and the
    final line states whether everything was stored.
    """
    if not messages:
        print("⚠ No messages to upload to Mem0.")
        return

    client = MemoryClient(api_key=MEMO_API_KEY)
    batches = list(chunked(messages, batch_size))
    uploaded = 0

    for index, batch in enumerate(batches, start=1):
        print(f"📤 Uploading batch {index}/{len(batches)}...")
        try:
            client.add(batch, user_id=user_id)
        except APIError as e:
            print(f"❌ Error uploading batch {index}: {e}")
            continue
        uploaded += len(batch)
        time.sleep(1)  # slight delay to avoid rate limiting

    # No second client.add(messages) here: the batches above already cover
    # every message, so re-sending the full list would store each one twice.
    if uploaded == len(messages):
        print(f"✅ Successfully added {len(messages)//2} knowledge units to Mem0 for user '{user_id}'.")
    else:
        print(f"⚠ Uploaded {uploaded} of {len(messages)} messages to Mem0 for user '{user_id}'; see errors above.")

def run_pipeline():
    """Load the saved notes, parse them into sections, and push them to Mem0.

    Reads ``AGNO_OUTPUT_PATH`` and uploads under the module-level ``user_id``.
    """
    sections = parse_agno_output(load_markdown_content(AGNO_OUTPUT_PATH))
    push_to_mem0(format_for_mem0(sections), user_id)

# Entry point for this cell: runs the upload pipeline when the code executes
# as the main module (as it does when the notebook cell is run).
if __name__ == "__main__":
    run_pipeline()


📤 Uploading batch 1/2...


  return func(*args, **kwargs)


📤 Uploading batch 2/2...
✅ Successfully added 4 knowledge units to Mem0 for user 'neoscribe'.


# Mem0 - For Viewing and Accessing the Data

In [20]:
from rich.console import Console
from rich.panel import Panel
from rich.prompt import Prompt
from rich.markdown import Markdown
from rich import box

# Mem0 user id whose memories the viewer searches.
USER_ID = "neoscribe"

# Rich console used for all viewer output, and the Mem0 client used for search.
console = Console()
client = MemoryClient(api_key=MEMO_API_KEY)

def display_memories(results):
    """Render search hits as rich panels, one panel per memory.

    Args:
        results: Either a list of hit dicts, or a dict that wraps that list
            under the ``"results"`` key. Each hit's ``"memory"`` value is
            rendered as Markdown and its optional ``"role"`` is shown in the
            panel title (defaulting to "user").
    """
    # Accept the wrapped response shape too. Iterating a dict directly would
    # walk its keys (strings) and fail on `.get`.
    if isinstance(results, dict):
        results = results.get("results", [])

    if not results:
        console.print("[bold red]⚠ No memories found.[/bold red]")
        return

    for idx, item in enumerate(results, 1):
        # `or` covers keys that are present but None, which `.get(key, default)`
        # would pass through and crash on.
        role = str(item.get("role") or "user").capitalize()
        content = str(item.get("memory") or "No content available")

        console.print(Panel.fit(
            Markdown(content),
            title=f"[bold cyan]{role}[/bold cyan] Entry #{idx}",
            border_style="magenta",
            box=box.ROUNDED,
            padding=(1, 2),
        ))

def search_loop():
    """Interactive prompt loop that searches stored memories and displays hits.

    Keeps asking for a query until the user types ``exit``, ``quit`` or ``q``
    (case-insensitive, surrounding whitespace ignored). Blank input is ignored.
    Every other query is sent to ``client.search`` for ``USER_ID`` with a limit
    of 10 and rendered with ``display_memories``.
    """
    console.print("[bold magenta]🔍 Welcome to NeoScribe Memory Viewer[/bold magenta]")
    console.print("Type a topic or keyword to search your stored flashcards, notes, or quizzes.")
    console.print("[grey70]Type 'exit' to quit.[/grey70]\n")

    while True:
        # Strip first so " exit " still quits and whitespace-only input is not
        # sent to the API as a query.
        query = Prompt.ask("🔎 Enter your search query").strip()
        if not query:
            continue
        if query.lower() in ("exit", "quit", "q"):
            console.print("\n[bold green]👋 Exiting NeoScribe Memory Viewer.[/bold green]")
            break

        try:
            results = client.search(query=query, user_id=USER_ID, limit=10)
            display_memories(results)
        except Exception as e:
            # Deliberately broad: one failed search should be reported without
            # ending the interactive session.
            console.print(f"[red]❌ Error searching memory:[/red] {e}")

# Entry point for this cell: starts the interactive memory viewer when the code
# executes as the main module (as it does when the notebook cell is run).
if __name__ == "__main__":
    search_loop()


Evolution


Genes


q
