In [3]:
import re
import uuid
import time
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
from collections import defaultdict

import requests
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, Range, SearchRequest
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

# Constants
COLLECTION_NAME = "readme_sections"
VECTOR_SIZE = 768  # Size of nomic-embed-text embeddings
OLLAMA_API_URL = "http://localhost:11434/api/embeddings"

@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]

class READMEProcessor:
    def __init__(self):
        self.qdrant_client = QdrantClient("localhost", port=6333)
        self._setup_collection()
        self.tfidf_vectorizer = TfidfVectorizer()

    def _setup_collection(self):
        if not self.qdrant_client.get_collection(COLLECTION_NAME):
            self.qdrant_client.create_collection(
                collection_name=COLLECTION_NAME,
                vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE)
            )

    def _get_embedding(self, text: str) -> List[float]:
        response = requests.post(OLLAMA_API_URL, json={
            "model": "nomic-embed-text",
            "prompt": text
        })
        response.raise_for_status()
        return response.json()['embedding']

    def parse_readme(self, content: str) -> List[ReadmeSection]:
        html = markdown.markdown(content)
        soup = BeautifulSoup(html, 'html.parser')
        sections = []
        section_stack = []
        current_section = None

        for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
            if elem.name.startswith('h'):
                level = int(elem.name[1])
                while section_stack and section_stack[-1].level >= level:
                    section_stack.pop()

                parent = section_stack[-1] if section_stack else None
                current_section = ReadmeSection(
                    content=elem.text,
                    heading=elem.text,
                    level=level,
                    parent=parent.heading if parent else None,
                    children=[],
                    metadata={}
                )
                if parent:
                    parent.children.append(current_section.heading)
                sections.append(current_section)
                section_stack.append(current_section)
            else:
                if current_section:
                    current_section.content += "\n" + elem.text

        return sections

    def process_readme(self, content: str):
        sections = self.parse_readme(content)
        section_graph = self._build_section_graph(sections)
        for section in sections:
            self._add_section_to_qdrant(section, section_graph)

    def _build_section_graph(self, sections: List[ReadmeSection]) -> nx.DiGraph:
        G = nx.DiGraph()
        for section in sections:
            G.add_node(section.heading, level=section.level)
            if section.parent:
                G.add_edge(section.parent, section.heading)
        return G

    def _add_section_to_qdrant(self, section: ReadmeSection, section_graph: nx.DiGraph):
        vector = self._get_embedding(section.content)
        point_id = str(uuid.uuid4())
        timestamp = time.time()

        # Calculate centrality and other graph-based features
        centrality = nx.degree_centrality(section_graph)[section.heading]
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)

        payload = {
            "content": section.content,
            "heading": section.heading,
            "level": section.level,
            "parent": section.parent,
            "children": section.children,
            "metadata": {
                **section.metadata,
                "timestamp": timestamp,
                "centrality": centrality,
                "depth": depth,
                "access_count": 0,
                "relevance_score": 1.0
            }
        }

        self.qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, vector=vector, payload=payload)]
        )

    def search_sections(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        query_vector = self._get_embedding(query)

        # Perform semantic search
        search_result = self.qdrant_client.search(
            collection_name=COLLECTION_NAME,
            query_vector=query_vector,
            limit=top_k * 2  # Retrieve more results for re-ranking
        )

        # Extract contents for TF-IDF re-ranking
        contents = [hit.payload['content'] for hit in search_result]
        tfidf_matrix = self.tfidf_vectorizer.fit_transform([query] + contents)
        
        # Calculate TF-IDF similarities
        tfidf_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]
        
        # Combine semantic and TF-IDF scores
        combined_scores = [(hit, 0.7 * hit.score + 0.3 * tfidf_sim) 
                           for hit, tfidf_sim in zip(search_result, tfidf_similarities)]
        
        # Sort by combined score and take top_k
        combined_scores.sort(key=lambda x: x[1], reverse=True)
        top_results = combined_scores[:top_k]

        results = []
        for hit, score in top_results:
            section = hit.payload
            section['score'] = score
            self._update_section_relevance(hit.id, score)
            results.append(section)

        return results

    def _update_section_relevance(self, point_id: str, score: float):
        current_payload = self.qdrant_client.retrieve(COLLECTION_NAME, [point_id])[0].payload
        current_payload['metadata']['access_count'] += 1
        current_payload['metadata']['relevance_score'] = (current_payload['metadata']['relevance_score'] + score) / 2

        self.qdrant_client.upsert(
            collection_name=COLLECTION_NAME,
            points=[PointStruct(id=point_id, payload=current_payload)]
        )

    def get_context(self, section_heading: str, depth: int = 1) -> Dict[str, Any]:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match={'value': section_heading})]
        )
        results = self.qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=filter_condition,
            limit=1
        )
        if not results.points:
            return {}

        section = results.points[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section['parent']:
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match={'value': section['parent']})]
            )
            parent_results = self.qdrant_client.scroll(
                collection_name=COLLECTION_NAME,
                scroll_filter=parent_filter,
                limit=1
            )
            if parent_results.points:
                context["parent"] = parent_results.points[0].payload

        if depth > 0:
            for child_heading in section['children']:
                child_context = self.get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

            if context["parent"]:
                for sibling_heading in context["parent"]["children"]:
                    if sibling_heading != section_heading:
                        sibling_context = self.get_context(sibling_heading, 0)
                        if sibling_context:
                            context["siblings"].append(sibling_context["current"])

        return context

    def prune_sections(self, threshold: float = 0.5, max_age_days: int = 30):
        current_time = time.time()
        max_age_seconds = max_age_days * 24 * 60 * 60

        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="metadata.relevance_score",
                    range=Range(lt=threshold)
                ),
                FieldCondition(
                    key="metadata.timestamp",
                    range=Range(lt=current_time - max_age_seconds)
                )
            ]
        )

        self.qdrant_client.delete(
            collection_name=COLLECTION_NAME,
            points_selector=filter_condition
        )

# FastAPI app
app = FastAPI()
readme_processor = READMEProcessor()

@app.post("/process_readme")
async def process_readme(file: UploadFile = File(...)):
    content = await file.read()
    readme_processor.process_readme(content.decode())
    return {"message": "README processed successfully"}

@app.post("/search")
async def search(query: str, top_k: int = 5):
    results = readme_processor.search_sections(query, top_k)
    return {"results": results}

@app.get("/context/{section_heading}")
async def get_context(section_heading: str, depth: int = 1):
    context = readme_processor.get_context(section_heading, depth)
    return {"context": context}

@app.post("/prune")
async def prune(threshold: float = 0.5, max_age_days: int = 30):
    readme_processor.prune_sections(threshold, max_age_days)
    return {"message": "Pruning completed successfully"}

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:httpx:HTTP Request: GET http://localhost:6333/collections/readme_sections "HTTP/1.1 404 Not Found"


UnexpectedResponse: Unexpected Response: 404 (Not Found)
Raw response content:
b'{"status":{"error":"Not found: Collection `readme_sections` doesn\'t exist!"},"time":0.000011626}'

In [None]:
import os
import uuid
import time
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass

import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, Range
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant client
qdrant_client = QdrantClient(host="localhost", port=6333)

# Constants
COLLECTION_NAME = "advanced_readme_sections"
VECTOR_SIZE = 768

# Create collection if it doesn't exist
try:
    qdrant_client.get_collection(COLLECTION_NAME)
    logger.info(f"Collection '{COLLECTION_NAME}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME}'.")
    qdrant_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.EUCLID)
    )

# Initialize FastAPI app
app = FastAPI()

@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: List[float] = None

def get_embedding(text: str) -> np.ndarray:
    OLLAMA_API_URL = "http://localhost:11434/api/embeddings"
    response = requests.post(OLLAMA_API_URL, json={
        "model": "nomic-embed-text",
        "prompt": text
    })
    response.raise_for_status()
    return np.array(response.json()['embedding'])

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text,
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.text

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def cluster_sections(sections: List[ReadmeSection], n_clusters: int = 10):
    embeddings = np.array([section.vector for section in sections])
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(embeddings)
    for section, label in zip(sections, cluster_labels):
        section.metadata['cluster'] = int(label)

def add_section_to_qdrant(section: ReadmeSection, section_graph: nx.DiGraph):
    try:
        vector = get_embedding(section.content)
    except Exception as e:
        logger.error(f"Failed to get embedding for section '{section.heading}': {e}")
        return
    
    point_id = str(uuid.uuid4())
    timestamp = time.time()

    centrality = nx.degree_centrality(section_graph).get(section.heading, 0)
    try:
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)
    except nx.NetworkXNoPath:
        depth = 0

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "centrality": centrality,
            "depth": depth,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=[PointStruct(id=point_id, vector=vector.tolist(), payload=payload)]
    )
    logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")

knn_model: Optional[NearestNeighbors] = None
point_id_mapping: Dict[int, str] = {}

def build_knn_index():
    global knn_model, point_id_mapping
    logger.info("Building KNN index...")
    all_points = qdrant_client.scroll(collection_name=COLLECTION_NAME, limit=10000)
    
    if not all_points or not all_points[0]:
        logger.warning("No points found in the collection. KNN index not built.")
        knn_model = None
        point_id_mapping = {}
        return
    
    embeddings = np.array([point.vector for point in all_points[0]])
    
    if embeddings.size == 0:
        logger.warning("Embeddings array is empty. KNN index not built.")
        knn_model = None
        point_id_mapping = {}
        return
    
    knn_model = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
    knn_model.fit(embeddings)
    point_id_mapping = {i: point.id for i, point in enumerate(all_points[0])}
    logger.info(f"KNN index built successfully with {len(point_id_mapping)} points.")

tfidf_vectorizer = TfidfVectorizer()

def calculate_tfidf_similarity(query: str, document: str) -> float:
    tfidf_matrix = tfidf_vectorizer.fit_transform([query, document])
    return (tfidf_matrix * tfidf_matrix.T).A[0, 1]

def prepare_training_data(query: str, sections: List[ReadmeSection]):
    features = []
    labels = []
    for section in sections:
        feature_vector = [
            section.metadata.get('tfidf_similarity', 0.0),
            section.metadata.get('semantic_similarity', 0.0),
            section.metadata.get('centrality', 0.0),
            section.level,
            section.metadata.get('cluster', 0)
        ]
        features.append(feature_vector)
        labels.append(section.metadata.get('relevance_label', 1))  # Placeholder
    return np.array(features), np.array(labels)

xgb_ranker = XGBRanker(
    objective='rank:pairwise',
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100
)

def search_sections(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    if knn_model is None:
        logger.warning("KNN model is not built. No search can be performed.")
        return []
    
    query_vector = get_embedding(query).reshape(1, -1)
    distances, indices = knn_model.kneighbors(query_vector)
    nearest_points = [point_id_mapping[idx] for idx in indices[0]]
    
    sections = []
    for point_id in nearest_points:
        point = qdrant_client.retrieve(collection_name=COLLECTION_NAME, ids=[point_id])[0]
        section = point.payload
        section['vector'] = point.vector
        tfidf_sim = calculate_tfidf_similarity(query, section['content'])
        section['metadata']['tfidf_similarity'] = tfidf_sim
        semantic_sim = 1 / (1 + distances[0][indices[0].tolist().index(point_id_mapping.index(point_id))])
        section['metadata']['semantic_similarity'] = semantic_sim
        sections.append(section)
    
    if not sections:
        return []
    
    X_test, _ = prepare_training_data(query, sections)
    relevance_scores = xgb_ranker.predict(X_test)
    
    for section, score in zip(sections, relevance_scores):
        section['score'] = score
    sections.sort(key=lambda x: x['score'], reverse=True)
    
    for section in sections[:top_k]:
        update_section_relevance(section['id'], section['score'])
    return sections[:top_k]

def update_section_relevance(point_id: str, score: float):
    current_payload = qdrant_client.retrieve(
        collection_name=COLLECTION_NAME, ids=[point_id]
    )[0].payload
    current_payload['metadata']['access_count'] += 1
    current_payload['metadata']['relevance_score'] = (
        current_payload['metadata']['relevance_score'] + score
    ) / 2

    qdrant_client.upsert(
        collection_name=COLLECTION_NAME,
        points=[PointStruct(id=point_id, payload=current_payload)]
    )
    logger.info(f"Updated relevance for point ID {point_id}.")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    filter_condition = Filter(
        must=[FieldCondition(key="heading", match={'value': section_heading})]
    )
    results = qdrant_client.scroll(
        collection_name=COLLECTION_NAME,
        scroll_filter=filter_condition,
        limit=1
    )
    if not results.points:
        return {}

    section = results.points[0].payload
    context = {
        "current": section,
        "parent": None,
        "children": [],
        "siblings": []
    }

    if section['parent']:
        parent_filter = Filter(
            must=[FieldCondition(key="heading", match={'value': section['parent']})]
        )
        parent_results = qdrant_client.scroll(
            collection_name=COLLECTION_NAME,
            scroll_filter=parent_filter,
            limit=1
        )
        if parent_results.points:
            context["parent"] = parent_results.points[0].payload

    if depth > 0 and 'children' in section:
        for child_heading in section['children']:
            child_context = get_context(child_heading, depth - 1)
            if child_context:
                context["children"].append(child_context["current"])

    if context["parent"] and 'children' in context["parent"]:
        for sibling_heading in context["parent"]["children"]:
            if sibling_heading != section_heading:
                sibling_context = get_context(sibling_heading, 0)
                if sibling_context:
                    context["siblings"].append(sibling_context["current"])

    return context

def prune_sections(threshold: float = 0.5, max_age_days: int = 30):
    current_time = time.time()
    max_age_seconds = max_age_days * 24 * 60 * 60

    filter_condition = Filter(
        must=[
            FieldCondition(
                key="metadata.relevance_score",
                range=Range(lt=threshold)
            ),
            FieldCondition(
                key="metadata.timestamp",
                range=Range(lt=current_time - max_age_seconds)
            )
        ]
    )

    qdrant_client.delete(
        collection_name=COLLECTION_NAME,
        points_selector=filter_condition
    )
    logger.info("Pruned low-relevance and old sections.")

@app.post("/process_readme")
async def process_readme_api(file: UploadFile = File(...)):
    content = await file.read()
    sections = parse_readme(content.decode())
    section_graph = build_section_graph(sections)
    for section in sections:
        section.vector = get_embedding(section.content).tolist()
    cluster_sections(sections)
    for section in sections:
        add_section_to_qdrant(section, section_graph)
    build_knn_index()
    return {"message": "README processed successfully"}

@app.post("/search")
async def search_api(query: str, top_k: int = 5):
    results = search_sections(query, top_k)
    return {"results": results}

@app.get("/context/{section_heading}")
async def get_context_api(section_heading: str, depth: int = 1):
    context = get_context(section_heading, depth)
    return {"context": context}

@app.post("/prune")
async def prune_api(threshold: float = 0.5, max_age_days: int = 30):
    prune_sections(threshold, max_age_days)
    return {"message": "Pruning completed successfully"}

@app.post("/rebuild_knn_index")
async def rebuild_knn_index():
    build_knn_index()
    return {"message": "KNN index rebuilt successfully"}

if __name__ == "__main__":
    import uvicorn
    build_knn_index()  # This will now handle empty collections gracefully
    uvicorn.run(app, host="0.0.0.0", port=8000)


In [4]:
!pip install xgboost




In [8]:
# Install Required Dependencies
!pip install fastapi uvicorn requests numpy qdrant-client markdown beautifulsoup4 scikit-learn xgboost networkx nest_asyncio python-dotenv sentence-transformers

# Comprehensive Implementation in One Code Block

import os
import uuid
import time
import math
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import asyncio
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, Range
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx
import nest_asyncio
import uvicorn
from threading import Thread
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# Apply nest_asyncio to allow nested event loops in Jupyter
nest_asyncio.apply()

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant clients for both collections
qdrant_client_readme = QdrantClient(host="localhost", port=6333)
qdrant_client_mind = QdrantClient(host="localhost", port=6333)

# Constants for Readme Sections
COLLECTION_NAME_README = "advanced_readme_sections"
VECTOR_SIZE_README = 768  # Adjust based on your embedding model

# Constants for Memory Manager
COLLECTION_NAME_MIND = "Mind"
VECTOR_SIZE_MIND = 384  # Example size; adjust based on SentenceTransformer model

# Create Readme Sections Collection if it doesn't exist
try:
    qdrant_client_readme.get_collection(COLLECTION_NAME_README)
    logger.info(f"Collection '{COLLECTION_NAME_README}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_README}'.")
    qdrant_client_readme.create_collection(
        collection_name=COLLECTION_NAME_README,
        vectors_config=VectorParams(size=VECTOR_SIZE_README, distance=Distance.EUCLID)
    )

# Create Mind Collection if it doesn't exist
try:
    qdrant_client_mind.get_collection(COLLECTION_NAME_MIND)
    logger.info(f"Collection '{COLLECTION_NAME_MIND}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_MIND}'.")
    # Initialize SentenceTransformer for MemoryManager
    memory_model = SentenceTransformer('all-MiniLM-L6-v2')
    VECTOR_SIZE_MIND = memory_model.get_sentence_embedding_dimension()
    qdrant_client_mind.create_collection(
        collection_name=COLLECTION_NAME_MIND,
        vectors_config=VectorParams(size=VECTOR_SIZE_MIND, distance=Distance.COSINE)
    )

# Define Data Models
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: Optional[List[float]] = None

class MemoryPacket(BaseModel):
    vector: List[float]
    content: str
    metadata: Dict[str, Any]

# Define MemoryManager Class
class MemoryManager:
    def __init__(self, qdrant_client: QdrantClient, collection_name: str, model_name: str = 'all-MiniLM-L6-v2'):
        self.qdrant_client = qdrant_client
        self.collection_name = collection_name
        self.model = SentenceTransformer(model_name)
        self._setup_collection()

    def _setup_collection(self):
        try:
            self.qdrant_client.get_collection(self.collection_name)
            logger.info(f"Collection '{self.collection_name}' exists.")
        except Exception:
            logger.info(f"Creating collection '{self.collection_name}'.")
            self.qdrant_client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=self.model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
            )

    async def create_memory(self, content: str, metadata: Dict[str, Any]):
        vector = self.model.encode(content).tolist()
        memory_packet = MemoryPacket(vector=vector, content=content, metadata=metadata)
        point_id = str(uuid.uuid4())

        try:
            self.qdrant_client.upsert(
                collection_name=self.collection_name,
                points=[PointStruct(id=point_id, vector=vector, payload=memory_packet.dict())]
            )
            logger.info(f"Memory created successfully with ID: {point_id}")
        except Exception as e:
            logger.error(f"Error creating memory: {e}")

    async def recall_memory(self, query_content: str, top_k: int = 5):
        query_vector = self.model.encode(query_content).tolist()

        try:
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            for memory in memories:
                self._update_relevance(memory, query_vector)

            ranked_memories = sorted(
                memories,
                key=lambda mem: (
                    mem.metadata['semantic_relativity'] * mem.metadata['memetic_similarity'] * mem.metadata['gravitational_pull']
                ),
                reverse=True
            )

            return [{
                "content": memory.content,
                "metadata": memory.metadata
            } for memory in ranked_memories[:top_k]]
        except Exception as e:
            logger.error(f"Error recalling memory: {e}")
            return []

    def _update_relevance(self, memory: MemoryPacket, query_vector: List[float]):
        memory.metadata["semantic_relativity"] = self._calculate_cosine_similarity(memory.vector, query_vector)
        memory.metadata["memetic_similarity"] = self._calculate_memetic_similarity(memory.metadata)
        memory.metadata["gravitational_pull"] = self._calculate_gravitational_pull(memory)
        memory.metadata["spacetime_coordinate"] = self._calculate_spacetime_coordinate(memory)

    @staticmethod
    def _calculate_cosine_similarity(vector_a: List[float], vector_b: List[float]) -> float:
        dot_product = sum(a * b for a, b in zip(vector_a, vector_b))
        magnitude_a = math.sqrt(sum(a ** 2 for a in vector_a))
        magnitude_b = math.sqrt(sum(b ** 2 for b in vector_b))

        if magnitude_a == 0 or magnitude_b == 0:
            return 0.0

        return dot_product / (magnitude_a * magnitude_b)

    @staticmethod
    def _calculate_memetic_similarity(metadata: Dict[str, Any]) -> float:
        tags = set(metadata.get("tags", []))
        reference_tags = set(metadata.get("reference_tags", []))

        if not tags or not reference_tags:
            return 1.0

        intersection = len(tags.intersection(reference_tags))
        union = len(tags.union(reference_tags))

        return intersection / union if union > 0 else 1.0

    @staticmethod
    def _calculate_gravitational_pull(memory: MemoryPacket) -> float:
        vector_magnitude = math.sqrt(sum(x ** 2 for x in memory.vector))
        recall_count = memory.metadata.get("recall_count", 0)
        memetic_similarity = memory.metadata.get("memetic_similarity", 1.0)
        semantic_relativity = memory.metadata.get("semantic_relativity", 1.0)

        return vector_magnitude * (1 + math.log1p(recall_count)) * memetic_similarity * semantic_relativity

    @staticmethod
    def _calculate_spacetime_coordinate(memory: MemoryPacket) -> float:
        time_decay_factor = 1 + (time.time() - memory.metadata.get("timestamp", time.time()))
        return memory.metadata["gravitational_pull"] / time_decay_factor

    async def prune_memories(self, threshold: float = 1e-5, max_age_days: int = 30):
        try:
            current_time = time.time()
            max_age_seconds = max_age_days * 24 * 60 * 60

            filter_condition = Filter(
                must=[
                    FieldCondition(
                        key="metadata.relevance_score",
                        range=Range(lt=threshold)
                    ),
                    FieldCondition(
                        key="metadata.timestamp",
                        range=Range(lt=current_time - max_age_seconds)
                    )
                ]
            )

            self.qdrant_client.delete(
                collection_name=self.collection_name,
                filter=filter_condition
            )
            logger.info("Pruned low-relevance and old memories.")
        except Exception as e:
            logger.error(f"Error pruning memories: {e}")

    async def purge_all_memories(self):
        try:
            self.qdrant_client.delete_collection(self.collection_name)
            self._setup_collection()
            logger.info(f"Purged all memories in the collection '{self.collection_name}'.")
        except Exception as e:
            logger.error(f"Error purging all memories: {e}")
            raise e

    async def recall_memory_with_metadata(self, query_content: str, search_metadata: Dict[str, Any], top_k: int = 10):
        try:
            query_vector = self.model.encode(query_content).tolist()
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            matching_memories = []
            for memory in memories:
                memory_metadata = memory.metadata
                if all(memory_metadata.get(key) == value for key, value in search_metadata.items()):
                    matching_memories.append({
                        "content": memory.content,
                        "metadata": memory_metadata
                    })

            if not matching_memories:
                return {"message": "No matching memories found"}

            return {"memories": matching_memories}
        except Exception as e:
            logger.error(f"Error recalling memories by metadata: {str(e)}")
            return {"message": "Error during memory recall"}

    async def delete_memories_by_metadata(self, metadata: Dict[str, Any]):
        try:
            # Scroll through all memories in the collection
            scroll_result = self.qdrant_client.scroll(self.collection_name, limit=1000)

            memories_to_delete = []
            for point in scroll_result:
                point_metadata = point.payload.get("metadata", {})
                if all(point_metadata.get(key) == value for key, value in metadata.items()):
                    memories_to_delete.append(point.id)

            if memories_to_delete:
                self.qdrant_client.delete(
                    collection_name=self.collection_name,
                    points_selector={"points": memories_to_delete}
                )
                logger.info(f"Deleted {len(memories_to_delete)} memories matching the metadata.")
            else:
                logger.info("No memories found matching the specified metadata.")
        except Exception as e:
            logger.error(f"Error deleting memories by metadata: {str(e)}")

# Initialize MemoryManager for Mind Collection
memory_manager = MemoryManager(
    qdrant_client=qdrant_client_mind,
    collection_name=COLLECTION_NAME_MIND,
    model_name='all-MiniLM-L6-v2'
)

# Utility Functions for Readme Processing

def get_embedding(text: str) -> np.ndarray:
    OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://localhost:11434/api/embeddings")
    try:
        response = requests.post(OLLAMA_API_URL, json={
            "model": "nomic-embed-text",
            "prompt": text
        })
        response.raise_for_status()
        return np.array(response.json()['embedding'])
    except Exception as e:
        logger.error(f"Error fetching embedding: {e}")
        raise

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.get_text(separator=" ", strip=True)

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def cluster_sections(sections: List[ReadmeSection], n_clusters: int = 10):
    embeddings = np.array([section.vector for section in sections if section.vector is not None])
    if embeddings.size == 0:
        logger.warning("No embeddings available for clustering.")
        return

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(embeddings)
    for section, label in zip([s for s in sections if s.vector is not None], cluster_labels):
        section.metadata['cluster'] = int(label)

def add_section_to_qdrant(section: ReadmeSection, section_graph: nx.DiGraph):
    if not section.vector:
        logger.error(f"Section '{section.heading}' has no vector.")
        return

    point_id = str(uuid.uuid4())
    timestamp = time.time()

    centrality = nx.degree_centrality(section_graph).get(section.heading, 0)
    try:
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)
    except nx.NetworkXNoPath:
        depth = 0

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "centrality": centrality,
            "depth": depth,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    try:
        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=section.vector, payload=payload)]
        )
        logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")
    except Exception as e:
        logger.error(f"Failed to upsert section '{section.heading}': {e}")

knn_model_readme: Optional[NearestNeighbors] = None
point_id_mapping_readme: Dict[int, str] = {}

def build_knn_index_readme():
    global knn_model_readme, point_id_mapping_readme
    logger.info("Building KNN index for Readme Sections...")
    try:
        # Scroll retrieves points in batches; adjust batch size as needed
        all_points = []
        scroll_response = qdrant_client_readme.scroll(collection_name=COLLECTION_NAME_README, limit=10000)
        while scroll_response:
            all_points.extend(scroll_response.points)
            if scroll_response.next_page_offset:
                scroll_response = qdrant_client_readme.scroll(collection_name=COLLECTION_NAME_README, limit=10000, offset=scroll_response.next_page_offset)
            else:
                break

        if not all_points:
            logger.warning("No points found in the Readme collection. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        embeddings = np.array([point.vector for point in all_points])
        if embeddings.size == 0:
            logger.warning("Embeddings array is empty for Readme sections. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        knn_model_readme = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
        knn_model_readme.fit(embeddings)
        point_id_mapping_readme = {i: point.id for i, point in enumerate(all_points)}
        logger.info(f"KNN index for Readme sections built successfully with {len(point_id_mapping_readme)} points.")
    except Exception as e:
        logger.error(f"Error building KNN index for Readme sections: {e}")
        knn_model_readme = None
        point_id_mapping_readme = {}

tfidf_vectorizer = TfidfVectorizer()

def calculate_tfidf_similarity(query: str, document: str) -> float:
    tfidf_matrix = tfidf_vectorizer.fit_transform([query, document])
    return (tfidf_matrix * tfidf_matrix.T).A[0, 1]

def prepare_training_data(query: str, sections: List[Dict[str, Any]]):
    features = []
    labels = []
    for section in sections:
        feature_vector = [
            section['metadata'].get('tfidf_similarity', 0.0),
            section['metadata'].get('semantic_similarity', 0.0),
            section['metadata'].get('centrality', 0.0),
            section['level'],
            section['metadata'].get('cluster', 0)
        ]
        features.append(feature_vector)
        labels.append(section['metadata'].get('relevance_label', 1))  # Placeholder
    return np.array(features), np.array(labels)

xgb_ranker = XGBRanker(
    objective='rank:pairwise',
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100
)

def train_xgb_ranker():
    try:
        # Placeholder: Implement actual training logic
        # This should be done offline with proper labeled data
        # For demonstration, we'll skip training
        logger.info("Training XGBRanker is not implemented. Using default model.")
    except Exception as e:
        logger.error(f"Error training XGBRanker: {e}")

# Train the ranker (currently a placeholder)
train_xgb_ranker()

def search_sections(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    if knn_model_readme is None:
        logger.warning("KNN model for Readme sections is not built. No search can be performed.")
        return []

    try:
        query_vector = get_embedding(query).reshape(1, -1)
    except Exception as e:
        logger.error(f"Failed to get embedding for query '{query}': {e}")
        return []

    try:
        distances, indices = knn_model_readme.kneighbors(query_vector)
    except Exception as e:
        logger.error(f"Error during KNN search: {e}")
        return []

    nearest_points = [point_id_mapping_readme[idx] for idx in indices[0]]

    sections = []
    for idx, point_id in enumerate(nearest_points):
        try:
            points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
            if not points:
                continue
            point = points[0]
            section = point.payload
            section['vector'] = point.vector.tolist()
            tfidf_sim = calculate_tfidf_similarity(query, section['content'])
            section['metadata']['tfidf_similarity'] = tfidf_sim
            # Use the distance directly
            semantic_sim = 1 / (1 + distances[0][idx])
            section['metadata']['semantic_similarity'] = semantic_sim
            sections.append(section)
        except Exception as e:
            logger.error(f"Error retrieving section '{point_id}': {e}")

    if not sections:
        return []

    X_test, _ = prepare_training_data(query, sections)
    if X_test.size == 0:
        logger.warning("No features available for ranking.")
        return []

    try:
        relevance_scores = xgb_ranker.predict(X_test)
    except Exception as e:
        logger.error(f"Error during ranking: {e}")
        relevance_scores = np.ones(len(sections))  # Fallback

    for section, score in zip(sections, relevance_scores):
        section['score'] = score
    sections.sort(key=lambda x: x['score'], reverse=True)

    for section in sections[:top_k]:
        update_section_relevance(section['id'], section['score'])

    return sections[:top_k]

def update_section_relevance(point_id: str, score: float):
    try:
        points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
        if not points:
            logger.warning(f"Point ID '{point_id}' not found for relevance update.")
            return
        current_payload = points[0].payload
        current_payload['metadata']['access_count'] += 1
        current_payload['metadata']['relevance_score'] = (
            current_payload['metadata']['relevance_score'] + score
        ) / 2

        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=points[0].vector.tolist(), payload=current_payload)]
        )
        logger.info(f"Updated relevance for point ID {point_id}.")
    except Exception as e:
        logger.error(f"Error updating relevance for point ID '{point_id}': {e}")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    try:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match={'value': section_heading})]
        )
        results = qdrant_client_readme.scroll(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition,
            limit=1
        )
        if not results.points:
            return {}

        section = results.points[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section.get('parent'):
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match={'value': section['parent']})]
            )
            parent_results = qdrant_client_readme.scroll(
                collection_name=COLLECTION_NAME_README,
                filter=parent_filter,
                limit=1
            )
            if parent_results.points:
                context["parent"] = parent_results.points[0].payload

        if depth > 0 and 'children' in section:
            for child_heading in section['children']:
                child_context = get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

        if context.get("parent") and 'children' in context["parent"]:
            for sibling_heading in context["parent"]["children"]:
                if sibling_heading != section_heading:
                    sibling_context = get_context(sibling_heading, 0)
                    if sibling_context:
                        context["siblings"].append(sibling_context["current"])

        return context
    except Exception as e:
        logger.error(f"Error getting context for section '{section_heading}': {e}")
        return {}

def prune_sections(threshold: float = 0.5, max_age_days: int = 30):
    try:
        current_time = time.time()
        max_age_seconds = max_age_days * 24 * 60 * 60

        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="metadata.relevance_score",
                    range=Range(lt=threshold)
                ),
                FieldCondition(
                    key="metadata.timestamp",
                    range=Range(lt=current_time - max_age_seconds)
                )
            ]
        )

        qdrant_client_readme.delete(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition
        )
        logger.info("Pruned low-relevance and old sections.")
    except Exception as e:
        logger.error(f"Error pruning sections: {e}")

# Initialize FastAPI app
app = FastAPI()

# Define Endpoints
@app.post("/process_readme")
async def process_readme_api(file: UploadFile = File(...)):
    try:
        content = await file.read()
        sections = parse_readme(content.decode())
        section_graph = build_section_graph(sections)
        for section in sections:
            section.vector = get_embedding(section.content).tolist()
        cluster_sections(sections)
        for section in sections:
            add_section_to_qdrant(section, section_graph)
        build_knn_index_readme()
        return {"message": "README processed successfully"}
    except Exception as e:
        logger.error(f"Error processing README: {e}")
        raise HTTPException(status_code=500, detail="Failed to process README.")

@app.post("/search")
async def search_api(query: str, top_k: int = 5):
    try:
        results = search_sections(query, top_k)
        return {"results": results}
    except Exception as e:
        logger.error(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail="Search failed.")

@app.get("/context/{section_heading}")
async def get_context_api(section_heading: str, depth: int = 1):
    try:
        context = get_context(section_heading, depth)
        return {"context": context}
    except Exception as e:
        logger.error(f"Error retrieving context: {e}")
        raise HTTPException(status_code=500, detail="Failed to retrieve context.")

@app.post("/prune")
async def prune_api(threshold: float = 0.5, max_age_days: int = 30):
    try:
        prune_sections(threshold, max_age_days)
        return {"message": "Pruning completed successfully"}
    except Exception as e:
        logger.error(f"Error during pruning: {e}")
        raise HTTPException(status_code=500, detail="Pruning failed.")

@app.post("/rebuild_knn_index")
async def rebuild_knn_index_api():
    try:
        build_knn_index_readme()
        return {"message": "KNN index rebuilt successfully"}
    except Exception as e:
        logger.error(f"Error rebuilding KNN index: {e}")
        raise HTTPException(status_code=500, detail="Failed to rebuild KNN index.")

# Function to run Uvicorn server in a separate thread
def run_server():
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server.serve())

# Start the server in a separate thread
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

print("FastAPI server is running on http://0.0.0.0:8000")

# Example Usage of MemoryManager (Optional)
# You can interact with MemoryManager separately if needed

# Example: Creating a memory
# await memory_manager.create_memory(content="Sample memory content.", metadata={"tags": ["example", "test"], "reference_tags": ["example"]})

# Example: Recalling memories
# memories = await memory_manager.recall_memory(query_content="Sample query.")
# print(memories)

# Example: Pruning memories
# await memory_manager.prune_memories()

# Example: Purging all memories
# await memory_manager.purge_all_memories()

# Example: Recalling memories with metadata
# memories_with_metadata = await memory_manager.recall_memory_with_metadata(query_content="Sample query.", search_metadata={"tags": "example"})
# print(memories_with_metadata)

# Example: Deleting memories by metadata
# await memory_manager.delete_memories_by_metadata(metadata={"tags": "test"})


INFO:httpx:HTTP Request: GET http://localhost:6333/collections/advanced_readme_sections "HTTP/1.1 200 OK"
INFO:__main__:Collection 'advanced_readme_sections' already exists.
INFO:httpx:HTTP Request: GET http://localhost:6333/collections/Mind "HTTP/1.1 200 OK"
INFO:__main__:Collection 'Mind' already exists.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2




INFO:httpx:HTTP Request: GET http://localhost:6333/collections/Mind "HTTP/1.1 200 OK"
INFO:__main__:Collection 'Mind' exists.
INFO:__main__:Training XGBRanker is not implemented. Using default model.


FastAPI server is running on http://0.0.0.0:8000


Exception in thread Thread-21 (run_server):
Traceback (most recent call last):
  File "C:\Users\Dima\anaconda3\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "C:\Users\Dima\anaconda3\Lib\threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\Dima\AppData\Local\Temp\ipykernel_3212\121075372.py", line 696, in run_server
  File "C:\Users\Dima\anaconda3\Lib\site-packages\nest_asyncio.py", line 45, in _get_event_loop
    loop = events.get_event_loop_policy().get_event_loop()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Dima\anaconda3\Lib\asyncio\events.py", line 677, in get_event_loop
    raise RuntimeError('There is no current event loop in thread %r.'
RuntimeError: There is no current event loop in thread 'Thread-21 (run_server)'.


In [None]:
# Install Required Dependencies

# Comprehensive Implementation in One Code Block

import os
import uuid
import time
import math
import logging
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import asyncio
import requests
import numpy as np
from qdrant_client import QdrantClient
from qdrant_client.http.models import (
    Distance, VectorParams, PointStruct, Filter, FieldCondition, Range
)
from fastapi import FastAPI, HTTPException, UploadFile, File
from pydantic import BaseModel
import markdown
from bs4 import BeautifulSoup
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from xgboost import XGBRanker
import networkx as nx
import nest_asyncio
import uvicorn
from threading import Thread
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime

# Apply nest_asyncio to allow nested event loops in Jupyter
nest_asyncio.apply()

# Initialize logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Initialize Qdrant clients for both collections
qdrant_client_readme = QdrantClient(host="localhost", port=6333)
qdrant_client_mind = QdrantClient(host="localhost", port=6333)

# Constants for Readme Sections
COLLECTION_NAME_README = "advanced_readme_sections"
VECTOR_SIZE_README = 768  # Adjust based on your embedding model

# Constants for Memory Manager
COLLECTION_NAME_MIND = "Mind"
VECTOR_SIZE_MIND = 384  # Example size; will be updated based on model

# Create Readme Sections Collection if it doesn't exist
try:
    qdrant_client_readme.get_collection(COLLECTION_NAME_README)
    logger.info(f"Collection '{COLLECTION_NAME_README}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_README}'.")
    qdrant_client_readme.create_collection(
        collection_name=COLLECTION_NAME_README,
        vectors_config=VectorParams(size=VECTOR_SIZE_README, distance=Distance.EUCLID)
    )

# Create Mind Collection if it doesn't exist
try:
    qdrant_client_mind.get_collection(COLLECTION_NAME_MIND)
    logger.info(f"Collection '{COLLECTION_NAME_MIND}' already exists.")
except Exception:
    logger.info(f"Creating collection '{COLLECTION_NAME_MIND}'.")
    # Initialize SentenceTransformer for MemoryManager
    memory_model = SentenceTransformer('all-MiniLM-L6-v2')
    VECTOR_SIZE_MIND = memory_model.get_sentence_embedding_dimension()
    qdrant_client_mind.create_collection(
        collection_name=COLLECTION_NAME_MIND,
        vectors_config=VectorParams(size=VECTOR_SIZE_MIND, distance=Distance.COSINE)
    )

# Define Data Models
@dataclass
class ReadmeSection:
    content: str
    heading: str
    level: int
    parent: Optional[str]
    children: List[str]
    metadata: Dict[str, Any]
    vector: Optional[List[float]] = None

class MemoryPacket(BaseModel):
    vector: List[float]
    content: str
    metadata: Dict[str, Any]

# Define MemoryManager Class
class MemoryManager:
    def __init__(self, qdrant_client: QdrantClient, collection_name: str, model_name: str = 'all-MiniLM-L6-v2'):
        self.qdrant_client = qdrant_client
        self.collection_name = collection_name
        self.model = SentenceTransformer(model_name)
        self._setup_collection()

    def _setup_collection(self):
        try:
            self.qdrant_client.get_collection(self.collection_name)
            logger.info(f"Collection '{self.collection_name}' exists.")
        except Exception:
            logger.info(f"Creating collection '{self.collection_name}'.")
            self.qdrant_client.create_collection(
                collection_name=self.collection_name,
                vectors_config=VectorParams(size=self.model.get_sentence_embedding_dimension(), distance=Distance.COSINE)
            )

    async def create_memory(self, content: str, metadata: Dict[str, Any]):
        vector = self.model.encode(content).tolist()
        memory_packet = MemoryPacket(vector=vector, content=content, metadata=metadata)
        point_id = str(uuid.uuid4())

        try:
            self.qdrant_client.upsert(
                collection_name=self.collection_name,
                points=[PointStruct(id=point_id, vector=vector, payload=memory_packet.dict())]
            )
            logger.info(f"Memory created successfully with ID: {point_id}")
        except Exception as e:
            logger.error(f"Error creating memory: {e}")

    async def recall_memory(self, query_content: str, top_k: int = 5):
        query_vector = self.model.encode(query_content).tolist()

        try:
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            for memory in memories:
                self._update_relevance(memory, query_vector)

            ranked_memories = sorted(
                memories,
                key=lambda mem: (
                    mem.metadata['semantic_relativity'] * mem.metadata['memetic_similarity'] * mem.metadata['gravitational_pull']
                ),
                reverse=True
            )

            return [{
                "content": memory.content,
                "metadata": memory.metadata
            } for memory in ranked_memories[:top_k]]
        except Exception as e:
            logger.error(f"Error recalling memory: {e}")
            return []

    def _update_relevance(self, memory: MemoryPacket, query_vector: List[float]):
        memory.metadata["semantic_relativity"] = self._calculate_cosine_similarity(memory.vector, query_vector)
        memory.metadata["memetic_similarity"] = self._calculate_memetic_similarity(memory.metadata)
        memory.metadata["gravitational_pull"] = self._calculate_gravitational_pull(memory)
        memory.metadata["spacetime_coordinate"] = self._calculate_spacetime_coordinate(memory)

    @staticmethod
    def _calculate_cosine_similarity(vector_a: List[float], vector_b: List[float]) -> float:
        dot_product = sum(a * b for a, b in zip(vector_a, vector_b))
        magnitude_a = math.sqrt(sum(a ** 2 for a in vector_a))
        magnitude_b = math.sqrt(sum(b ** 2 for b in vector_b))

        if magnitude_a == 0 or magnitude_b == 0:
            return 0.0

        return dot_product / (magnitude_a * magnitude_b)

    @staticmethod
    def _calculate_memetic_similarity(metadata: Dict[str, Any]) -> float:
        tags = set(metadata.get("tags", []))
        reference_tags = set(metadata.get("reference_tags", []))

        if not tags or not reference_tags:
            return 1.0

        intersection = len(tags.intersection(reference_tags))
        union = len(tags.union(reference_tags))

        return intersection / union if union > 0 else 1.0

    @staticmethod
    def _calculate_gravitational_pull(memory: MemoryPacket) -> float:
        vector_magnitude = math.sqrt(sum(x ** 2 for x in memory.vector))
        recall_count = memory.metadata.get("recall_count", 0)
        memetic_similarity = memory.metadata.get("memetic_similarity", 1.0)
        semantic_relativity = memory.metadata.get("semantic_relativity", 1.0)

        return vector_magnitude * (1 + math.log1p(recall_count)) * memetic_similarity * semantic_relativity

    @staticmethod
    def _calculate_spacetime_coordinate(memory: MemoryPacket) -> float:
        time_decay_factor = 1 + (time.time() - memory.metadata.get("timestamp", time.time()))
        return memory.metadata["gravitational_pull"] / time_decay_factor

    async def prune_memories(self, threshold: float = 1e-5, max_age_days: int = 30):
        try:
            current_time = time.time()
            max_age_seconds = max_age_days * 24 * 60 * 60

            filter_condition = Filter(
                must=[
                    FieldCondition(
                        key="metadata.relevance_score",
                        range=Range(lt=threshold)
                    ),
                    FieldCondition(
                        key="metadata.timestamp",
                        range=Range(lt=current_time - max_age_seconds)
                    )
                ]
            )

            self.qdrant_client.delete(
                collection_name=self.collection_name,
                filter=filter_condition
            )
            logger.info("Pruned low-relevance and old memories.")
        except Exception as e:
            logger.error(f"Error pruning memories: {e}")

    async def purge_all_memories(self):
        try:
            self.qdrant_client.delete_collection(self.collection_name)
            self._setup_collection()
            logger.info(f"Purged all memories in the collection '{self.collection_name}'.")
        except Exception as e:
            logger.error(f"Error purging all memories: {e}")
            raise e

    async def recall_memory_with_metadata(self, query_content: str, search_metadata: Dict[str, Any], top_k: int = 10):
        try:
            query_vector = self.model.encode(query_content).tolist()
            results = self.qdrant_client.search(
                collection_name=self.collection_name,
                query_vector=query_vector,
                limit=top_k
            )

            memories = [MemoryPacket(**hit.payload) for hit in results]

            matching_memories = []
            for memory in memories:
                memory_metadata = memory.metadata
                if all(memory_metadata.get(key) == value for key, value in search_metadata.items()):
                    matching_memories.append({
                        "content": memory.content,
                        "metadata": memory_metadata
                    })

            if not matching_memories:
                return {"message": "No matching memories found"}

            return {"memories": matching_memories}
        except Exception as e:
            logger.error(f"Error recalling memories by metadata: {str(e)}")
            return {"message": "Error during memory recall"}

    async def delete_memories_by_metadata(self, metadata: Dict[str, Any]):
        try:
            # Scroll through all memories in the collection
            scroll_result = self.qdrant_client.scroll(self.collection_name, limit=1000)

            memories_to_delete = []
            for point in scroll_result:
                point_metadata = point.payload.get("metadata", {})
                if all(point_metadata.get(key) == value for key, value in metadata.items()):
                    memories_to_delete.append(point.id)

            if memories_to_delete:
                self.qdrant_client.delete(
                    collection_name=self.collection_name,
                    points_selector={"points": memories_to_delete}
                )
                logger.info(f"Deleted {len(memories_to_delete)} memories matching the metadata.")
            else:
                logger.info("No memories found matching the specified metadata.")
        except Exception as e:
            logger.error(f"Error deleting memories by metadata: {str(e)}")

# Initialize MemoryManager for Mind Collection
memory_manager = MemoryManager(
    qdrant_client=qdrant_client_mind,
    collection_name=COLLECTION_NAME_MIND,
    model_name='all-MiniLM-L6-v2'
)

# Utility Functions for Readme Processing

def get_embedding(text: str) -> np.ndarray:
    OLLAMA_API_URL = os.getenv("OLLAMA_API_URL", "http://localhost:11434/api/embeddings")
    try:
        response = requests.post(OLLAMA_API_URL, json={
            "model": "nomic-embed-text",
            "prompt": text
        })
        response.raise_for_status()
        return np.array(response.json()['embedding'])
    except Exception as e:
        logger.error(f"Error fetching embedding: {e}")
        raise

def parse_readme(content: str) -> List[ReadmeSection]:
    html = markdown.markdown(content)
    soup = BeautifulSoup(html, 'html.parser')
    sections = []
    section_stack = []
    current_section = None

    for elem in soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'pre', 'ul', 'ol']):
        if elem.name.startswith('h'):
            level = int(elem.name[1])
            while section_stack and section_stack[-1].level >= level:
                section_stack.pop()

            parent = section_stack[-1] if section_stack else None
            current_section = ReadmeSection(
                content='',
                heading=elem.text.strip(),
                level=level,
                parent=parent.heading if parent else None,
                children=[],
                metadata={}
            )
            if parent:
                parent.children.append(current_section.heading)
            sections.append(current_section)
            section_stack.append(current_section)
        else:
            if current_section:
                current_section.content += "\n" + elem.get_text(separator=" ", strip=True)

    return sections

def build_section_graph(sections: List[ReadmeSection]) -> nx.DiGraph:
    G = nx.DiGraph()
    for section in sections:
        G.add_node(section.heading, level=section.level)
        if section.parent:
            G.add_edge(section.parent, section.heading)
    return G

def cluster_sections(sections: List[ReadmeSection], n_clusters: int = 10):
    embeddings = np.array([section.vector for section in sections if section.vector is not None])
    if embeddings.size == 0:
        logger.warning("No embeddings available for clustering.")
        return

    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    cluster_labels = kmeans.fit_predict(embeddings)
    for section, label in zip([s for s in sections if s.vector is not None], cluster_labels):
        section.metadata['cluster'] = int(label)

def add_section_to_qdrant(section: ReadmeSection, section_graph: nx.DiGraph):
    if not section.vector:
        logger.error(f"Section '{section.heading}' has no vector.")
        return

    point_id = str(uuid.uuid4())
    timestamp = time.time()

    centrality = nx.degree_centrality(section_graph).get(section.heading, 0)
    try:
        depth = nx.shortest_path_length(section_graph, source=list(section_graph.nodes)[0], target=section.heading)
    except nx.NetworkXNoPath:
        depth = 0

    payload = {
        "content": section.content,
        "heading": section.heading,
        "level": section.level,
        "parent": section.parent,
        "children": section.children,
        "metadata": {
            **section.metadata,
            "timestamp": timestamp,
            "centrality": centrality,
            "depth": depth,
            "access_count": 0,
            "relevance_score": 1.0
        }
    }

    try:
        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=section.vector, payload=payload)]
        )
        logger.info(f"Section '{section.heading}' added to Qdrant with ID {point_id}.")
    except Exception as e:
        logger.error(f"Failed to upsert section '{section.heading}': {e}")

knn_model_readme: Optional[NearestNeighbors] = None
point_id_mapping_readme: Dict[int, str] = {}

def build_knn_index_readme():
    global knn_model_readme, point_id_mapping_readme
    logger.info("Building KNN index for Readme Sections...")
    try:
        # Scroll retrieves points in batches; adjust batch size as needed
        all_points = []
        scroll_response = qdrant_client_readme.scroll(collection_name=COLLECTION_NAME_README, limit=10000)
        while scroll_response:
            all_points.extend(scroll_response.points)
            if scroll_response.next_page_offset:
                scroll_response = qdrant_client_readme.scroll(
                    collection_name=COLLECTION_NAME_README,
                    limit=10000,
                    offset=scroll_response.next_page_offset
                )
            else:
                break

        if not all_points:
            logger.warning("No points found in the Readme collection. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        embeddings = np.array([point.vector for point in all_points])
        if embeddings.size == 0:
            logger.warning("Embeddings array is empty for Readme sections. KNN index not built.")
            knn_model_readme = None
            point_id_mapping_readme = {}
            return

        knn_model_readme = NearestNeighbors(n_neighbors=10, algorithm='auto', metric='euclidean')
        knn_model_readme.fit(embeddings)
        point_id_mapping_readme = {i: point.id for i, point in enumerate(all_points)}
        logger.info(f"KNN index for Readme sections built successfully with {len(point_id_mapping_readme)} points.")
    except Exception as e:
        logger.error(f"Error building KNN index for Readme sections: {e}")
        knn_model_readme = None
        point_id_mapping_readme = {}

tfidf_vectorizer = TfidfVectorizer()

def calculate_tfidf_similarity(query: str, document: str) -> float:
    tfidf_matrix = tfidf_vectorizer.fit_transform([query, document])
    return (tfidf_matrix * tfidf_matrix.T).A[0, 1]

def prepare_training_data(query: str, sections: List[Dict[str, Any]]):
    features = []
    labels = []
    for section in sections:
        feature_vector = [
            section['metadata'].get('tfidf_similarity', 0.0),
            section['metadata'].get('semantic_similarity', 0.0),
            section['metadata'].get('centrality', 0.0),
            section['level'],
            section['metadata'].get('cluster', 0)
        ]
        features.append(feature_vector)
        labels.append(section['metadata'].get('relevance_label', 1))  # Placeholder
    return np.array(features), np.array(labels)

xgb_ranker = XGBRanker(
    objective='rank:pairwise',
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100
)

def train_xgb_ranker():
    try:
        # Placeholder: Implement actual training logic
        # This should be done offline with proper labeled data
        # For demonstration, we'll skip training
        logger.info("Training XGBRanker is not implemented. Using default model.")
    except Exception as e:
        logger.error(f"Error training XGBRanker: {e}")

# Train the ranker (currently a placeholder)
train_xgb_ranker()

def search_sections(query: str, top_k: int = 5) -> List[Dict[str, Any]]:
    if knn_model_readme is None:
        logger.warning("KNN model for Readme sections is not built. No search can be performed.")
        return []

    try:
        query_vector = get_embedding(query).reshape(1, -1)
    except Exception as e:
        logger.error(f"Failed to get embedding for query '{query}': {e}")
        return []

    try:
        distances, indices = knn_model_readme.kneighbors(query_vector)
    except Exception as e:
        logger.error(f"Error during KNN search: {e}")
        return []

    nearest_points = [point_id_mapping_readme[idx] for idx in indices[0]]

    sections = []
    for idx, point_id in enumerate(nearest_points):
        try:
            points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
            if not points:
                continue
            point = points[0]
            section = point.payload
            section['vector'] = point.vector.tolist()
            tfidf_sim = calculate_tfidf_similarity(query, section['content'])
            section['metadata']['tfidf_similarity'] = tfidf_sim
            # Use the distance directly
            semantic_sim = 1 / (1 + distances[0][idx])
            section['metadata']['semantic_similarity'] = semantic_sim
            sections.append(section)
        except Exception as e:
            logger.error(f"Error retrieving section '{point_id}': {e}")

    if not sections:
        return []

    X_test, _ = prepare_training_data(query, sections)
    if X_test.size == 0:
        logger.warning("No features available for ranking.")
        return []

    try:
        relevance_scores = xgb_ranker.predict(X_test)
    except Exception as e:
        logger.error(f"Error during ranking: {e}")
        relevance_scores = np.ones(len(sections))  # Fallback

    for section, score in zip(sections, relevance_scores):
        section['score'] = score
    sections.sort(key=lambda x: x['score'], reverse=True)

    for section in sections[:top_k]:
        update_section_relevance(section['id'], section['score'])

    return sections[:top_k]

def update_section_relevance(point_id: str, score: float):
    try:
        points = qdrant_client_readme.retrieve(collection_name=COLLECTION_NAME_README, ids=[point_id])
        if not points:
            logger.warning(f"Point ID '{point_id}' not found for relevance update.")
            return
        current_payload = points[0].payload
        current_payload['metadata']['access_count'] += 1
        current_payload['metadata']['relevance_score'] = (
            current_payload['metadata']['relevance_score'] + score
        ) / 2

        qdrant_client_readme.upsert(
            collection_name=COLLECTION_NAME_README,
            points=[PointStruct(id=point_id, vector=points[0].vector.tolist(), payload=current_payload)]
        )
        logger.info(f"Updated relevance for point ID {point_id}.")
    except Exception as e:
        logger.error(f"Error updating relevance for point ID '{point_id}': {e}")

def get_context(section_heading: str, depth: int = 1) -> Dict[str, Any]:
    try:
        filter_condition = Filter(
            must=[FieldCondition(key="heading", match={'value': section_heading})]
        )
        results = qdrant_client_readme.scroll(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition,
            limit=1
        )
        if not results.points:
            return {}

        section = results.points[0].payload
        context = {
            "current": section,
            "parent": None,
            "children": [],
            "siblings": []
        }

        if section.get('parent'):
            parent_filter = Filter(
                must=[FieldCondition(key="heading", match={'value': section['parent']})]
            )
            parent_results = qdrant_client_readme.scroll(
                collection_name=COLLECTION_NAME_README,
                filter=parent_filter,
                limit=1
            )
            if parent_results.points:
                context["parent"] = parent_results.points[0].payload

        if depth > 0 and 'children' in section:
            for child_heading in section['children']:
                child_context = get_context(child_heading, depth - 1)
                if child_context:
                    context["children"].append(child_context["current"])

        if context.get("parent") and 'children' in context["parent"]:
            for sibling_heading in context["parent"]["children"]:
                if sibling_heading != section_heading:
                    sibling_context = get_context(sibling_heading, 0)
                    if sibling_context:
                        context["siblings"].append(sibling_context["current"])

        return context
    except Exception as e:
        logger.error(f"Error getting context for section '{section_heading}': {e}")
        return {}

def prune_sections(threshold: float = 0.5, max_age_days: int = 30):
    try:
        current_time = time.time()
        max_age_seconds = max_age_days * 24 * 60 * 60

        filter_condition = Filter(
            must=[
                FieldCondition(
                    key="metadata.relevance_score",
                    range=Range(lt=threshold)
                ),
                FieldCondition(
                    key="metadata.timestamp",
                    range=Range(lt=current_time - max_age_seconds)
                )
            ]
        )

        qdrant_client_readme.delete(
            collection_name=COLLECTION_NAME_README,
            filter=filter_condition
        )
        logger.info("Pruned low-relevance and old sections.")
    except Exception as e:
        logger.error(f"Error pruning sections: {e}")

# Initialize FastAPI app
app = FastAPI()

# Define Endpoints
@app.post("/process_readme")
async def process_readme_api(file: UploadFile = File(...)):
    try:
        content = await file.read()
        sections = parse_readme(content.decode())
        section_graph = build_section_graph(sections)
        for section in sections:
            section.vector = get_embedding(section.content).tolist()
        cluster_sections(sections)
        for section in sections:
            add_section_to_qdrant(section, section_graph)
        build_knn_index_readme()
        return {"message": "README processed successfully"}
    except Exception as e:
        logger.error(f"Error processing README: {e}")
        raise HTTPException(status_code=500, detail="Failed to process README.")

@app.post("/search")
async def search_api(query: str, top_k: int = 5):
    try:
        results = search_sections(query, top_k)
        return {"results": results}
    except Exception as e:
        logger.error(f"Error during search: {e}")
        raise HTTPException(status_code=500, detail="Search failed.")

@app.get("/context/{section_heading}")
async def get_context_api(section_heading: str, depth: int = 1):
    try:
        context = get_context(section_heading, depth)
        return {"context": context}
    except Exception as e:
        logger.error(f"Error retrieving context: {e}")
        raise HTTPException(status_code=500, detail="Failed to retrieve context.")

@app.post("/prune")
async def prune_api(threshold: float = 0.5, max_age_days: int = 30):
    try:
        prune_sections(threshold, max_age_days)
        return {"message": "Pruning completed successfully"}
    except Exception as e:
        logger.error(f"Error during pruning: {e}")
        raise HTTPException(status_code=500, detail="Pruning failed.")

@app.post("/rebuild_knn_index")
async def rebuild_knn_index_api():
    try:
        build_knn_index_readme()
        return {"message": "KNN index rebuilt successfully"}
    except Exception as e:
        logger.error(f"Error rebuilding KNN index: {e}")
        raise HTTPException(status_code=500, detail="Failed to rebuild KNN index.")

# Function to run Uvicorn server in a separate thread
def run_server():
    config = uvicorn.Config(app, host="0.0.0.0", port=8000, log_level="info")
    server = uvicorn.Server(config)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(server.serve())

# Start the server in a separate thread
server_thread = Thread(target=run_server, daemon=True)
server_thread.start()

print("FastAPI server is running on http://0.0.0.0:8000")

# Example Usage of MemoryManager (Optional)
# You can interact with MemoryManager separately if needed

# Example: Creating a memory
# await memory_manager.create_memory(content="Sample memory content.", metadata={"tags": ["example", "test"], "reference_tags": ["example"]})

# Example: Recalling memories
# memories = await memory_manager.recall_memory(query_content="Sample query.")
# print(memories)

# Example: Pruning memories
# await memory_manager.prune_memories()

# Example: Purging all memories
# await memory_manager.purge_all_memories()

# Example: Recalling memories with metadata
# memories_with_metadata = await memory_manager.recall_memory_with_metadata(query_content="Sample query.", search_metadata={"tags": "example"})
# print(memories_with_metadata)

# Example: Deleting memories by metadata
# await memory_manager.delete_memories_by_metadata(metadata={"tags": "test"})

In [None]:
#This will hit groq and retrieve a JSON object:

In [None]:
!pip install groq

# Tests:


- this is a test MD and this code will generate metadata and a title that you can then feed into the Gravrag:

In [57]:
baseUrl = "http://localhost:8000"

In [52]:
#!pip install groq
from groq import Groq
import json

#following metadata is required:
#abstitle
#summary
#keywords
#created_at

import re

def extract_metadata(response):
    # Try to isolate the JSON part of the response
    try:
        # Use regex to find the JSON block between triple backticks
        json_match = re.search(r'```(.*?)```', response, re.DOTALL)

        if json_match:
            json_part = json_match.group(1).strip()  # Extract the JSON string
        else:
            raise ValueError("Could not find the JSON block in the response.")

        # Print the extracted JSON part for debugging
        print("Extracted JSON part:", json_part)

        # Parse the JSON string to a Python dictionary
        metadata = json.loads(json_part)

        # Extract the title
        title = metadata["title"]

        # Return title as a string and metadata as a JSON object (dictionary)
        return title, metadata

    except json.JSONDecodeError as e:
        print("Error decoding JSON:", e)
        return "Error: Invalid JSON response.", None
    except ValueError as e:
        print("Error:", e)
        return "Error: Could not extract metadata.", None
    except Exception as e:
        print("An unexpected error occurred:", e)
        return "Error: Could not extract metadata.", None
    

# Initialize client and pass the API key directly
client = Groq(api_key="wedontcommitapikeys;)")

System_prompt = """You are Groq, an advanced AI capable of analyzing Markdown (MD) text inputs and generating creative outputs. When you receive an MD file, your task is to extract key themes, ideas, and insights to create a unique title that encapsulates the essence of the content.

Based on the analysis of the MD text, generate a metadata JSON object containing:

- **title**: A concise, engaging title that reflects the main topic or idea of the MD content.
- **summary**: A brief summary (1-2 sentences) highlighting the core message or purpose of the document.
- **keywords**: An array of relevant keywords or phrases derived from the MD content that represent its key concepts.
- **created_at**: A timestamp indicating when the metadata was generated.

Make your title catchy and informative, and ensure that the metadata accurately represents the content of the MD file.

Here’s an example of the expected output format:
{
    "title": "Engaging Title Here",
    "metadata = {
    "summary": "This document discusses key insights into ...",
    "keywords": "keyword1", "keyword2", "keyword3",
    "created_at": "2024-10-07T12:34:56Z"}
}
"""

User_prompt = """
1. [Overview](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
2. [Backend Components](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [Schemas](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [User Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Project Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Task Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Submission Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Comment Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Notification Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Email Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Leaderboard Schema](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [Server Setup](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Imports and Dependencies](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Express Application Configuration](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Middleware Configuration](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [File Upload Handling](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Authentication Middleware](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Request Logging](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [Utility Functions](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [createNewTask Function](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Authentication Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [User Registration](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [User Login](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Promote User to Admin](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Get Current User Profile](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Task Management Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Create New Task](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Update Task](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Get All Tasks for User](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Delete Task](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Submission Management Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Get Submissions by User](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Project Management Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Create New Project](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Get All Projects](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Get Specific Project by ID](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Update Project](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Delete Project](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Comment Management Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Add a Comment to a Task](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Get Comments for a Task](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Admin Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Get All Users](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Update a User](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Delete a User](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Admin Overview](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Admin Task Status](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Admin User Activity](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Admin Analytics Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
                - [User Activity Analytics](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
                - [Task Completion Analytics](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Admin Task Assignment](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Miscellaneous Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [File Upload Endpoint](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Processor Endpoint](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Frontend Routes](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
3. [Frontend Components](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [HTML Structure](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [CSS and Styling](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [JavaScript Functionality](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Background Effects](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Navigation Bar](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Main Content](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Initial View](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Pitch Form](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Pitch Deck Slides](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
        - [Interactive Features](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Star Generation](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Pitch Form Handling](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Pitch Deck Generation](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Slide Navigation](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Tooltips](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Invest Modal](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
            - [Keyboard and Swipe Navigation](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
4. [Security Considerations](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [Authentication and Authorization](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [Data Sanitization](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
    - [File Handling Security](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
5. [Deployment and Environment Configuration](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
6. [Conclusion](https://www.notion.so/Comprehensive-description-of-groqy-backend-116546104b0e8014a407e02c3450db1a?pvs=21)
"""

chat_completion = client.chat.completions.create(
    messages=[
        {
            "role": "system",
            "content": f"{System_prompt}"
        },
        {
            "role": "user",
            "content": f"{User_prompt}"
        }
    ],
    model="llama3-8b-8192",
    temperature=0.5,
    max_tokens=1024,
    top_p=1,
    stop=None,
    stream=False,
)

# Print the response
result = (chat_completion.choices[0].message.content)

titleT, metadataT = extract_metadata(result)
print(title)
print(metadata)
    



INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Extracted JSON part: {
    "title": "Comprehensive Description of Groqy Backend",
    "summary": "This document provides a detailed overview of the Groqy backend, including its components, setup, utility functions, routes, frontend components, and security considerations.",
    "keywords": ["Groqy", "backend", "components", "setup", "utility functions", "routes", "frontend components", "security considerations"],
    "created_at": "2024-10-07T12:34:56Z"
}
Comprehensive Description of Groqy Backend
{'objective_id': 'project_x', 'user_id': 'user_123'}


### This will create memory inside the RAG using Query:

- recall using query when you dont know the specific metadata - e.g. like in SQL if you didnt know for some reason your table name

In [59]:
import requests
import json

def send_request(content=User_prompt):
    url = f"{baseUrl}/gravrag/create_memory"
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    # Define the body
    body = {
        "content": content,
    }
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        return {"error": "Request failed", "details": str(e)}
    
send_request("Another test")

{'message': 'Memory created successfully'}

### This will create memory inside the RAG using metadata:
- recall using query when you know the specific metadata - e.g. like in SQL if you DO know the table name

In [61]:
import requests
import json
baseUrl = "http://localhost:8000"
def send_request_with_metadata(title, metadata):
    url = f"{baseUrl}/gravrag/create_memory"
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    # Define the body
    body = {
        "content": title,
        "metadata": metadata
    }
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        return {"error": "Request failed", "details": str(e)}
    

#send_request_with_metadata(title,metadata)

{'message': 'Memory created successfully'}

### This will Recall memory using Just the Query (Recall Memory Based on Semantic Search):

In [62]:
import requests
import json

def recall_memory(query, topK):
    url = f"{baseUrl}/gravrag/recall_memory"
    
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    
    # Define the body
    body = {
        "query": query,
        "top_k": topK
    }
    
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        return {"error": "Request failed", "details": str(e)}

response = recall_memory("Some query", topK=5)
print(response)


{'memories': [{'content': 'Some query', 'metadata': {'objective_id': 'project_x', 'user_id': 'user_123', 'timestamp': 1728271252.7177944, 'recall_count': 0, 'memetic_similarity': 1.0, 'semantic_relativity': 1.0, 'gravitational_pull': 0.9999999906888026, 'spacetime_coordinate': 0.07861617004577379}}, {'content': 'Some query', 'metadata': {'objective_id': 'project_x', 'user_id': 'user_123', 'timestamp': 1728267041.1143515, 'recall_count': 0, 'memetic_similarity': 1.0, 'semantic_relativity': 1.0, 'gravitational_pull': 0.9999999906888026, 'spacetime_coordinate': 0.0002367242378170876}}, {'content': 'Some query', 'metadata': {'objective_id': 'project_x', 'user_id': 'user_123', 'timestamp': 1728269247.933541, 'recall_count': 0, 'memetic_similarity': 1.0, 'semantic_relativity': 1.0, 'gravitational_pull': 0.9999999906888026, 'spacetime_coordinate': 0.0004956616470272158}}, {'content': 'Another test', 'metadata': {'timestamp': 1728271231.9067838, 'recall_count': 0, 'memetic_similarity': 1.0, 's

### This will Recall memory using Just the Metadata (Recall Memory with Metadata Matching):

- for this to work you need to match both the QUERY and the METADATA EXACTLY!!!

In [63]:
import requests
import json

def recall_memory_with_metadata(query, metadata, top_k):
    url = f"{baseUrl}/gravrag/recall_with_metadata"
    
    # Define headers
    headers = {
        "Content-Type": "application/json"
    }
    
    # Define the body, including top_k
    body = {
        "query": query,
        "metadata": metadata,
        "top_k": top_k  # Include top_k in the body
    }
    
    # Send the POST request
    try:
        response = requests.post(
            url,
            headers=headers,
            data=json.dumps(body)  # Convert the body to JSON format
        )
        
        # Check if the request was successful
        if response.status_code == 200:
            return response.json()  # Return the response as JSON
        else:
            return {"error": f"Request failed with status code {response.status_code}", "details": response.text}
    
    except requests.RequestException as e:
        return {"error": "Request failed", "details": str(e)}

# Example usage
metadata = {
    "objective_id": "project_x",
    "user_id": "user_123"
}
top_k = 1

response = recall_memory_with_metadata("Some query", metadata, top_k)
print(response)


{'memories': [{'content': 'Some query', 'metadata': {'objective_id': 'project_x', 'user_id': 'user_123', 'timestamp': 1728267041.1143515, 'recall_count': 0, 'memetic_similarity': 1.0, 'semantic_relativity': 1.0, 'gravitational_pull': 0.9999999906888026, 'spacetime_coordinate': 0.00023622207143356419}}]}
